megadetector 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/batch_processing/data_preparation/manage_local_batch.py +278 -197
- api/batch_processing/data_preparation/manage_video_batch.py +7 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/compare_batch_results.py +110 -60
- api/batch_processing/postprocessing/load_api_results.py +55 -69
- api/batch_processing/postprocessing/md_to_labelme.py +1 -0
- api/batch_processing/postprocessing/postprocess_batch_results.py +158 -50
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +222 -74
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- classification/prepare_classification_script.py +191 -191
- data_management/coco_to_yolo.py +65 -44
- data_management/databases/integrity_check_json_db.py +7 -5
- data_management/generate_crops_from_cct.py +1 -1
- data_management/importers/animl_results_to_md_results.py +2 -2
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/importers/zamba_results_to_md_results.py +2 -2
- data_management/labelme_to_coco.py +34 -6
- data_management/labelme_to_yolo.py +1 -1
- data_management/lila/create_lila_blank_set.py +474 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +1 -1
- data_management/lila/download_lila_subset.py +46 -21
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +16 -10
- data_management/lila/lila_common.py +14 -11
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/resize_coco_dataset.py +12 -10
- data_management/yolo_output_to_md_output.py +40 -13
- data_management/yolo_to_coco.py +34 -21
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +1 -1
- detection/run_detector.py +73 -18
- detection/run_detector_batch.py +104 -24
- detection/run_inference_with_yolov5_val.py +127 -26
- detection/run_tiled_inference.py +153 -43
- detection/video_utils.py +3 -1
- md_utils/ct_utils.py +79 -3
- md_utils/md_tests.py +253 -15
- md_utils/path_utils.py +129 -24
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +0 -2
- md_utils/write_html_image_list.py +1 -0
- md_visualization/visualization_utils.py +17 -2
- md_visualization/visualize_db.py +8 -0
- md_visualization/visualize_detector_output.py +185 -104
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/METADATA +2 -2
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/RECORD +62 -58
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
- {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
api/batch_processing/data_preparation/manage_local_batch.py

@@ -14,18 +14,6 @@
 # the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
 # script):
 #
-# * You can specify the MegaDetector location, but you may find it useful to use the same paths
-# I use; on all the machines where I run MD, I keep all versions of MegaDetector handy at these
-# paths:
-#
-# ~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt
-# ~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt
-# ~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb
-#
-# On Windows, this translates to, for example:
-#
-# c:\users\dmorr\models\camera_traps\megadetector\md_v5.0.0\md_v5a.0.0.pt
-#
 # * Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
 # say I'm running a job for an organization called "bibblebop"; I have a big folder of
 # job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
@@ -78,6 +66,7 @@ import json
 import os
 import stat
 import time
+import re
 
 import humanfriendly
 
@@ -90,12 +79,12 @@ from md_utils.ct_utils import split_list_into_n_chunks
 
 from detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file
 from detection.run_detector import DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
+from detection.run_detector import estimate_md_images_per_second
 
 from api.batch_processing.postprocessing.postprocess_batch_results import (
     PostProcessingOptions, process_batch_results)
 from detection.run_detector import get_detector_version_from_filename
-
-max_task_name_length = 92
+from md_utils.ct_utils import image_file_to_camera_folder
 
 # To specify a non-default confidence threshold for including detections in the .json file
 json_threshold = None
@@ -103,61 +92,108 @@ json_threshold = None
 # Turn warnings into errors if more than this many images are missing
 max_tolerable_failed_images = 100
 
+# Should we supply the --image_queue_option to run_detector_batch.py? I only set this
+# when I have a very slow drive and a comparably fast GPU. When this is enabled, checkpointing
+# is not supported within a job, so I set n_jobs to a large number (typically 100).
 use_image_queue = False
 
 # Only relevant when we're using a single GPU
 default_gpu_number = 0
 
+# Should we supply --quiet to run_detector_batch.py?
 quiet_mode = True
 
 # Specify a target image size when running MD... strongly recommended to leave this at "None"
+#
+# When using augmented inference, if you leave this at "None", run_inference_with_yolov5_val.py
+# will use its default size, which is 1280 * 1.3, which is almost always what you want.
 image_size = None
 
 # Only relevant when running on CPU
 ncores = 1
 
-# OS-specific script line continuation character
+# OS-specific script line continuation character (modified later if we're running on Windows)
 slcc = '\\'
 
-# OS-specific script comment character
+# OS-specific script comment character (modified later if we're running on Windows)
 scc = '#'
 
+# OS-specific script extension (modified later if we're running on Windows)
 script_extension = '.sh'
 
+# If False, we'll load chunk files with file lists if they exist
+force_enumeration = False
+
 # Prefer threads on Windows, processes on Linux
 parallelization_defaults_to_threads = False
 
 # This is for things like image rendering, not for MegaDetector
 default_workers_for_parallel_tasks = 30
 
+overwrite_handling = 'skip' # 'skip', 'error', or 'overwrite'
+
+# Only relevant to repeat detection elimination; try to identify EK113/RCNX101-style
+# overflow folders and treat them as the same camera
+overflow_folder_handling_enabled = True
+
+# The function used to get camera names from image paths; can also replace this
+# with a custom function.
+relative_path_to_location = image_file_to_camera_folder
+
+# This will be the .json results file after RDE; if this is still None when
+# we get to classification stuff, that will indicate that we didn't do RDE.
+filtered_output_filename = None
+
+if os.name == 'nt':
+
+    slcc = '^'
+    scc = 'REM'
+    script_extension = '.bat'
+
+    # My experience has been that Python multiprocessing is flaky on Windows, so
+    # default to threads on Windows
+    parallelization_defaults_to_threads = True
+    default_workers_for_parallel_tasks = 10
+
+
+## Constants related to using YOLOv5's val.py
+
 # Should we use YOLOv5's val.py instead of run_detector_batch.py?
 use_yolo_inference_scripts = False
 
-# Directory in which to run val.py
+# Directory in which to run val.py (relevant for YOLOv5, not for YOLOv8)
 yolo_working_dir = os.path.expanduser('~/git/yolov5')
 
+# Only used for loading the mapping from class indices to names
+yolo_dataset_file = None
+
+# 'yolov5' or 'yolov8'; assumes YOLOv5 if this is None
+yolo_model_type = None
+
+# inference batch size
+yolo_batch_size = 1
+
 # Should we remove intermediate files used for running YOLOv5's val.py?
 #
 # Only relevant if use_yolo_inference_scripts is True.
-remove_yolo_intermediate_results =
-remove_yolo_symlink_folder =
+remove_yolo_intermediate_results = True
+remove_yolo_symlink_folder = True
 use_symlinks_for_yolo_inference = True
+write_yolo_debug_output = False
 
-
+# Should we apply YOLOv5's test-time augmentation?
+augment = False
 
-# Set later if EK113/RCNX101-style overflow folders are being handled in this dataset
-overflow_folder_handling_enabled = False
 
-
-# is True.
-augment = False
+## Constants related to tiled inference
 
-
-
-
-
-
-
+use_tiled_inference = True
+
+# Should we delete tiles after each job? Only set this to False for debugging;
+# large jobs will take up a lot of space if you keep tiles around after each task.
+remove_tiles = True
+tile_size = (1280,1280)
+tile_overlap = 0.2
 
 
 #%% Constants I set per script
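Note on the new camera-folder mapping: the constants above wire repeat detection elimination to ct_utils.image_file_to_camera_folder (imported earlier in this diff) and enable handling of EK113/RCNX101-style overflow folders. A minimal sketch of that idea follows; it is not the packaged implementation, and the helper name image_path_to_camera_folder is mine, but the overflow-folder regexes are taken from the sample function removed further down in this diff.

import os
import re

# Overflow-folder patterns documented in the sample function removed later in this diff:
# 100RECNX (Reconyx), 100EK113 (Bushnell), 100_BTCF (Browning)
OVERFLOW_PATTERNS = [r'/\d+RECNX/', r'/\d+EK\d+/', r'/\d+_BTCF/']

def image_path_to_camera_folder(relative_path):
    """Collapse overflow folders, then treat the containing folder as the camera/location."""
    relative_path = relative_path.replace('\\', '/')
    for pattern in OVERFLOW_PATTERNS:
        relative_path = re.sub(pattern, '/', relative_path)
    return os.path.dirname(relative_path)

# Overflow folders map back to their parent camera folder
assert image_path_to_camera_folder('site-a/cam-03/100RECNX/IMG_0001.JPG') == 'site-a/cam-03'
assert image_path_to_camera_folder('site-a/cam-03/IMG_0001.JPG') == 'site-a/cam-03'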
@@ -165,9 +201,11 @@ if os.name == 'nt':
 input_path = '/drive/organization'
 
 assert not (input_path.endswith('/') or input_path.endswith('\\'))
+assert os.path.isdir(input_path), 'Could not find input folder {}'.format(input_path)
+input_path = input_path.replace('\\','/')
 
 organization_name_short = 'organization'
-job_date = None # '
+job_date = None # '2024-01-01'
 assert job_date is not None and organization_name_short != 'organization'
 
 # Optional descriptor
@@ -178,9 +216,7 @@ if job_tag is None:
 else:
     job_description_string = '-' + job_tag
 
-model_file =
-# model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt')
-# model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb')
+model_file = 'MDV5A' # 'MDV5A', 'MDV5B', 'MDV4'
 
 postprocessing_base = os.path.expanduser('~/postprocessing')
 
@@ -194,16 +230,12 @@ n_gpus = 2
 # checkpointing. Don't worry, this will be assert()'d in the next cell.
 checkpoint_frequency = 10000
 
-#
-
-if ('v5') in model_file:
-    gpu_images_per_second = 10
-else:
-    gpu_images_per_second = 2.9
+# Estimate inference speed for the current GPU
+approx_images_per_second = estimate_md_images_per_second(model_file)
 
-# Rough estimate for
+# Rough estimate for the inference time cost of augmentation
 if augment:
-
+    approx_images_per_second = approx_images_per_second * 0.7
 
 base_task_name = organization_name_short + '-' + job_date + job_description_string + '-' + \
     get_detector_version_from_filename(model_file)
@@ -224,6 +256,14 @@ if augment:
     assert use_yolo_inference_scripts,\
         'Augmentation is only supported when running with the YOLO inference scripts'
 
+if use_tiled_inference:
+    assert not augment, \
+        'Augmentation is not supported when using tiled inference'
+    assert not use_yolo_inference_scripts, \
+        'Using the YOLO inference script is not supported when using tiled inference'
+    assert checkpoint_frequency is None, \
+        'Checkpointing is not supported when using tiled inference'
+
 filename_base = os.path.join(base_output_folder_name, base_task_name)
 combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
 postprocessing_output_folder = os.path.join(filename_base, 'preview')
@@ -240,24 +280,17 @@ print('Output folder:\n{}'.format(filename_base))
 
 #%% Enumerate files
 
-
+# Have we already listed files for this job?
+chunk_files = os.listdir(filename_base)
+pattern = re.compile('chunk\d+.json')
+chunk_files = [fn for fn in chunk_files if pattern.match(fn)]
 
-
-all_images = [fn for fn in all_images if not \
-    (fn.startswith('$RECYCLE') or fn.startswith('System Volume Information'))]
+if (not force_enumeration) and (len(chunk_files) > 0):
 
-print('
-
-
-
-    pass
+    print('Found {} chunk files in folder {}, bypassing enumeration'.format(
+        len(chunk_files),
+        filename_base))
 
-    #%% Load files from prior enumeration
-
-    import re
-    chunk_files = os.listdir(filename_base)
-    pattern = re.compile('chunk\d+.json')
-    chunk_files = [fn for fn in chunk_files if pattern.match(fn)]
     all_images = []
     for fn in chunk_files:
         with open(os.path.join(filename_base,fn),'r') as f:
@@ -265,8 +298,24 @@ if False:
             assert isinstance(chunk,list)
             all_images.extend(chunk)
     all_images = sorted(all_images)
-    print('Loaded {} image files from chunks in {}'.format(len(all_images),filename_base))
 
+    print('Loaded {} image files from {} chunks in {}'.format(
+        len(all_images),len(chunk_files),filename_base))
+
+else:
+
+    print('Enumerating image files in {}'.format(input_path))
+
+    all_images = sorted(path_utils.find_images(input_path,recursive=True,convert_slashes=True))
+
+    # It's common to run this notebook on an external drive with the main folders in the drive root
+    all_images = [fn for fn in all_images if not \
+        (fn.startswith('$RECYCLE') or fn.startswith('System Volume Information'))]
+
+    print('')
+
+    print('Enumerated {} image files in {}'.format(len(all_images),input_path))
+
 
 #%% Divide images into chunks
 
@@ -275,13 +324,19 @@ folder_chunks = split_list_into_n_chunks(all_images,n_jobs)
 
 #%% Estimate total time
 
-
-
-
-
-
-
-
+if approx_images_per_second is None:
+
+    print("Can't estimate inference time for the current environment")
+
+else:
+
+    n_images = len(all_images)
+    execution_seconds = n_images / approx_images_per_second
+    wallclock_seconds = execution_seconds / n_gpus
+    print('Expected time: {}'.format(humanfriendly.format_timespan(wallclock_seconds)))
+
+    seconds_per_chunk = len(folder_chunks[0]) / approx_images_per_second
+    print('Expected time per chunk: {}'.format(humanfriendly.format_timespan(seconds_per_chunk)))
 
 
 #%% Write file lists
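Note on the time estimate: the hard-coded images-per-second guesses are gone; the estimate now comes from estimate_md_images_per_second(model_file) plus the arithmetic in the cell above. A rough worked example (the throughput figure is illustrative, not a benchmark):

# Illustrative numbers only: 100,000 images, ~10 images/second on one GPU, 2 GPUs
n_images = 100_000
approx_images_per_second = 10.0
n_gpus = 2

execution_seconds = n_images / approx_images_per_second  # 10,000 s of total GPU time
wallclock_seconds = execution_seconds / n_gpus           # ~5,000 s across two GPUs
print(f'{wallclock_seconds / 3600:.1f} hours')           # prints "1.4 hours"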
@@ -298,19 +353,20 @@ for i_chunk,chunk_list in enumerate(folder_chunks):
 #%% Generate commands
 
 # A list of the scripts tied to each GPU, as absolute paths. We'll write this out at
-# the end so each GPU's list of commands can be run at once
-# running lots of small batches via YOLOv5's val.py, which doesn't support checkpointing.
+# the end so each GPU's list of commands can be run at once
 gpu_to_scripts = defaultdict(list)
 
 # i_task = 0; task = task_info[i_task]
 for i_task,task in enumerate(task_info):
 
     chunk_file = task['input_file']
+    checkpoint_filename = chunk_file.replace('.json','_checkpoint.json')
+
     output_fn = chunk_file.replace('.json','_results.json')
 
     task['output_file'] = output_fn
 
-    if
+    if n_gpus > 1:
         gpu_number = i_task % n_gpus
     else:
         gpu_number = default_gpu_number
@@ -326,6 +382,10 @@ for i_task,task in enumerate(task_info):
         augment_string = ''
         if augment:
             augment_string = '--augment_enabled 1'
+        else:
+            augment_string = '--augment_enabled 0'
+
+        batch_string = '--batch_size {}'.format(yolo_batch_size)
 
         symlink_folder = os.path.join(filename_base,'symlinks','symlinks_{}'.format(
             str(i_task).zfill(3)))
@@ -339,6 +399,10 @@ for i_task,task in enumerate(task_info):
         if not remove_yolo_symlink_folder:
             remove_symlink_folder_string = '--no_remove_symlink_folder'
 
+        write_yolo_debug_output_string = ''
+        if write_yolo_debug_output:
+            write_yolo_debug_output = '--write_yolo_debug_output'
+
         remove_yolo_results_string = ''
         if not remove_yolo_intermediate_results:
             remove_yolo_results_string = '--no_remove_yolo_results_folder'
@@ -356,15 +420,47 @@ for i_task,task in enumerate(task_info):
         overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)
 
         cmd += f'python run_inference_with_yolov5_val.py "{model_file}" "{chunk_file}" "{output_fn}" '
-        cmd += f'
+        cmd += f'{image_size_string} {augment_string} '
         cmd += f'{symlink_folder_string} {yolo_results_folder_string} {remove_yolo_results_string} '
         cmd += f'{remove_symlink_folder_string} {confidence_threshold_string} {device_string} '
-        cmd += f'{overwrite_handling_string}'
-
+        cmd += f'{overwrite_handling_string} {batch_string} {write_yolo_debug_output_string}'
+
+        if yolo_working_dir is not None:
+            cmd += f' --yolo_working_folder "{yolo_working_dir}"'
+        if yolo_dataset_file is not None:
+            cmd += ' --yolo_dataset_file "{}"'.format(yolo_dataset_file)
+        if yolo_model_type is not None:
+            cmd += ' --model_type {}'.format(yolo_model_type)
+
         if not use_symlinks_for_yolo_inference:
             cmd += ' --no_use_symlinks'
 
         cmd += '\n'
+
+    elif use_tiled_inference:
+
+        tiling_folder = os.path.join(filename_base,'tile_cache','tile_cache_{}'.format(
+            str(i_task).zfill(3)))
+
+        if os.name == 'nt':
+            cuda_string = f'set CUDA_VISIBLE_DEVICES={gpu_number} & '
+        else:
+            cuda_string = f'CUDA_VISIBLE_DEVICES={gpu_number} '
+
+        cmd = f'{cuda_string} python run_tiled_inference.py "{model_file}" "{input_path}" "{tiling_folder}" "{output_fn}"'
+
+        cmd += f' --image_list "{chunk_file}"'
+        cmd += f' --overwrite_handling {overwrite_handling}'
+
+        if not remove_tiles:
+            cmd += ' --no_remove_tiles'
+
+        # If we're using non-default tile sizes
+        if tile_size is not None and (tile_size[0] > 0 or tile_size[1] > 0):
+            cmd += ' --tile_size_x {} --tile_size_y {}'.format(tile_size[0],tile_size[1])
+
+        if tile_overlap is not None:
+            cmd += f' --tile_overlap {tile_overlap}'
 
     else:
 
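Note on the new tiled-inference branch: for one task, the f-strings above assemble a single run_tiled_inference.py command line. The sketch below mirrors that assembly with the default constants from this script; the paths, task index, and GPU number are hypothetical, chosen only to make the example concrete.

# Illustrative only: hypothetical paths/GPU index, mirroring the f-strings above
gpu_number = 0
chunk_file = '/postprocessing/org-2024-01-01-mdv5a/chunk000.json'                  # hypothetical
tiling_folder = '/postprocessing/org-2024-01-01-mdv5a/tile_cache/tile_cache_000'   # hypothetical
output_fn = chunk_file.replace('.json', '_results.json')

cmd = f'CUDA_VISIBLE_DEVICES={gpu_number} python run_tiled_inference.py "MDV5A" ' \
      f'"/drive/organization" "{tiling_folder}" "{output_fn}"'
cmd += f' --image_list "{chunk_file}" --overwrite_handling skip'
cmd += ' --tile_size_x 1280 --tile_size_y 1280 --tile_overlap 0.2'
print(cmd)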
@@ -375,7 +471,6 @@ for i_task,task in enumerate(task_info):
 
         checkpoint_frequency_string = ''
         checkpoint_path_string = ''
-        checkpoint_filename = chunk_file.replace('.json','_checkpoint.json')
 
         if checkpoint_frequency is not None and checkpoint_frequency > 0:
             checkpoint_frequency_string = f'--checkpoint_frequency {checkpoint_frequency}'
@@ -484,12 +579,10 @@ multiple processes, so the tasks will run serially. This only matters if you ha
 GPUs.
 """
 
-
-
-    pass
-
-    #%%% Run the tasks (commented out)
+run_tasks_in_notebook = False
 
+if run_tasks_in_notebook:
+
     assert not use_yolo_inference_scripts, \
         'If you want to use the YOLOv5 inference scripts, you can\'t run the model interactively (yet)'
 
@@ -537,15 +630,32 @@ if False:
 
     # ...for each chunk
 
-# ...if
+# ...if we're running tasks in this notebook
 
 
 #%% Load results, look for failed or missing images in each task
 
+# Check that all task output files exist
+
+missing_output_files = []
+
+# i_task = 0; task = task_info[i_task]
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
+    output_file = task['output_file']
+    if not os.path.isfile(output_file):
+        missing_output_files.append(output_file)
+
+if len(missing_output_files) > 0:
+    print('Missing {} output files:'.format(len(missing_output_files)))
+    for s in missing_output_files:
+        print(s)
+    raise Exception('Missing output files')
+
+
 n_total_failures = 0
 
 # i_task = 0; task = task_info[i_task]
-for i_task,task in enumerate(task_info):
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
 
     chunk_file = task['input_file']
     output_file = task['output_file']
@@ -562,6 +672,13 @@ for i_task,task in enumerate(task_info):
 
     # im = task_results['images'][0]
     for im in task_results['images']:
+
+        # Most of the time, inference result files use absolute paths, but it's
+        # getting annoying to make sure that's *always* true, so handle both here.
+        # E.g., when using tiled inference, paths will be relative.
+        if not os.path.isabs(im['file']):
+            fn = os.path.join(input_path,im['file']).replace('\\','/')
+            im['file'] = fn
         assert im['file'].startswith(input_path)
         assert im['file'] in task_images_set
         filename_to_results[im['file']] = im
@@ -573,7 +690,8 @@ for i_task,task in enumerate(task_info):
     task['results'] = task_results
 
     for fn in task_images:
-        assert fn in filename_to_results
+        assert fn in filename_to_results, \
+            'File {} not found in results for task {}'.format(fn,i_task)
 
     n_total_failures += n_task_failures
 
@@ -593,7 +711,7 @@ combined_results = {}
 combined_results['images'] = []
 images_processed = set()
 
-for i_task,task in enumerate(task_info):
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
 
     task_results = task['results']
 
@@ -620,14 +738,14 @@ assert len(combined_results['images']) == len(all_images), \
 result_filenames = [im['file'] for im in combined_results['images']]
 assert len(combined_results['images']) == len(set(result_filenames))
 
-#
+# Convert to relative paths, preserving '/' as the path separator, regardless of OS
 for im in combined_results['images']:
+    assert '\\' not in im['file']
+    assert im['file'].startswith(input_path)
     if input_path.endswith(':'):
-        assert im['file'].startswith(input_path)
         im['file'] = im['file'].replace(input_path,'',1)
     else:
-
-        im['file'] = im['file'].replace(input_path + os.path.sep,'',1)
+        im['file'] = im['file'].replace(input_path + '/','',1)
 
 combined_api_output_file = os.path.join(
     combined_api_output_folder,
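Note on the path normalization above: the combined results now always use forward slashes and strip the input-folder prefix with '/' rather than os.path.sep, so the .json output is OS-independent. A small example with the placeholder input_path used earlier in this script:

# Example of the prefix stripping performed above (example values only)
input_path = '/drive/organization'
file_abs = '/drive/organization/cam-01/100RECNX/IMG_0001.JPG'
file_rel = file_abs.replace(input_path + '/', '', 1)
assert file_rel == 'cam-01/100RECNX/IMG_0001.JPG'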
@@ -675,88 +793,8 @@ options.api_output_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file)
-
-
-#%% RDE (sample directory collapsing)
-
-#
-# The next few cells are about repeat detection elimination; if you want to skip this,
-# and still do other stuff in this notebook (e.g. running classifiers), that's fine, but
-# the rest of the notebook weakly assumes you've done this. Specifically, it looks for
-# the variable "filtered_api_output_file" (a file produced by the RDE process). If you
-# don't run the RDE cells, just change "filtered_api_output_file" to "combined_api_output_file"
-# (the raw output from MegaDetector). Then it will be like all this RDE stuff doesn't exist.
-#
-# Though FWIW, once you're sufficiently power-user-ish to use this notebook, RDE is almost
-# always worth it.
-#
-
-def relative_path_to_location(relative_path):
-    """
-    This is a sample function that returns a camera name given an image path. By
-    default in the RDE process, leaf-node folders are equivalent to cameras. To map
-    something other than leaf-node folders to cameras, fill in this function, and un-comment the
-    line below containing "relative_path_to_location".
-
-    Sample regular expressions are included here for common patterns, particularly the
-    overflow folders created by Reconyx and Bushnell camera traps. So if one of those
-    fits your scenario, you don't have to modify this function, just un-comment the line
-    below that enables this feature.
-
-    Nothing bad happens if you have overflow folders like this and you don't
-    enable this mapping, you are just taking a more conservative approach to RDE in that
-    scenario.
-    """
-
-    import re
-
-    # 100RECNX is the overflow folder style for Reconyx cameras
-    # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
-    # 100_BTCF is the overflow folder style for Browning cameras
-    patterns = ['\/\d+RECNX\/','\/\d+EK\d+\/','\/\d+_BTCF\/']
-
-    relative_path = relative_path.replace('\\','/')
-    for pat in patterns:
-        relative_path = re.sub(pat,'/',relative_path)
-    location_name = os.path.dirname(relative_path)
-
-    return location_name
-
-
-#%% Test cells for relative_path_to_location
-
-if False:
-
-    pass
-
-    #%% Test the generic cases
-
-    relative_path = 'a/b/c/d/100EK113/blah.jpg'
-    print(relative_path_to_location(relative_path))
-
-    relative_path = 'a/b/c/d/100RECNX/blah.jpg'
-    print(relative_path_to_location(relative_path))
-
-
-    #%% Test relative_path_to_location on the current dataset
-
-    with open(combined_api_output_file,'r') as f:
-        d = json.load(f)
-    image_filenames = [im['file'] for im in d['images']]
-
-    location_names = set()
-
-    # relative_path = image_filenames[0]
-    for relative_path in tqdm(image_filenames):
-        location_name = relative_path_to_location(relative_path)
-        location_names.add(location_name)
-
-    location_names = list(location_names)
-    location_names.sort()
-
-    for s in location_names:
-        print(s)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+# import clipboard; clipboard.copy(html_output_file)
 
 
 #%% Repeat detection elimination, phase 1
@@ -768,7 +806,7 @@ task_index = 0
 
 options = repeat_detections_core.RepeatDetectionOptions()
 
-options.confidenceMin = 0.
+options.confidenceMin = 0.1
 options.confidenceMax = 1.01
 options.iouThreshold = 0.85
 options.occurrenceThreshold = 15
@@ -785,13 +823,13 @@ options.otherDetectionsThreshold = options.confidenceMin
 
 options.bRenderDetectionTiles = True
 options.maxOutputImageWidth = 2000
-options.detectionTilesMaxCrops =
+options.detectionTilesMaxCrops = 300
 
 # options.lineThickness = 5
 # options.boxExpansion = 8
 
 # To invoke custom collapsing of folders for a particular manufacturer's naming scheme
-
+options.customDirNameFunction = relative_path_to_location
 
 options.bRenderHtml = False
 options.imageBase = input_path
@@ -816,9 +854,9 @@ options.debugMaxRenderInstance = -1
 # Can be None, 'xsort', or 'clustersort'
 options.smartSort = 'xsort'
 
-
-
-
+suspicious_detection_results = repeat_detections_core.find_repeat_detections(combined_api_output_file,
+                                                                             outputFilename=None,
+                                                                             options=options)
 
 
 #%% Manual RDE step
@@ -826,7 +864,8 @@ suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(combi
 ## DELETE THE VALID DETECTIONS ##
 
 # If you run this line, it will open the folder up in your file browser
-path_utils.open_file(os.path.dirname(
+path_utils.open_file(os.path.dirname(suspicious_detection_results.filterFile),
+                     attempt_to_open_in_wsl_host=True)
 
 #
 # If you ran the previous cell, but then you change your mind and you don't want to do
@@ -834,7 +873,7 @@ path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))
 # previous cell. If you do that, you're implicitly telling the notebook that you looked
 # at everything in that folder, and confirmed there were no red boxes on animals.
 #
-# Instead, either change "
+# Instead, either change "filtered_output_filename" below to "combined_api_output_file",
 # or delete *all* the images in the filtering folder.
 #
 
@@ -843,12 +882,13 @@ path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))
 
 from api.batch_processing.postprocessing.repeat_detection_elimination import remove_repeat_detections
 
-filtered_output_filename = path_utils.insert_before_extension(combined_api_output_file,
+filtered_output_filename = path_utils.insert_before_extension(combined_api_output_file,
+                                                               'filtered_{}'.format(rde_string))
 
 remove_repeat_detections.remove_repeat_detections(
     inputFile=combined_api_output_file,
     outputFile=filtered_output_filename,
-    filteringDir=os.path.dirname(
+    filteringDir=os.path.dirname(suspicious_detection_results.filterFile)
    )
 
 
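Note on the RDE output naming above: filtered_output_filename points at a copy of the combined results with suspicious repeat detections removed. Assuming insert_before_extension inserts its second argument just before the '.json' extension (an assumption; check path_utils for the exact behavior), and with a hypothetical rde_string, the filtered file would be named something like the following sketch shows:

# Hypothetical illustration of the naming convention assumed above
combined_api_output_file = '/postprocessing/org-2024-01-01-mdv5a_detections.json'  # hypothetical
rde_string = 'rde_0.100_0.850_15_0.200'                                            # hypothetical
filtered_output_filename = combined_api_output_file.replace(
    '.json', '.filtered_{}.json'.format(rde_string))
print(filtered_output_filename)
# /postprocessing/org-2024-01-01-mdv5a_detections.filtered_rde_0.100_0.850_15_0.200.json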
@@ -890,7 +930,8 @@ options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
 
-path_utils.open_file(html_output_file)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+# import clipboard; clipboard.copy(html_output_file)
 
 
 #%% Run MegaClassifier (actually, write out a script that runs MegaClassifier)
@@ -899,6 +940,11 @@ path_utils.open_file(html_output_file)
 final_output_path_mc = None
 final_output_path_ic = None
 
+# If we didn't do RDE
+if filtered_output_filename is None:
+    print("Warning: it looks like you didn't do RDE, using the raw output file")
+    filtered_output_filename = combined_api_output_file
+
 classifier_name_short = 'megaclassifier'
 threshold_str = '0.15' # 0.6
 classifier_name = 'megaclassifier_v0.1_efficientnet-b3'
@@ -1086,7 +1132,6 @@ with open(output_file,'w') as f:
     for s in commands:
         f.write('{}'.format(s))
 
-import stat
 st = os.stat(output_file)
 os.chmod(output_file, st.st_mode | stat.S_IEXEC)
 
@@ -1256,8 +1301,6 @@ os.chmod(output_file, st.st_mode | stat.S_IEXEC)
 
 #%% Within-image classification smoothing
 
-from collections import defaultdict
-
 #
 # Only count detections with a classification confidence threshold above
 # *classification_confidence_threshold*, which in practice means we're only
@@ -1516,7 +1559,7 @@ else:
     import datetime
     from data_management.read_exif import parse_exif_datetime_string
 
-    min_valid_timestamp_year =
+    min_valid_timestamp_year = 2001
 
     now = datetime.datetime.now()
 
@@ -1540,6 +1583,7 @@ for exif_result in tqdm(exif_results):
 
     im['file_name'] = exif_result['file_name']
     im['id'] = im['file_name']
+
     if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
         (exif_datetime_tag not in exif_result['exif_tags']):
         exif_dt = None
@@ -1573,7 +1617,7 @@ for exif_result in tqdm(exif_results):
 
 # ...for each exif image result
 
-print('Parsed EXIF datetime information, unable to parse EXIF
+print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
     len(images_without_datetime),len(exif_results)))
 
 
@@ -1639,7 +1683,7 @@ min_dominant_class_classifications_above_threshold_for_class_smoothing = 5 # 2
 max_secondary_class_classifications_above_threshold_for_class_smoothing = 5
 
 # If the ratio between a dominant class and a secondary class count is greater than this,
-# regardless of the secondary class count, switch those
+# regardless of the secondary class count, switch those classifications (i.e., ignore
 # max_secondary_class_classifications_above_threshold_for_class_smoothing).
 #
 # This may be different for different dominant classes, e.g. if we see lots of cows, they really
@@ -1959,8 +2003,8 @@ print('Processing {} to {}'.format(base_task_name, output_base))
 options.api_output_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
-path_utils.open_file(ppresults.output_html_file)
-
+path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True)
+# import clipboard; clipboard.copy(ppresults.output_html_file)
 
 #% Zip .json files
 
@@ -2027,7 +2071,7 @@ for i, j in itertools.combinations(list(range(0,len(filenames))),2):
 results = compare_batch_results(options)
 
 from md_utils.path_utils import open_file
-open_file(results.html_output_file)
+open_file(results.html_output_file,attempt_to_open_in_wsl_host=True)
 
 
 #%% Merge in high-confidence detections from another results file
@@ -2081,7 +2125,7 @@ options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
 
 
 #%% .json splitting
@@ -2094,12 +2138,6 @@ from api.batch_processing.postprocessing.subset_json_detector_output import (
 input_filename = filtered_output_filename
 output_base = os.path.join(combined_api_output_folder,base_task_name + '_json_subsets')
 
-if False:
-    if data is None:
-        with open(input_filename) as f:
-            data = json.load(f)
-    print('Data set contains {} images'.format(len(data['images'])))
-
 print('Processing file {} to {}'.format(input_filename,output_base))
 
 options = SubsetJsonDetectorOutputOptions()
@@ -2204,13 +2242,47 @@ video_output_filename = filtered_output_filename.replace('.json','_aggregated.js
 frame_results_to_video_results(filtered_output_filename,video_output_filename)
 
 
+#%% Sample custom path replacement function
+
+def custom_relative_path_to_location(relative_path):
+
+    relative_path = relative_path.replace('\\','/')
+    tokens = relative_path.split('/')
+    location_name = '/'.join(tokens[0:2])
+    return location_name
+
+
+#%% Test relative_path_to_location on the current dataset
+
+with open(combined_api_output_file,'r') as f:
+    d = json.load(f)
+image_filenames = [im['file'] for im in d['images']]
+
+location_names = set()
+
+# relative_path = image_filenames[0]
+for relative_path in tqdm(image_filenames):
+    location_name = relative_path_to_location(relative_path)
+    location_names.add(location_name)
+
+location_names = list(location_names)
+location_names.sort()
+
+for s in location_names:
+    print(s)
+
+
 #%% End notebook: turn this script into a notebook (how meta!)
 
 import os
 import nbformat as nbf
 
-
-'
+if os.name == 'nt':
+    git_base = r'c:\git'
+else:
+    git_base = os.path.expanduer('~/git')
+
+input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py'
 assert os.path.isfile(input_py_file)
 output_ipynb_file = input_py_file.replace('.py','.ipynb')
 
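Note on the new sample cell above: custom_relative_path_to_location maps an image path to a location by keeping the first two path components, e.g.:

#   custom_relative_path_to_location('site-a/cam-03/100RECNX/IMG_0001.JPG') -> 'site-a/cam-03'
# To use it during repeat detection elimination, point the mapping hook defined near the
# top of this script at it (example values above are hypothetical):
#   relative_path_to_location = custom_relative_path_to_location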
@@ -2233,14 +2305,23 @@ i_line = 0
 
 header_comment = ''
 
+# Delete a few lines from the top that don't belong in the NB version, e.g. the name
+# of the .py file
 lines_to_ignore = 7
+expected_first_token = '# This script'
+found_first_token = False
 
 # Everything before the first cell is the header comment
 while(not lines[i_line].startswith('#%%')):
+
     if i_line < lines_to_ignore:
         i_line += 1
         continue
 
+    if not found_first_token:
+        assert lines[i_line].startswith(expected_first_token)
+        found_first_token = True
+
     s = lines[i_line].replace('#','').strip()
     if len(s) == 0:
         header_comment += '\n\n'