birder 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- birder/common/lib.py +2 -9
- birder/common/training_cli.py +18 -0
- birder/common/training_utils.py +123 -10
- birder/data/collators/detection.py +10 -3
- birder/data/datasets/coco.py +8 -10
- birder/data/transforms/detection.py +30 -13
- birder/inference/detection.py +108 -4
- birder/inference/wbf.py +226 -0
- birder/net/__init__.py +8 -0
- birder/net/detection/efficientdet.py +65 -86
- birder/net/detection/rt_detr_v1.py +1 -0
- birder/net/detection/yolo_anchors.py +205 -0
- birder/net/detection/yolo_v2.py +25 -24
- birder/net/detection/yolo_v3.py +39 -40
- birder/net/detection/yolo_v4.py +28 -26
- birder/net/detection/yolo_v4_tiny.py +24 -20
- birder/net/fasternet.py +1 -1
- birder/net/gc_vit.py +671 -0
- birder/net/lit_v1.py +472 -0
- birder/net/lit_v1_tiny.py +342 -0
- birder/net/lit_v2.py +436 -0
- birder/net/mobilenet_v4_hybrid.py +1 -1
- birder/net/resnet_v1.py +1 -1
- birder/net/resnext.py +67 -25
- birder/net/se_resnet_v1.py +46 -0
- birder/net/se_resnext.py +3 -0
- birder/net/simple_vit.py +2 -2
- birder/net/vit.py +0 -15
- birder/net/vovnet_v2.py +31 -1
- birder/scripts/benchmark.py +90 -21
- birder/scripts/predict.py +1 -0
- birder/scripts/predict_detection.py +18 -11
- birder/scripts/train.py +10 -34
- birder/scripts/train_barlow_twins.py +10 -34
- birder/scripts/train_byol.py +10 -34
- birder/scripts/train_capi.py +10 -35
- birder/scripts/train_data2vec.py +9 -34
- birder/scripts/train_data2vec2.py +9 -34
- birder/scripts/train_detection.py +48 -40
- birder/scripts/train_dino_v1.py +10 -34
- birder/scripts/train_dino_v2.py +9 -34
- birder/scripts/train_dino_v2_dist.py +9 -34
- birder/scripts/train_franca.py +9 -34
- birder/scripts/train_i_jepa.py +9 -34
- birder/scripts/train_ibot.py +9 -34
- birder/scripts/train_kd.py +156 -64
- birder/scripts/train_mim.py +10 -34
- birder/scripts/train_mmcr.py +10 -34
- birder/scripts/train_rotnet.py +10 -34
- birder/scripts/train_simclr.py +10 -34
- birder/scripts/train_vicreg.py +10 -34
- birder/tools/auto_anchors.py +20 -1
- birder/tools/pack.py +172 -103
- birder/tools/show_det_iterator.py +10 -1
- birder/version.py +1 -1
- {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/METADATA +3 -3
- {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/RECORD +61 -55
- {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/WHEEL +0 -0
- {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/entry_points.txt +0 -0
- {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/top_level.txt +0 -0
```diff
@@ -33,7 +33,6 @@ from birder.common import training_utils
 from birder.common.lib import format_duration
 from birder.common.lib import get_mim_network_name
 from birder.common.lib import get_network_name
-from birder.common.lib import set_random_seeds
 from birder.common.masking import BlockMasking
 from birder.conf import settings
 from birder.data.dataloader.webdataset import make_wds_loader
@@ -179,44 +178,16 @@ def train(args: argparse.Namespace) -> None:
     #
     # Initialize
     #
-    training_utils.
-    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
-    training_utils.log_git_info()
+    (device, device_id, disable_tqdm) = training_utils.init_training(args, logger)
 
     if args.size is None:
         args.size = registry.get_default_size(args.network)
 
     logger.info(f"Using size={args.size}")
 
-    if args.cpu is True:
-        device = torch.device("cpu")
-        device_id = 0
-    else:
-        device = torch.device("cuda")
-        device_id = torch.cuda.current_device()
-
-    if args.use_deterministic_algorithms is True:
-        torch.backends.cudnn.benchmark = False
-        torch.use_deterministic_algorithms(True)
-    else:
-        torch.backends.cudnn.benchmark = True
-
-    if args.seed is not None:
-        set_random_seeds(args.seed)
-
-    if args.non_interactive is True or training_utils.is_local_primary(args) is False:
-        disable_tqdm = True
-    elif sys.stderr.isatty() is False:
-        disable_tqdm = True
-    else:
-        disable_tqdm = False
-
-    # Enable or disable the autograd anomaly detection
-    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)
-
     batch_size: int = args.batch_size
     grad_accum_steps: int = args.grad_accum_steps
-    logger.debug(f"Effective batch size = {
+    logger.debug(f"Effective batch size = {batch_size * grad_accum_steps * args.world_size}")
 
     begin_epoch = 1
     epochs = args.epochs + 1
```
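Every train script in this release swaps the same removed setup block for a single call to `training_utils.init_training(args, logger)`. As a rough sketch, reconstructed directly from the removed lines (the actual helper in `birder/common/training_utils.py` is not shown in this diff and may differ, e.g. in distributed setup):

```python
# Sketch only: reconstructed from the block removed in this diff; the real
# birder.common.training_utils.init_training may differ in details.
import argparse
import logging
import sys

import torch

import birder
from birder.common.lib import set_random_seeds
from birder.common.training_utils import is_local_primary
from birder.common.training_utils import log_git_info


def init_training(args: argparse.Namespace, logger: logging.Logger) -> tuple[torch.device, int, bool]:
    # Version/git logging, previously the first lines of every train script
    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
    log_git_info()

    # Device selection
    if args.cpu is True:
        device = torch.device("cpu")
        device_id = 0
    else:
        device = torch.device("cuda")
        device_id = torch.cuda.current_device()

    # Reproducibility: deterministic kernels are mutually exclusive with cuDNN autotuning
    if args.use_deterministic_algorithms is True:
        torch.backends.cudnn.benchmark = False
        torch.use_deterministic_algorithms(True)
    else:
        torch.backends.cudnn.benchmark = True

    if args.seed is not None:
        set_random_seeds(args.seed)

    # Progress bars: off for non-interactive runs, non-primary ranks and non-TTY stderr
    if args.non_interactive is True or is_local_primary(args) is False:
        disable_tqdm = True
    elif sys.stderr.isatty() is False:
        disable_tqdm = True
    else:
        disable_tqdm = False

    # Enable or disable the autograd anomaly detection
    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)

    return (device, device_id, disable_tqdm)
```

Centralizing this removes the per-script drift the old copies invited; each script keeps only the returned `(device, device_id, disable_tqdm)` triple.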
```diff
@@ -442,27 +413,31 @@ def train(args: argparse.Namespace) -> None:
 
     optimizer_steps_per_epoch = math.ceil(len(training_loader) / grad_accum_steps)
     last_batch_idx = len(training_loader) - 1
+    logging.debug(f"Epoch has {last_batch_idx+1} iterations ({optimizer_steps_per_epoch} steps)")
 
     #
     # Optimizer, learning rate scheduler and training parameter groups
     #
 
+    # Learning rate scaling
+    lr = training_utils.scale_lr(args)
+
     # Training parameter groups
     custom_keys_weight_decay = training_utils.get_wd_custom_keys(args)
     parameters = training_utils.optimizer_parameter_groups(
         net,
         args.wd,
+        base_lr=lr,
         norm_weight_decay=args.norm_wd,
         custom_keys_weight_decay=custom_keys_weight_decay,
+        custom_layer_weight_decay=args.custom_layer_wd,
         layer_decay=args.layer_decay,
         layer_decay_min_scale=args.layer_decay_min_scale,
         layer_decay_no_opt_scale=args.layer_decay_no_opt_scale,
         bias_lr=args.bias_lr,
+        custom_layer_lr_scale=args.custom_layer_lr_scale,
     )
 
-    # Learning rate scaling
-    lr = training_utils.scale_lr(args)
-
     if args.lr_scheduler_update == "epoch":
         step_update = False
         scheduler_steps_per_epoch = 1
```
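The second recurring change moves `training_utils.scale_lr(args)` ahead of `optimizer_parameter_groups(...)` and threads the result in as `base_lr`, next to the new `custom_layer_weight_decay` and `custom_layer_lr_scale` arguments. The ordering matters because per-group learning rates are fixed when the group dicts are created; a hypothetical, minimal illustration (not birder's actual `optimizer_parameter_groups`):

```python
# Hypothetical, minimal example: per-group learning rates are resolved to
# concrete values when the group dicts are built, so the base LR must already
# be known at that point.
import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 2))
base_lr = 0.1         # stand-in for the output of training_utils.scale_lr(args)
head_lr_scale = 0.25  # stand-in for a custom_layer_lr_scale entry

param_groups = [
    {"params": net[0].parameters(), "lr": base_lr},
    {"params": net[1].parameters(), "lr": base_lr * head_lr_scale},
]
optimizer = torch.optim.SGD(param_groups, lr=base_lr)  # default lr for groups without one
for group in optimizer.param_groups:
    print(group["lr"])  # 0.1, then 0.025
```

A base LR computed after the groups exist could only change the optimizer default, never a group that already carries its own `lr` entry.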
birder/scripts/train_franca.py CHANGED

```diff
@@ -37,7 +37,6 @@ from birder.common import training_utils
 from birder.common.lib import format_duration
 from birder.common.lib import get_mim_network_name
 from birder.common.lib import get_network_name
-from birder.common.lib import set_random_seeds
 from birder.common.masking import RollBlockMasking
 from birder.conf import settings
 from birder.data.dataloader.webdataset import make_wds_loader
@@ -205,44 +204,16 @@ def train(args: argparse.Namespace) -> None:
     #
     # Initialize
     #
-    training_utils.
-    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
-    training_utils.log_git_info()
+    (device, device_id, disable_tqdm) = training_utils.init_training(args, logger)
 
     if args.size is None:
         args.size = registry.get_default_size(args.network)
 
     logger.info(f"Using size={args.size}")
 
-    if args.cpu is True:
-        device = torch.device("cpu")
-        device_id = 0
-    else:
-        device = torch.device("cuda")
-        device_id = torch.cuda.current_device()
-
-    if args.use_deterministic_algorithms is True:
-        torch.backends.cudnn.benchmark = False
-        torch.use_deterministic_algorithms(True)
-    else:
-        torch.backends.cudnn.benchmark = True
-
-    if args.seed is not None:
-        set_random_seeds(args.seed)
-
-    if args.non_interactive is True or training_utils.is_local_primary(args) is False:
-        disable_tqdm = True
-    elif sys.stderr.isatty() is False:
-        disable_tqdm = True
-    else:
-        disable_tqdm = False
-
-    # Enable or disable the autograd anomaly detection
-    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)
-
     batch_size: int = args.batch_size
     grad_accum_steps: int = args.grad_accum_steps
-    logger.debug(f"Effective batch size = {
+    logger.debug(f"Effective batch size = {batch_size * grad_accum_steps * args.world_size}")
 
     begin_epoch = 1
     epochs = args.epochs + 1
@@ -447,27 +418,31 @@ def train(args: argparse.Namespace) -> None:
 
     optimizer_steps_per_epoch = math.ceil(len(training_loader) / grad_accum_steps)
     last_batch_idx = len(training_loader) - 1
+    logging.debug(f"Epoch has {last_batch_idx+1} iterations ({optimizer_steps_per_epoch} steps)")
 
     #
     # Optimizer, learning rate scheduler and training parameter groups
     #
 
+    # Learning rate scaling
+    lr = training_utils.scale_lr(args)
+
     # Training parameter groups
     custom_keys_weight_decay = training_utils.get_wd_custom_keys(args)
     parameters = training_utils.optimizer_parameter_groups(
         net,
         args.wd,
+        base_lr=lr,
         norm_weight_decay=args.norm_wd,
         custom_keys_weight_decay=custom_keys_weight_decay,
+        custom_layer_weight_decay=args.custom_layer_wd,
         layer_decay=args.layer_decay,
         layer_decay_min_scale=args.layer_decay_min_scale,
         layer_decay_no_opt_scale=args.layer_decay_no_opt_scale,
         bias_lr=args.bias_lr,
+        custom_layer_lr_scale=args.custom_layer_lr_scale,
     )
 
-    # Learning rate scaling
-    lr = training_utils.scale_lr(args)
-
     if args.lr_scheduler_update == "epoch":
         step_update = False
         scheduler_steps_per_epoch = 1
```
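Each script also gains a `logging.debug` line relating loader iterations to optimizer steps: under gradient accumulation, an epoch has one iteration per loader batch but only one `optimizer.step()` per `grad_accum_steps` iterations. With hypothetical numbers:

```python
# Hypothetical numbers for the iteration/step bookkeeping logged above
import math

loader_len = 1000      # batches the training loader yields per epoch
grad_accum_steps = 4   # gradients accumulated before each optimizer.step()

optimizer_steps_per_epoch = math.ceil(loader_len / grad_accum_steps)  # 250
last_batch_idx = loader_len - 1                                       # 999

print(f"Epoch has {last_batch_idx + 1} iterations ({optimizer_steps_per_epoch} steps)")
# Epoch has 1000 iterations (250 steps)
```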
birder/scripts/train_i_jepa.py CHANGED

```diff
@@ -40,7 +40,6 @@ from birder.common import training_utils
 from birder.common.lib import format_duration
 from birder.common.lib import get_mim_network_name
 from birder.common.lib import get_network_name
-from birder.common.lib import set_random_seeds
 from birder.conf import settings
 from birder.data.dataloader.webdataset import make_wds_loader
 from birder.data.datasets.directory import make_image_dataset
@@ -84,44 +83,16 @@ def train(args: argparse.Namespace) -> None:
     #
     # Initialize
     #
-    training_utils.
-    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
-    training_utils.log_git_info()
+    (device, device_id, disable_tqdm) = training_utils.init_training(args, logger)
 
     if args.size is None:
         args.size = registry.get_default_size(args.network)
 
     logger.info(f"Using size={args.size}")
 
-    if args.cpu is True:
-        device = torch.device("cpu")
-        device_id = 0
-    else:
-        device = torch.device("cuda")
-        device_id = torch.cuda.current_device()
-
-    if args.use_deterministic_algorithms is True:
-        torch.backends.cudnn.benchmark = False
-        torch.use_deterministic_algorithms(True)
-    else:
-        torch.backends.cudnn.benchmark = True
-
-    if args.seed is not None:
-        set_random_seeds(args.seed)
-
-    if args.non_interactive is True or training_utils.is_local_primary(args) is False:
-        disable_tqdm = True
-    elif sys.stderr.isatty() is False:
-        disable_tqdm = True
-    else:
-        disable_tqdm = False
-
-    # Enable or disable the autograd anomaly detection
-    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)
-
     batch_size: int = args.batch_size
     grad_accum_steps: int = args.grad_accum_steps
-    logger.debug(f"Effective batch size = {
+    logger.debug(f"Effective batch size = {batch_size * grad_accum_steps * args.world_size}")
 
     begin_epoch = 1
     epochs = args.epochs + 1
@@ -287,27 +258,31 @@ def train(args: argparse.Namespace) -> None:
 
     optimizer_steps_per_epoch = math.ceil(len(training_loader) / grad_accum_steps)
     last_batch_idx = len(training_loader) - 1
+    logging.debug(f"Epoch has {last_batch_idx+1} iterations ({optimizer_steps_per_epoch} steps)")
 
     #
     # Loss criteria, optimizer, learning rate scheduler and training parameter groups
     #
 
+    # Learning rate scaling
+    lr = training_utils.scale_lr(args)
+
     # Training parameter groups
     custom_keys_weight_decay = training_utils.get_wd_custom_keys(args)
     parameters = training_utils.optimizer_parameter_groups(
         net,
         args.wd,
+        base_lr=lr,
         norm_weight_decay=args.norm_wd,
         custom_keys_weight_decay=custom_keys_weight_decay,
+        custom_layer_weight_decay=args.custom_layer_wd,
         layer_decay=args.layer_decay,
         layer_decay_min_scale=args.layer_decay_min_scale,
         layer_decay_no_opt_scale=args.layer_decay_no_opt_scale,
         bias_lr=args.bias_lr,
+        custom_layer_lr_scale=args.custom_layer_lr_scale,
     )
 
-    # Learning rate scaling
-    lr = training_utils.scale_lr(args)
-
     if args.lr_scheduler_update == "epoch":
         step_update = False
         scheduler_steps_per_epoch = 1
```
birder/scripts/train_ibot.py CHANGED

```diff
@@ -39,7 +39,6 @@ from birder.common import training_utils
 from birder.common.lib import format_duration
 from birder.common.lib import get_mim_network_name
 from birder.common.lib import get_network_name
-from birder.common.lib import set_random_seeds
 from birder.common.masking import BlockMasking
 from birder.conf import settings
 from birder.data.dataloader.webdataset import make_wds_loader
@@ -107,44 +106,16 @@ def train(args: argparse.Namespace) -> None:
     #
     # Initialize
     #
-    training_utils.
-    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
-    training_utils.log_git_info()
+    (device, device_id, disable_tqdm) = training_utils.init_training(args, logger)
 
     if args.size is None:
         args.size = registry.get_default_size(args.network)
 
     logger.info(f"Using size={args.size}")
 
-    if args.cpu is True:
-        device = torch.device("cpu")
-        device_id = 0
-    else:
-        device = torch.device("cuda")
-        device_id = torch.cuda.current_device()
-
-    if args.use_deterministic_algorithms is True:
-        torch.backends.cudnn.benchmark = False
-        torch.use_deterministic_algorithms(True)
-    else:
-        torch.backends.cudnn.benchmark = True
-
-    if args.seed is not None:
-        set_random_seeds(args.seed)
-
-    if args.non_interactive is True or training_utils.is_local_primary(args) is False:
-        disable_tqdm = True
-    elif sys.stderr.isatty() is False:
-        disable_tqdm = True
-    else:
-        disable_tqdm = False
-
-    # Enable or disable the autograd anomaly detection
-    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)
-
     batch_size: int = args.batch_size
     grad_accum_steps: int = args.grad_accum_steps
-    logger.debug(f"Effective batch size = {
+    logger.debug(f"Effective batch size = {batch_size * grad_accum_steps * args.world_size}")
 
     begin_epoch = 1
     epochs = args.epochs + 1
@@ -354,27 +325,31 @@ def train(args: argparse.Namespace) -> None:
 
     optimizer_steps_per_epoch = math.ceil(len(training_loader) / grad_accum_steps)
     last_batch_idx = len(training_loader) - 1
+    logging.debug(f"Epoch has {last_batch_idx+1} iterations ({optimizer_steps_per_epoch} steps)")
 
     #
     # Loss criteria, optimizer, learning rate scheduler and training parameter groups
     #
 
+    # Learning rate scaling
+    lr = training_utils.scale_lr(args)
+
     # Training parameter groups
     custom_keys_weight_decay = training_utils.get_wd_custom_keys(args)
     parameters = training_utils.optimizer_parameter_groups(
         net,
         args.wd,
+        base_lr=lr,
         norm_weight_decay=args.norm_wd,
         custom_keys_weight_decay=custom_keys_weight_decay,
+        custom_layer_weight_decay=args.custom_layer_wd,
         layer_decay=args.layer_decay,
         layer_decay_min_scale=args.layer_decay_min_scale,
         layer_decay_no_opt_scale=args.layer_decay_no_opt_scale,
         bias_lr=args.bias_lr,
+        custom_layer_lr_scale=args.custom_layer_lr_scale,
     )
 
-    # Learning rate scaling
-    lr = training_utils.scale_lr(args)
-
     if args.lr_scheduler_update == "epoch":
         step_update = False
         scheduler_steps_per_epoch = 1
```