birder 0.2.2-py3-none-any.whl → 0.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. birder/common/lib.py +2 -9
  2. birder/common/training_cli.py +18 -0
  3. birder/common/training_utils.py +123 -10
  4. birder/data/collators/detection.py +10 -3
  5. birder/data/datasets/coco.py +8 -10
  6. birder/data/transforms/detection.py +30 -13
  7. birder/inference/detection.py +108 -4
  8. birder/inference/wbf.py +226 -0
  9. birder/net/__init__.py +8 -0
  10. birder/net/detection/efficientdet.py +65 -86
  11. birder/net/detection/rt_detr_v1.py +1 -0
  12. birder/net/detection/yolo_anchors.py +205 -0
  13. birder/net/detection/yolo_v2.py +25 -24
  14. birder/net/detection/yolo_v3.py +39 -40
  15. birder/net/detection/yolo_v4.py +28 -26
  16. birder/net/detection/yolo_v4_tiny.py +24 -20
  17. birder/net/fasternet.py +1 -1
  18. birder/net/gc_vit.py +671 -0
  19. birder/net/lit_v1.py +472 -0
  20. birder/net/lit_v1_tiny.py +342 -0
  21. birder/net/lit_v2.py +436 -0
  22. birder/net/mobilenet_v4_hybrid.py +1 -1
  23. birder/net/resnet_v1.py +1 -1
  24. birder/net/resnext.py +67 -25
  25. birder/net/se_resnet_v1.py +46 -0
  26. birder/net/se_resnext.py +3 -0
  27. birder/net/simple_vit.py +2 -2
  28. birder/net/vit.py +0 -15
  29. birder/net/vovnet_v2.py +31 -1
  30. birder/scripts/benchmark.py +90 -21
  31. birder/scripts/predict.py +1 -0
  32. birder/scripts/predict_detection.py +18 -11
  33. birder/scripts/train.py +10 -34
  34. birder/scripts/train_barlow_twins.py +10 -34
  35. birder/scripts/train_byol.py +10 -34
  36. birder/scripts/train_capi.py +10 -35
  37. birder/scripts/train_data2vec.py +9 -34
  38. birder/scripts/train_data2vec2.py +9 -34
  39. birder/scripts/train_detection.py +48 -40
  40. birder/scripts/train_dino_v1.py +10 -34
  41. birder/scripts/train_dino_v2.py +9 -34
  42. birder/scripts/train_dino_v2_dist.py +9 -34
  43. birder/scripts/train_franca.py +9 -34
  44. birder/scripts/train_i_jepa.py +9 -34
  45. birder/scripts/train_ibot.py +9 -34
  46. birder/scripts/train_kd.py +156 -64
  47. birder/scripts/train_mim.py +10 -34
  48. birder/scripts/train_mmcr.py +10 -34
  49. birder/scripts/train_rotnet.py +10 -34
  50. birder/scripts/train_simclr.py +10 -34
  51. birder/scripts/train_vicreg.py +10 -34
  52. birder/tools/auto_anchors.py +20 -1
  53. birder/tools/pack.py +172 -103
  54. birder/tools/show_det_iterator.py +10 -1
  55. birder/version.py +1 -1
  56. {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/METADATA +3 -3
  57. {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/RECORD +61 -55
  58. {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/WHEEL +0 -0
  59. {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/entry_points.txt +0 -0
  60. {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/licenses/LICENSE +0 -0
  61. {birder-0.2.2.dist-info → birder-0.2.3.dist-info}/top_level.txt +0 -0
@@ -33,7 +33,6 @@ from birder.common import training_utils
 from birder.common.lib import format_duration
 from birder.common.lib import get_mim_network_name
 from birder.common.lib import get_network_name
-from birder.common.lib import set_random_seeds
 from birder.common.masking import BlockMasking
 from birder.conf import settings
 from birder.data.dataloader.webdataset import make_wds_loader
@@ -179,44 +178,16 @@ def train(args: argparse.Namespace) -> None:
     #
     # Initialize
     #
-    training_utils.init_distributed_mode(args)
-    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
-    training_utils.log_git_info()
+    (device, device_id, disable_tqdm) = training_utils.init_training(args, logger)
 
     if args.size is None:
         args.size = registry.get_default_size(args.network)
 
     logger.info(f"Using size={args.size}")
 
-    if args.cpu is True:
-        device = torch.device("cpu")
-        device_id = 0
-    else:
-        device = torch.device("cuda")
-        device_id = torch.cuda.current_device()
-
-    if args.use_deterministic_algorithms is True:
-        torch.backends.cudnn.benchmark = False
-        torch.use_deterministic_algorithms(True)
-    else:
-        torch.backends.cudnn.benchmark = True
-
-    if args.seed is not None:
-        set_random_seeds(args.seed)
-
-    if args.non_interactive is True or training_utils.is_local_primary(args) is False:
-        disable_tqdm = True
-    elif sys.stderr.isatty() is False:
-        disable_tqdm = True
-    else:
-        disable_tqdm = False
-
-    # Enable or disable the autograd anomaly detection
-    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)
-
     batch_size: int = args.batch_size
     grad_accum_steps: int = args.grad_accum_steps
-    logger.debug(f"Effective batch size = {args.batch_size * grad_accum_steps * args.world_size}")
+    logger.debug(f"Effective batch size = {batch_size * grad_accum_steps * args.world_size}")
 
     begin_epoch = 1
     epochs = args.epochs + 1
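The initialization block removed above was duplicated nearly verbatim across every train_*.py script in this release; it is replaced by a single training_utils.init_training(args, logger) call that returns the same (device, device_id, disable_tqdm) triple the inline code used to compute. The effective-batch-size log line is unchanged in substance: effective batch size = batch_size * grad_accum_steps * world_size, e.g. 64 * 4 * 8 = 2048 samples per optimizer step. Below is a minimal sketch of what such a helper could look like, assuming it simply consolidates the removed logic; the real implementation lives in birder/common/training_utils.py and may differ in its details.

import sys

import birder
import torch

from birder.common.lib import set_random_seeds
from birder.common.training_utils import is_local_primary


def init_training(args, logger):
    # Sketch only: consolidates the per-script setup removed in this diff.
    # The real helper presumably also calls init_distributed_mode() and log_git_info().
    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")

    # Device selection: CPU when requested, otherwise the current CUDA device
    if args.cpu is True:
        device = torch.device("cpu")
        device_id = 0
    else:
        device = torch.device("cuda")
        device_id = torch.cuda.current_device()

    # Deterministic algorithms disable cuDNN autotuning
    if args.use_deterministic_algorithms is True:
        torch.backends.cudnn.benchmark = False
        torch.use_deterministic_algorithms(True)
    else:
        torch.backends.cudnn.benchmark = True

    if args.seed is not None:
        set_random_seeds(args.seed)

    # Progress bars only on an interactive, local-primary process
    disable_tqdm = (
        args.non_interactive is True
        or is_local_primary(args) is False
        or sys.stderr.isatty() is False
    )

    # Enable or disable the autograd anomaly detection
    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)

    return (device, device_id, disable_tqdm)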
@@ -442,27 +413,31 @@ def train(args: argparse.Namespace) -> None:
 
     optimizer_steps_per_epoch = math.ceil(len(training_loader) / grad_accum_steps)
     last_batch_idx = len(training_loader) - 1
+    logging.debug(f"Epoch has {last_batch_idx+1} iterations ({optimizer_steps_per_epoch} steps)")
 
     #
     # Optimizer, learning rate scheduler and training parameter groups
     #
 
+    # Learning rate scaling
+    lr = training_utils.scale_lr(args)
+
     # Training parameter groups
     custom_keys_weight_decay = training_utils.get_wd_custom_keys(args)
     parameters = training_utils.optimizer_parameter_groups(
         net,
         args.wd,
+        base_lr=lr,
         norm_weight_decay=args.norm_wd,
         custom_keys_weight_decay=custom_keys_weight_decay,
+        custom_layer_weight_decay=args.custom_layer_wd,
         layer_decay=args.layer_decay,
         layer_decay_min_scale=args.layer_decay_min_scale,
         layer_decay_no_opt_scale=args.layer_decay_no_opt_scale,
         bias_lr=args.bias_lr,
+        custom_layer_lr_scale=args.custom_layer_lr_scale,
     )
 
-    # Learning rate scaling
-    lr = training_utils.scale_lr(args)
-
     if args.lr_scheduler_update == "epoch":
         step_update = False
         scheduler_steps_per_epoch = 1
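In the optimizer hunk above, the learning-rate scaling step (training_utils.scale_lr) moves ahead of optimizer_parameter_groups() so the scaled rate can be passed in as base_lr, and two new per-layer knobs are threaded through: custom_layer_weight_decay (from args.custom_layer_wd) and custom_layer_lr_scale (from args.custom_layer_lr_scale). The sketch below shows, in simplified form, how a base_lr combined with per-layer LR scales typically maps onto optimizer parameter groups; the function name, the dict format for the scales, and the body are illustrative assumptions, not birder's actual optimizer_parameter_groups() implementation.

import torch
from torch import nn


def simple_parameter_groups(
    net: nn.Module,
    weight_decay: float,
    base_lr: float,
    custom_layer_lr_scale: dict[str, float] | None = None,
) -> list[dict]:
    # One group per parameter: every group carries an explicit lr so that
    # per-layer scales override the optimizer's default learning rate.
    custom_layer_lr_scale = custom_layer_lr_scale or {}
    groups: list[dict] = []
    for name, param in net.named_parameters():
        if param.requires_grad is False:
            continue

        lr = base_lr
        for prefix, scale in custom_layer_lr_scale.items():
            if name.startswith(prefix):
                lr = base_lr * scale
                break

        groups.append({"params": [param], "lr": lr, "weight_decay": weight_decay})

    return groups


# Usage sketch: scale down the learning rate of the first block by 10x
net = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 2))
parameters = simple_parameter_groups(net, weight_decay=1e-4, base_lr=1e-3,
                                     custom_layer_lr_scale={"0.": 0.1})
optimizer = torch.optim.SGD(parameters, lr=1e-3)  # per-group lr takes precedence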
@@ -37,7 +37,6 @@ from birder.common import training_utils
 from birder.common.lib import format_duration
 from birder.common.lib import get_mim_network_name
 from birder.common.lib import get_network_name
-from birder.common.lib import set_random_seeds
 from birder.common.masking import RollBlockMasking
 from birder.conf import settings
 from birder.data.dataloader.webdataset import make_wds_loader
@@ -205,44 +204,16 @@ def train(args: argparse.Namespace) -> None:
     #
     # Initialize
     #
-    training_utils.init_distributed_mode(args)
-    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
-    training_utils.log_git_info()
+    (device, device_id, disable_tqdm) = training_utils.init_training(args, logger)
 
     if args.size is None:
         args.size = registry.get_default_size(args.network)
 
     logger.info(f"Using size={args.size}")
 
-    if args.cpu is True:
-        device = torch.device("cpu")
-        device_id = 0
-    else:
-        device = torch.device("cuda")
-        device_id = torch.cuda.current_device()
-
-    if args.use_deterministic_algorithms is True:
-        torch.backends.cudnn.benchmark = False
-        torch.use_deterministic_algorithms(True)
-    else:
-        torch.backends.cudnn.benchmark = True
-
-    if args.seed is not None:
-        set_random_seeds(args.seed)
-
-    if args.non_interactive is True or training_utils.is_local_primary(args) is False:
-        disable_tqdm = True
-    elif sys.stderr.isatty() is False:
-        disable_tqdm = True
-    else:
-        disable_tqdm = False
-
-    # Enable or disable the autograd anomaly detection
-    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)
-
     batch_size: int = args.batch_size
     grad_accum_steps: int = args.grad_accum_steps
-    logger.debug(f"Effective batch size = {args.batch_size * grad_accum_steps * args.world_size}")
+    logger.debug(f"Effective batch size = {batch_size * grad_accum_steps * args.world_size}")
 
     begin_epoch = 1
     epochs = args.epochs + 1
@@ -447,27 +418,31 @@ def train(args: argparse.Namespace) -> None:
 
     optimizer_steps_per_epoch = math.ceil(len(training_loader) / grad_accum_steps)
     last_batch_idx = len(training_loader) - 1
+    logging.debug(f"Epoch has {last_batch_idx+1} iterations ({optimizer_steps_per_epoch} steps)")
 
     #
     # Optimizer, learning rate scheduler and training parameter groups
     #
 
+    # Learning rate scaling
+    lr = training_utils.scale_lr(args)
+
     # Training parameter groups
     custom_keys_weight_decay = training_utils.get_wd_custom_keys(args)
     parameters = training_utils.optimizer_parameter_groups(
         net,
         args.wd,
+        base_lr=lr,
         norm_weight_decay=args.norm_wd,
         custom_keys_weight_decay=custom_keys_weight_decay,
+        custom_layer_weight_decay=args.custom_layer_wd,
         layer_decay=args.layer_decay,
         layer_decay_min_scale=args.layer_decay_min_scale,
         layer_decay_no_opt_scale=args.layer_decay_no_opt_scale,
         bias_lr=args.bias_lr,
+        custom_layer_lr_scale=args.custom_layer_lr_scale,
     )
 
-    # Learning rate scaling
-    lr = training_utils.scale_lr(args)
-
     if args.lr_scheduler_update == "epoch":
         step_update = False
         scheduler_steps_per_epoch = 1
@@ -40,7 +40,6 @@ from birder.common import training_utils
 from birder.common.lib import format_duration
 from birder.common.lib import get_mim_network_name
 from birder.common.lib import get_network_name
-from birder.common.lib import set_random_seeds
 from birder.conf import settings
 from birder.data.dataloader.webdataset import make_wds_loader
 from birder.data.datasets.directory import make_image_dataset
@@ -84,44 +83,16 @@ def train(args: argparse.Namespace) -> None:
     #
     # Initialize
     #
-    training_utils.init_distributed_mode(args)
-    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
-    training_utils.log_git_info()
+    (device, device_id, disable_tqdm) = training_utils.init_training(args, logger)
 
     if args.size is None:
         args.size = registry.get_default_size(args.network)
 
     logger.info(f"Using size={args.size}")
 
-    if args.cpu is True:
-        device = torch.device("cpu")
-        device_id = 0
-    else:
-        device = torch.device("cuda")
-        device_id = torch.cuda.current_device()
-
-    if args.use_deterministic_algorithms is True:
-        torch.backends.cudnn.benchmark = False
-        torch.use_deterministic_algorithms(True)
-    else:
-        torch.backends.cudnn.benchmark = True
-
-    if args.seed is not None:
-        set_random_seeds(args.seed)
-
-    if args.non_interactive is True or training_utils.is_local_primary(args) is False:
-        disable_tqdm = True
-    elif sys.stderr.isatty() is False:
-        disable_tqdm = True
-    else:
-        disable_tqdm = False
-
-    # Enable or disable the autograd anomaly detection
-    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)
-
     batch_size: int = args.batch_size
     grad_accum_steps: int = args.grad_accum_steps
-    logger.debug(f"Effective batch size = {args.batch_size * grad_accum_steps * args.world_size}")
+    logger.debug(f"Effective batch size = {batch_size * grad_accum_steps * args.world_size}")
 
     begin_epoch = 1
     epochs = args.epochs + 1
@@ -287,27 +258,31 @@ def train(args: argparse.Namespace) -> None:
 
     optimizer_steps_per_epoch = math.ceil(len(training_loader) / grad_accum_steps)
     last_batch_idx = len(training_loader) - 1
+    logging.debug(f"Epoch has {last_batch_idx+1} iterations ({optimizer_steps_per_epoch} steps)")
 
     #
     # Loss criteria, optimizer, learning rate scheduler and training parameter groups
     #
 
+    # Learning rate scaling
+    lr = training_utils.scale_lr(args)
+
     # Training parameter groups
     custom_keys_weight_decay = training_utils.get_wd_custom_keys(args)
     parameters = training_utils.optimizer_parameter_groups(
         net,
         args.wd,
+        base_lr=lr,
         norm_weight_decay=args.norm_wd,
         custom_keys_weight_decay=custom_keys_weight_decay,
+        custom_layer_weight_decay=args.custom_layer_wd,
         layer_decay=args.layer_decay,
         layer_decay_min_scale=args.layer_decay_min_scale,
         layer_decay_no_opt_scale=args.layer_decay_no_opt_scale,
         bias_lr=args.bias_lr,
+        custom_layer_lr_scale=args.custom_layer_lr_scale,
     )
 
-    # Learning rate scaling
-    lr = training_utils.scale_lr(args)
-
     if args.lr_scheduler_update == "epoch":
         step_update = False
         scheduler_steps_per_epoch = 1
@@ -39,7 +39,6 @@ from birder.common import training_utils
 from birder.common.lib import format_duration
 from birder.common.lib import get_mim_network_name
 from birder.common.lib import get_network_name
-from birder.common.lib import set_random_seeds
 from birder.common.masking import BlockMasking
 from birder.conf import settings
 from birder.data.dataloader.webdataset import make_wds_loader
@@ -107,44 +106,16 @@ def train(args: argparse.Namespace) -> None:
     #
     # Initialize
     #
-    training_utils.init_distributed_mode(args)
-    logger.info(f"Starting training, birder version: {birder.__version__}, pytorch version: {torch.__version__}")
-    training_utils.log_git_info()
+    (device, device_id, disable_tqdm) = training_utils.init_training(args, logger)
 
     if args.size is None:
         args.size = registry.get_default_size(args.network)
 
     logger.info(f"Using size={args.size}")
 
-    if args.cpu is True:
-        device = torch.device("cpu")
-        device_id = 0
-    else:
-        device = torch.device("cuda")
-        device_id = torch.cuda.current_device()
-
-    if args.use_deterministic_algorithms is True:
-        torch.backends.cudnn.benchmark = False
-        torch.use_deterministic_algorithms(True)
-    else:
-        torch.backends.cudnn.benchmark = True
-
-    if args.seed is not None:
-        set_random_seeds(args.seed)
-
-    if args.non_interactive is True or training_utils.is_local_primary(args) is False:
-        disable_tqdm = True
-    elif sys.stderr.isatty() is False:
-        disable_tqdm = True
-    else:
-        disable_tqdm = False
-
-    # Enable or disable the autograd anomaly detection
-    torch.autograd.set_detect_anomaly(args.grad_anomaly_detection)
-
     batch_size: int = args.batch_size
     grad_accum_steps: int = args.grad_accum_steps
-    logger.debug(f"Effective batch size = {args.batch_size * grad_accum_steps * args.world_size}")
+    logger.debug(f"Effective batch size = {batch_size * grad_accum_steps * args.world_size}")
 
     begin_epoch = 1
     epochs = args.epochs + 1
@@ -354,27 +325,31 @@ def train(args: argparse.Namespace) -> None:
 
     optimizer_steps_per_epoch = math.ceil(len(training_loader) / grad_accum_steps)
     last_batch_idx = len(training_loader) - 1
+    logging.debug(f"Epoch has {last_batch_idx+1} iterations ({optimizer_steps_per_epoch} steps)")
 
     #
     # Loss criteria, optimizer, learning rate scheduler and training parameter groups
     #
 
+    # Learning rate scaling
+    lr = training_utils.scale_lr(args)
+
     # Training parameter groups
     custom_keys_weight_decay = training_utils.get_wd_custom_keys(args)
     parameters = training_utils.optimizer_parameter_groups(
         net,
         args.wd,
+        base_lr=lr,
         norm_weight_decay=args.norm_wd,
         custom_keys_weight_decay=custom_keys_weight_decay,
+        custom_layer_weight_decay=args.custom_layer_wd,
         layer_decay=args.layer_decay,
         layer_decay_min_scale=args.layer_decay_min_scale,
         layer_decay_no_opt_scale=args.layer_decay_no_opt_scale,
         bias_lr=args.bias_lr,
+        custom_layer_lr_scale=args.custom_layer_lr_scale,
     )
 
-    # Learning rate scaling
-    lr = training_utils.scale_lr(args)
-
     if args.lr_scheduler_update == "epoch":
         step_update = False
         scheduler_steps_per_epoch = 1