birder 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- birder/common/fs_ops.py +2 -2
- birder/introspection/attention_rollout.py +1 -1
- birder/introspection/transformer_attribution.py +1 -1
- birder/layers/layer_scale.py +1 -1
- birder/net/__init__.py +2 -10
- birder/net/_rope_vit_configs.py +430 -0
- birder/net/_vit_configs.py +479 -0
- birder/net/biformer.py +1 -0
- birder/net/cait.py +5 -5
- birder/net/coat.py +12 -12
- birder/net/conv2former.py +3 -3
- birder/net/convmixer.py +1 -1
- birder/net/convnext_v1.py +1 -1
- birder/net/crossvit.py +5 -5
- birder/net/davit.py +1 -1
- birder/net/deit.py +12 -26
- birder/net/deit3.py +42 -189
- birder/net/densenet.py +9 -8
- birder/net/detection/deformable_detr.py +5 -2
- birder/net/detection/detr.py +5 -2
- birder/net/detection/efficientdet.py +1 -1
- birder/net/dpn.py +1 -2
- birder/net/edgenext.py +2 -1
- birder/net/edgevit.py +3 -0
- birder/net/efficientformer_v1.py +2 -1
- birder/net/efficientformer_v2.py +18 -31
- birder/net/efficientnet_v2.py +3 -0
- birder/net/efficientvit_mit.py +5 -5
- birder/net/fasternet.py +2 -2
- birder/net/flexivit.py +22 -43
- birder/net/groupmixformer.py +1 -1
- birder/net/hgnet_v1.py +5 -5
- birder/net/inception_next.py +1 -1
- birder/net/inception_resnet_v1.py +3 -3
- birder/net/inception_resnet_v2.py +7 -4
- birder/net/inception_v3.py +3 -0
- birder/net/inception_v4.py +3 -0
- birder/net/maxvit.py +1 -1
- birder/net/metaformer.py +3 -3
- birder/net/mim/crossmae.py +1 -1
- birder/net/mim/mae_vit.py +1 -1
- birder/net/mim/simmim.py +1 -1
- birder/net/mobilenet_v1.py +0 -9
- birder/net/mobilenet_v2.py +38 -44
- birder/net/{mobilenet_v3_large.py → mobilenet_v3.py} +37 -10
- birder/net/mobilevit_v1.py +5 -32
- birder/net/mobilevit_v2.py +1 -45
- birder/net/moganet.py +8 -5
- birder/net/mvit_v2.py +6 -6
- birder/net/nfnet.py +4 -0
- birder/net/pit.py +1 -1
- birder/net/pvt_v1.py +5 -5
- birder/net/pvt_v2.py +5 -5
- birder/net/repghost.py +1 -30
- birder/net/resmlp.py +2 -2
- birder/net/resnest.py +3 -0
- birder/net/resnet_v1.py +125 -1
- birder/net/resnet_v2.py +75 -1
- birder/net/resnext.py +35 -1
- birder/net/rope_deit3.py +33 -136
- birder/net/rope_flexivit.py +18 -18
- birder/net/rope_vit.py +3 -735
- birder/net/simple_vit.py +22 -16
- birder/net/smt.py +1 -1
- birder/net/squeezenet.py +5 -12
- birder/net/squeezenext.py +0 -24
- birder/net/ssl/capi.py +1 -1
- birder/net/ssl/data2vec.py +1 -1
- birder/net/ssl/dino_v2.py +2 -2
- birder/net/ssl/franca.py +2 -2
- birder/net/ssl/i_jepa.py +1 -1
- birder/net/ssl/ibot.py +1 -1
- birder/net/swiftformer.py +12 -2
- birder/net/swin_transformer_v2.py +1 -1
- birder/net/tiny_vit.py +3 -16
- birder/net/van.py +2 -2
- birder/net/vit.py +35 -963
- birder/net/vit_sam.py +13 -38
- birder/net/xcit.py +7 -6
- birder/tools/introspection.py +1 -1
- birder/tools/model_info.py +3 -1
- birder/version.py +1 -1
- {birder-0.3.3.dist-info → birder-0.4.0.dist-info}/METADATA +1 -1
- {birder-0.3.3.dist-info → birder-0.4.0.dist-info}/RECORD +88 -90
- birder/net/mobilenet_v3_small.py +0 -43
- birder/net/se_resnet_v1.py +0 -105
- birder/net/se_resnet_v2.py +0 -59
- birder/net/se_resnext.py +0 -30
- {birder-0.3.3.dist-info → birder-0.4.0.dist-info}/WHEEL +0 -0
- {birder-0.3.3.dist-info → birder-0.4.0.dist-info}/entry_points.txt +0 -0
- {birder-0.3.3.dist-info → birder-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {birder-0.3.3.dist-info → birder-0.4.0.dist-info}/top_level.txt +0 -0
birder/net/rope_deit3.py
CHANGED
|
@@ -23,6 +23,11 @@ from torch import nn
|
|
|
23
23
|
|
|
24
24
|
from birder.common.masking import mask_tensor
|
|
25
25
|
from birder.model_registry import registry
|
|
26
|
+
from birder.net._vit_configs import BASE
|
|
27
|
+
from birder.net._vit_configs import LARGE
|
|
28
|
+
from birder.net._vit_configs import MEDIUM
|
|
29
|
+
from birder.net._vit_configs import SMALL
|
|
30
|
+
from birder.net._vit_configs import TINY
|
|
26
31
|
from birder.net.base import DetectorBackbone
|
|
27
32
|
from birder.net.base import MaskedTokenOmissionMixin
|
|
28
33
|
from birder.net.base import MaskedTokenRetentionMixin
|
|
@@ -462,204 +467,96 @@ class RoPE_DeiT3(DetectorBackbone, PreTrainEncoder, MaskedTokenOmissionMixin, Ma
|
|
|
462
467
|
registry.register_model_config(
|
|
463
468
|
"rope_deit3_t16",
|
|
464
469
|
RoPE_DeiT3,
|
|
465
|
-
config={
|
|
466
|
-
"patch_size": 16,
|
|
467
|
-
"num_layers": 12,
|
|
468
|
-
"num_heads": 3,
|
|
469
|
-
"hidden_dim": 192,
|
|
470
|
-
"mlp_dim": 768,
|
|
471
|
-
"drop_path_rate": 0.0,
|
|
472
|
-
},
|
|
470
|
+
config={"patch_size": 16, **TINY},
|
|
473
471
|
)
|
|
474
472
|
registry.register_model_config(
|
|
475
473
|
"rope_deit3_s16",
|
|
476
474
|
RoPE_DeiT3,
|
|
477
|
-
config={
|
|
478
|
-
"patch_size": 16,
|
|
479
|
-
"num_layers": 12,
|
|
480
|
-
"num_heads": 6,
|
|
481
|
-
"hidden_dim": 384,
|
|
482
|
-
"mlp_dim": 1536,
|
|
483
|
-
"drop_path_rate": 0.05,
|
|
484
|
-
},
|
|
475
|
+
config={"patch_size": 16, **SMALL, "drop_path_rate": 0.05},
|
|
485
476
|
)
|
|
486
477
|
registry.register_model_config(
|
|
487
478
|
"rope_deit3_s14",
|
|
488
479
|
RoPE_DeiT3,
|
|
489
|
-
config={
|
|
490
|
-
"patch_size": 14,
|
|
491
|
-
"num_layers": 12,
|
|
492
|
-
"num_heads": 6,
|
|
493
|
-
"hidden_dim": 384,
|
|
494
|
-
"mlp_dim": 1536,
|
|
495
|
-
"drop_path_rate": 0.05,
|
|
496
|
-
},
|
|
480
|
+
config={"patch_size": 14, **SMALL, "drop_path_rate": 0.05},
|
|
497
481
|
)
|
|
498
482
|
registry.register_model_config(
|
|
499
483
|
"rope_deit3_m16",
|
|
500
484
|
RoPE_DeiT3,
|
|
501
|
-
config={
|
|
502
|
-
"patch_size": 16,
|
|
503
|
-
"num_layers": 12,
|
|
504
|
-
"num_heads": 8,
|
|
505
|
-
"hidden_dim": 512,
|
|
506
|
-
"mlp_dim": 2048,
|
|
507
|
-
"drop_path_rate": 0.1,
|
|
508
|
-
},
|
|
485
|
+
config={"patch_size": 16, **MEDIUM, "drop_path_rate": 0.1},
|
|
509
486
|
)
|
|
510
487
|
registry.register_model_config(
|
|
511
488
|
"rope_deit3_m14",
|
|
512
489
|
RoPE_DeiT3,
|
|
513
|
-
config={
|
|
514
|
-
"patch_size": 14,
|
|
515
|
-
"num_layers": 12,
|
|
516
|
-
"num_heads": 8,
|
|
517
|
-
"hidden_dim": 512,
|
|
518
|
-
"mlp_dim": 2048,
|
|
519
|
-
"drop_path_rate": 0.1,
|
|
520
|
-
},
|
|
490
|
+
config={"patch_size": 14, **MEDIUM, "drop_path_rate": 0.1},
|
|
521
491
|
)
|
|
522
492
|
registry.register_model_config(
|
|
523
493
|
"rope_deit3_b16",
|
|
524
494
|
RoPE_DeiT3,
|
|
525
|
-
config={
|
|
526
|
-
"patch_size": 16,
|
|
527
|
-
"num_layers": 12,
|
|
528
|
-
"num_heads": 12,
|
|
529
|
-
"hidden_dim": 768,
|
|
530
|
-
"mlp_dim": 3072,
|
|
531
|
-
"drop_path_rate": 0.2,
|
|
532
|
-
},
|
|
495
|
+
config={"patch_size": 16, **BASE, "drop_path_rate": 0.2},
|
|
533
496
|
)
|
|
534
497
|
registry.register_model_config(
|
|
535
498
|
"rope_deit3_b14",
|
|
536
499
|
RoPE_DeiT3,
|
|
537
|
-
config={
|
|
538
|
-
"patch_size": 14,
|
|
539
|
-
"num_layers": 12,
|
|
540
|
-
"num_heads": 12,
|
|
541
|
-
"hidden_dim": 768,
|
|
542
|
-
"mlp_dim": 3072,
|
|
543
|
-
"drop_path_rate": 0.2,
|
|
544
|
-
},
|
|
500
|
+
config={"patch_size": 14, **BASE, "drop_path_rate": 0.2},
|
|
545
501
|
)
|
|
546
502
|
registry.register_model_config(
|
|
547
503
|
"rope_deit3_l16",
|
|
548
504
|
RoPE_DeiT3,
|
|
549
|
-
config={
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
"drop_path_rate": 0.45,
|
|
556
|
-
},
|
|
505
|
+
config={"patch_size": 16, **LARGE, "drop_path_rate": 0.45},
|
|
506
|
+
)
|
|
507
|
+
registry.register_model_config(
|
|
508
|
+
"rope_deit3_l14",
|
|
509
|
+
RoPE_DeiT3,
|
|
510
|
+
config={"patch_size": 14, **LARGE, "drop_path_rate": 0.45},
|
|
557
511
|
)
|
|
558
512
|
|
|
559
513
|
# With registers
|
|
514
|
+
####################
|
|
515
|
+
|
|
560
516
|
registry.register_model_config(
|
|
561
517
|
"rope_deit3_reg4_t16",
|
|
562
518
|
RoPE_DeiT3,
|
|
563
|
-
config={
|
|
564
|
-
"patch_size": 16,
|
|
565
|
-
"num_layers": 12,
|
|
566
|
-
"num_heads": 3,
|
|
567
|
-
"hidden_dim": 192,
|
|
568
|
-
"mlp_dim": 768,
|
|
569
|
-
"num_reg_tokens": 4,
|
|
570
|
-
"drop_path_rate": 0.0,
|
|
571
|
-
},
|
|
519
|
+
config={"patch_size": 16, **TINY, "num_reg_tokens": 4},
|
|
572
520
|
)
|
|
573
521
|
registry.register_model_config(
|
|
574
522
|
"rope_deit3_reg4_s16",
|
|
575
523
|
RoPE_DeiT3,
|
|
576
|
-
config={
|
|
577
|
-
"patch_size": 16,
|
|
578
|
-
"num_layers": 12,
|
|
579
|
-
"num_heads": 6,
|
|
580
|
-
"hidden_dim": 384,
|
|
581
|
-
"mlp_dim": 1536,
|
|
582
|
-
"num_reg_tokens": 4,
|
|
583
|
-
"drop_path_rate": 0.05,
|
|
584
|
-
},
|
|
524
|
+
config={"patch_size": 16, **SMALL, "num_reg_tokens": 4, "drop_path_rate": 0.05},
|
|
585
525
|
)
|
|
586
526
|
registry.register_model_config(
|
|
587
527
|
"rope_deit3_reg4_s14",
|
|
588
528
|
RoPE_DeiT3,
|
|
589
|
-
config={
|
|
590
|
-
"patch_size": 14,
|
|
591
|
-
"num_layers": 12,
|
|
592
|
-
"num_heads": 6,
|
|
593
|
-
"hidden_dim": 384,
|
|
594
|
-
"mlp_dim": 1536,
|
|
595
|
-
"num_reg_tokens": 4,
|
|
596
|
-
"drop_path_rate": 0.05,
|
|
597
|
-
},
|
|
529
|
+
config={"patch_size": 14, **SMALL, "num_reg_tokens": 4, "drop_path_rate": 0.05},
|
|
598
530
|
)
|
|
599
531
|
registry.register_model_config(
|
|
600
532
|
"rope_deit3_reg4_m16",
|
|
601
533
|
RoPE_DeiT3,
|
|
602
|
-
config={
|
|
603
|
-
"patch_size": 16,
|
|
604
|
-
"num_layers": 12,
|
|
605
|
-
"num_heads": 8,
|
|
606
|
-
"hidden_dim": 512,
|
|
607
|
-
"mlp_dim": 2048,
|
|
608
|
-
"num_reg_tokens": 4,
|
|
609
|
-
"drop_path_rate": 0.1,
|
|
610
|
-
},
|
|
534
|
+
config={"patch_size": 16, **MEDIUM, "num_reg_tokens": 4, "drop_path_rate": 0.1},
|
|
611
535
|
)
|
|
612
536
|
registry.register_model_config(
|
|
613
537
|
"rope_deit3_reg4_m14",
|
|
614
538
|
RoPE_DeiT3,
|
|
615
|
-
config={
|
|
616
|
-
"patch_size": 14,
|
|
617
|
-
"num_layers": 12,
|
|
618
|
-
"num_heads": 8,
|
|
619
|
-
"hidden_dim": 512,
|
|
620
|
-
"mlp_dim": 2048,
|
|
621
|
-
"num_reg_tokens": 4,
|
|
622
|
-
"drop_path_rate": 0.1,
|
|
623
|
-
},
|
|
539
|
+
config={"patch_size": 14, **MEDIUM, "num_reg_tokens": 4, "drop_path_rate": 0.1},
|
|
624
540
|
)
|
|
625
541
|
registry.register_model_config(
|
|
626
542
|
"rope_deit3_reg4_b16",
|
|
627
543
|
RoPE_DeiT3,
|
|
628
|
-
config={
|
|
629
|
-
"patch_size": 16,
|
|
630
|
-
"num_layers": 12,
|
|
631
|
-
"num_heads": 12,
|
|
632
|
-
"hidden_dim": 768,
|
|
633
|
-
"mlp_dim": 3072,
|
|
634
|
-
"num_reg_tokens": 4,
|
|
635
|
-
"drop_path_rate": 0.2,
|
|
636
|
-
},
|
|
544
|
+
config={"patch_size": 16, **BASE, "num_reg_tokens": 4, "drop_path_rate": 0.2},
|
|
637
545
|
)
|
|
638
546
|
registry.register_model_config(
|
|
639
547
|
"rope_deit3_reg4_b14",
|
|
640
548
|
RoPE_DeiT3,
|
|
641
|
-
config={
|
|
642
|
-
"patch_size": 14,
|
|
643
|
-
"num_layers": 12,
|
|
644
|
-
"num_heads": 12,
|
|
645
|
-
"hidden_dim": 768,
|
|
646
|
-
"mlp_dim": 3072,
|
|
647
|
-
"num_reg_tokens": 4,
|
|
648
|
-
"drop_path_rate": 0.2,
|
|
649
|
-
},
|
|
549
|
+
config={"patch_size": 14, **BASE, "num_reg_tokens": 4, "drop_path_rate": 0.2},
|
|
650
550
|
)
|
|
651
551
|
registry.register_model_config(
|
|
652
552
|
"rope_deit3_reg4_l16",
|
|
653
553
|
RoPE_DeiT3,
|
|
654
|
-
config={
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
"num_reg_tokens": 4,
|
|
661
|
-
"drop_path_rate": 0.45,
|
|
662
|
-
},
|
|
554
|
+
config={"patch_size": 16, **LARGE, "num_reg_tokens": 4, "drop_path_rate": 0.45},
|
|
555
|
+
)
|
|
556
|
+
registry.register_model_config(
|
|
557
|
+
"rope_deit3_reg4_l14",
|
|
558
|
+
RoPE_DeiT3,
|
|
559
|
+
config={"patch_size": 14, **LARGE, "num_reg_tokens": 4, "drop_path_rate": 0.45},
|
|
663
560
|
)
|
|
664
561
|
|
|
665
562
|
registry.register_weights(
|
birder/net/rope_flexivit.py
CHANGED
|
@@ -21,6 +21,8 @@ from birder.layers import MultiHeadAttentionPool
|
|
|
21
21
|
from birder.layers import SwiGLU_FFN
|
|
22
22
|
from birder.layers.activations import get_activation_module
|
|
23
23
|
from birder.model_registry import registry
|
|
24
|
+
from birder.net._vit_configs import BASE
|
|
25
|
+
from birder.net._vit_configs import SMALL
|
|
24
26
|
from birder.net.base import DetectorBackbone
|
|
25
27
|
from birder.net.base import MaskedTokenOmissionMixin
|
|
26
28
|
from birder.net.base import MaskedTokenRetentionMixin
|
|
@@ -661,26 +663,24 @@ class RoPE_FlexiViT(DetectorBackbone, PreTrainEncoder, MaskedTokenOmissionMixin,
|
|
|
661
663
|
registry.register_model_config(
|
|
662
664
|
"rope_flexivit_s16",
|
|
663
665
|
RoPE_FlexiViT,
|
|
664
|
-
config={
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
666
|
+
config={"patch_size": 16, **SMALL},
|
|
667
|
+
)
|
|
668
|
+
registry.register_model_config(
|
|
669
|
+
"rope_flexivit_b16",
|
|
670
|
+
RoPE_FlexiViT,
|
|
671
|
+
config={"patch_size": 16, **BASE},
|
|
672
|
+
)
|
|
673
|
+
|
|
674
|
+
# With registers
|
|
675
|
+
####################
|
|
676
|
+
|
|
677
|
+
registry.register_model_config(
|
|
678
|
+
"rope_flexivit_reg1_s16",
|
|
679
|
+
RoPE_FlexiViT,
|
|
680
|
+
config={"patch_size": 16, **SMALL, "num_reg_tokens": 1},
|
|
672
681
|
)
|
|
673
682
|
registry.register_model_config(
|
|
674
683
|
"rope_flexivit_reg4_b16_avg",
|
|
675
684
|
RoPE_FlexiViT,
|
|
676
|
-
config={
|
|
677
|
-
"patch_size": 16,
|
|
678
|
-
"num_layers": 12,
|
|
679
|
-
"num_heads": 12,
|
|
680
|
-
"hidden_dim": 768,
|
|
681
|
-
"mlp_dim": 3072,
|
|
682
|
-
"num_reg_tokens": 4,
|
|
683
|
-
"class_token": False,
|
|
684
|
-
"drop_path_rate": 0.1,
|
|
685
|
-
},
|
|
685
|
+
config={"patch_size": 16, **BASE, "num_reg_tokens": 4, "class_token": False},
|
|
686
686
|
)
|