birder-0.3.3-py3-none-any.whl → birder-0.4.1-py3-none-any.whl
- birder/adversarial/base.py +1 -1
- birder/adversarial/simba.py +4 -4
- birder/common/cli.py +1 -1
- birder/common/fs_ops.py +13 -13
- birder/common/lib.py +2 -2
- birder/common/masking.py +3 -3
- birder/common/training_cli.py +24 -2
- birder/common/training_utils.py +28 -4
- birder/data/collators/detection.py +9 -1
- birder/data/transforms/detection.py +27 -8
- birder/data/transforms/mosaic.py +1 -1
- birder/datahub/classification.py +3 -3
- birder/inference/classification.py +3 -3
- birder/inference/data_parallel.py +1 -1
- birder/inference/detection.py +5 -5
- birder/inference/wbf.py +1 -1
- birder/introspection/attention_rollout.py +6 -6
- birder/introspection/feature_pca.py +4 -4
- birder/introspection/gradcam.py +1 -1
- birder/introspection/guided_backprop.py +2 -2
- birder/introspection/transformer_attribution.py +4 -4
- birder/layers/attention_pool.py +2 -2
- birder/layers/layer_scale.py +1 -1
- birder/model_registry/model_registry.py +2 -1
- birder/net/__init__.py +4 -10
- birder/net/_rope_vit_configs.py +435 -0
- birder/net/_vit_configs.py +466 -0
- birder/net/alexnet.py +5 -5
- birder/net/base.py +28 -3
- birder/net/biformer.py +18 -17
- birder/net/cait.py +7 -7
- birder/net/cas_vit.py +1 -1
- birder/net/coat.py +27 -27
- birder/net/conv2former.py +3 -3
- birder/net/convmixer.py +1 -1
- birder/net/convnext_v1.py +3 -11
- birder/net/convnext_v1_iso.py +198 -0
- birder/net/convnext_v2.py +2 -10
- birder/net/crossformer.py +9 -9
- birder/net/crossvit.py +6 -6
- birder/net/cspnet.py +1 -1
- birder/net/cswin_transformer.py +10 -10
- birder/net/davit.py +11 -11
- birder/net/deit.py +68 -29
- birder/net/deit3.py +69 -204
- birder/net/densenet.py +9 -8
- birder/net/detection/__init__.py +4 -0
- birder/net/detection/{yolo_anchors.py → _yolo_anchors.py} +5 -5
- birder/net/detection/base.py +6 -5
- birder/net/detection/deformable_detr.py +31 -30
- birder/net/detection/detr.py +14 -11
- birder/net/detection/efficientdet.py +10 -29
- birder/net/detection/faster_rcnn.py +22 -22
- birder/net/detection/fcos.py +8 -8
- birder/net/detection/plain_detr.py +852 -0
- birder/net/detection/retinanet.py +4 -4
- birder/net/detection/rt_detr_v1.py +81 -25
- birder/net/detection/rt_detr_v2.py +1147 -0
- birder/net/detection/ssd.py +5 -5
- birder/net/detection/yolo_v2.py +12 -12
- birder/net/detection/yolo_v3.py +19 -19
- birder/net/detection/yolo_v4.py +16 -16
- birder/net/detection/yolo_v4_tiny.py +3 -3
- birder/net/dpn.py +1 -2
- birder/net/edgenext.py +5 -4
- birder/net/edgevit.py +13 -14
- birder/net/efficientformer_v1.py +3 -2
- birder/net/efficientformer_v2.py +18 -31
- birder/net/efficientnet_v2.py +3 -0
- birder/net/efficientvim.py +9 -9
- birder/net/efficientvit_mit.py +7 -7
- birder/net/efficientvit_msft.py +3 -3
- birder/net/fasternet.py +3 -3
- birder/net/fastvit.py +5 -12
- birder/net/flexivit.py +50 -58
- birder/net/focalnet.py +5 -9
- birder/net/gc_vit.py +11 -11
- birder/net/ghostnet_v1.py +1 -1
- birder/net/ghostnet_v2.py +1 -1
- birder/net/groupmixformer.py +13 -13
- birder/net/hgnet_v1.py +6 -6
- birder/net/hgnet_v2.py +4 -4
- birder/net/hiera.py +6 -6
- birder/net/hieradet.py +9 -9
- birder/net/hornet.py +3 -3
- birder/net/iformer.py +4 -4
- birder/net/inception_next.py +5 -15
- birder/net/inception_resnet_v1.py +3 -3
- birder/net/inception_resnet_v2.py +7 -4
- birder/net/inception_v3.py +3 -0
- birder/net/inception_v4.py +3 -0
- birder/net/levit.py +3 -3
- birder/net/lit_v1.py +13 -15
- birder/net/lit_v1_tiny.py +9 -9
- birder/net/lit_v2.py +14 -15
- birder/net/maxvit.py +11 -23
- birder/net/metaformer.py +5 -5
- birder/net/mim/crossmae.py +6 -6
- birder/net/mim/fcmae.py +3 -5
- birder/net/mim/mae_hiera.py +7 -7
- birder/net/mim/mae_vit.py +4 -6
- birder/net/mim/simmim.py +3 -4
- birder/net/mobilenet_v1.py +0 -9
- birder/net/mobilenet_v2.py +38 -44
- birder/net/{mobilenet_v3_large.py → mobilenet_v3.py} +37 -10
- birder/net/mobilenet_v4_hybrid.py +4 -4
- birder/net/mobileone.py +5 -12
- birder/net/mobilevit_v1.py +7 -34
- birder/net/mobilevit_v2.py +6 -54
- birder/net/moganet.py +8 -5
- birder/net/mvit_v2.py +30 -30
- birder/net/nextvit.py +2 -2
- birder/net/nfnet.py +4 -0
- birder/net/pit.py +11 -26
- birder/net/pvt_v1.py +9 -9
- birder/net/pvt_v2.py +10 -16
- birder/net/regionvit.py +15 -15
- birder/net/regnet.py +1 -1
- birder/net/repghost.py +5 -35
- birder/net/repvgg.py +3 -5
- birder/net/repvit.py +2 -2
- birder/net/resmlp.py +2 -2
- birder/net/resnest.py +4 -1
- birder/net/resnet_v1.py +125 -1
- birder/net/resnet_v2.py +75 -1
- birder/net/resnext.py +35 -1
- birder/net/rope_deit3.py +62 -151
- birder/net/rope_flexivit.py +46 -33
- birder/net/rope_vit.py +44 -758
- birder/net/sequencer2d.py +3 -4
- birder/net/shufflenet_v1.py +1 -1
- birder/net/shufflenet_v2.py +1 -1
- birder/net/simple_vit.py +69 -21
- birder/net/smt.py +8 -8
- birder/net/squeezenet.py +5 -12
- birder/net/squeezenext.py +0 -24
- birder/net/ssl/barlow_twins.py +1 -1
- birder/net/ssl/byol.py +2 -2
- birder/net/ssl/capi.py +4 -4
- birder/net/ssl/data2vec.py +1 -1
- birder/net/ssl/data2vec2.py +1 -1
- birder/net/ssl/dino_v2.py +13 -3
- birder/net/ssl/franca.py +28 -4
- birder/net/ssl/i_jepa.py +5 -5
- birder/net/ssl/ibot.py +1 -1
- birder/net/ssl/mmcr.py +1 -1
- birder/net/swiftformer.py +13 -3
- birder/net/swin_transformer_v1.py +4 -5
- birder/net/swin_transformer_v2.py +5 -8
- birder/net/tiny_vit.py +6 -19
- birder/net/transnext.py +19 -19
- birder/net/uniformer.py +4 -4
- birder/net/van.py +2 -2
- birder/net/vgg.py +1 -10
- birder/net/vit.py +72 -987
- birder/net/vit_parallel.py +35 -20
- birder/net/vit_sam.py +23 -48
- birder/net/vovnet_v2.py +1 -1
- birder/net/xcit.py +16 -13
- birder/ops/msda.py +4 -4
- birder/ops/swattention.py +10 -10
- birder/results/classification.py +3 -3
- birder/results/gui.py +8 -8
- birder/scripts/benchmark.py +37 -12
- birder/scripts/evaluate.py +1 -1
- birder/scripts/predict.py +3 -3
- birder/scripts/predict_detection.py +2 -2
- birder/scripts/train.py +63 -15
- birder/scripts/train_barlow_twins.py +10 -7
- birder/scripts/train_byol.py +10 -7
- birder/scripts/train_capi.py +15 -10
- birder/scripts/train_data2vec.py +10 -7
- birder/scripts/train_data2vec2.py +10 -7
- birder/scripts/train_detection.py +29 -14
- birder/scripts/train_dino_v1.py +13 -9
- birder/scripts/train_dino_v2.py +27 -14
- birder/scripts/train_dino_v2_dist.py +28 -15
- birder/scripts/train_franca.py +16 -9
- birder/scripts/train_i_jepa.py +12 -9
- birder/scripts/train_ibot.py +15 -11
- birder/scripts/train_kd.py +64 -17
- birder/scripts/train_mim.py +11 -8
- birder/scripts/train_mmcr.py +11 -8
- birder/scripts/train_rotnet.py +11 -7
- birder/scripts/train_simclr.py +10 -7
- birder/scripts/train_vicreg.py +10 -7
- birder/tools/adversarial.py +4 -4
- birder/tools/auto_anchors.py +5 -5
- birder/tools/avg_model.py +1 -1
- birder/tools/convert_model.py +30 -22
- birder/tools/det_results.py +1 -1
- birder/tools/download_model.py +1 -1
- birder/tools/ensemble_model.py +1 -1
- birder/tools/introspection.py +12 -3
- birder/tools/labelme_to_coco.py +2 -2
- birder/tools/model_info.py +15 -15
- birder/tools/pack.py +8 -8
- birder/tools/quantize_model.py +53 -4
- birder/tools/results.py +2 -2
- birder/tools/show_det_iterator.py +19 -6
- birder/tools/show_iterator.py +2 -2
- birder/tools/similarity.py +5 -5
- birder/tools/stats.py +4 -6
- birder/tools/voc_to_coco.py +1 -1
- birder/version.py +1 -1
- {birder-0.3.3.dist-info → birder-0.4.1.dist-info}/METADATA +3 -3
- birder-0.4.1.dist-info/RECORD +300 -0
- {birder-0.3.3.dist-info → birder-0.4.1.dist-info}/WHEEL +1 -1
- birder/net/mobilenet_v3_small.py +0 -43
- birder/net/se_resnet_v1.py +0 -105
- birder/net/se_resnet_v2.py +0 -59
- birder/net/se_resnext.py +0 -30
- birder-0.3.3.dist-info/RECORD +0 -299
- {birder-0.3.3.dist-info → birder-0.4.1.dist-info}/entry_points.txt +0 -0
- {birder-0.3.3.dist-info → birder-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {birder-0.3.3.dist-info → birder-0.4.1.dist-info}/top_level.txt +0 -0
birder/adversarial/base.py
CHANGED
@@ -56,7 +56,7 @@ def pixel_eps_to_normalized(
 
 
 def clamp_normalized(inputs: torch.Tensor, rgb_stats: RGBType) -> torch.Tensor:
-
+    min_val, max_val = normalized_bounds(rgb_stats, device=inputs.device, dtype=inputs.dtype)
     return torch.clamp(inputs, min=min_val, max=max_val)
 
 
birder/adversarial/simba.py
CHANGED
@@ -87,7 +87,7 @@ class SimBA:
         if self._is_successful(current_logits, label, target_label):
             return adv_inputs.detach(), num_queries
 
-
+        _, channels, height, width = adv_inputs.shape
         num_dims = channels * height * width
         step = pixel_eps_to_normalized(self.step_size, self.rgb_stats, device=adv_inputs.device, dtype=adv_inputs.dtype)
         step_vals = step.view(-1)  # Per-channel steps
@@ -98,11 +98,11 @@ class SimBA:
 
         # Coordinate-wise search in random order
         for flat_idx in perm[:num_steps]:
-
-
+            c, rem = divmod(int(flat_idx.item()), stride)
+            h, w = divmod(rem, width)
             step_val = step_vals[c]
 
-
+            candidate_inputs, candidate_logits, candidate_objective = self._best_candidate(
                 adv_inputs, c, h, w, step_val, label, target_label
             )
            num_queries += 2
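The new index arithmetic walks SimBA's flat pixel index back to a (channel, row, column) coordinate. A minimal standalone sketch of the decomposition, assuming `stride` is `height * width` (the helper name `unravel_chw` is hypothetical):

```python
def unravel_chw(flat_idx: int, height: int, width: int) -> tuple[int, int, int]:
    stride = height * width
    c, rem = divmod(flat_idx, stride)  # channel, remainder within the channel plane
    h, w = divmod(rem, width)          # row, column within that plane
    return c, h, w

# Example: for a (C=3, H=4, W=5) tensor, flat index 27 = 1 * 20 + 7, and 7 = 1 * 5 + 2
assert unravel_chw(27, 4, 5) == (1, 1, 2)
```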
birder/common/cli.py
CHANGED
@@ -49,7 +49,7 @@ class FlexibleDictAction(argparse.Action):
         new_dict = {}
         for pair in pairs:
             # Split each pair into key and value
-
+            key, value = pair.split("=", 1)
             key = key.strip()
 
             # Try to safely evaluate the value (handles ints and strings mostly)
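The `maxsplit=1` argument matters because values may themselves contain `=`; only the first one separates key from value. For example:

```python
pair = "url=https://example.com/?a=1"
key, value = pair.split("=", 1)
assert key == "url" and value == "https://example.com/?a=1"
```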
birder/common/fs_ops.py
CHANGED
@@ -158,7 +158,7 @@ def model_path(
         file_name = f"{file_name}_quantized"
 
     if states is True:
-        file_name = f"{file_name}_states"
+        file_name = f"{file_name}_states.pt"
     elif lite is True:
         file_name = f"{file_name}.ptl"
     elif pt2 is True:
@@ -254,7 +254,7 @@ def clean_checkpoints(network_name: str, keep_last: int) -> None:
     models_glob = str(model_path(network_name, epoch=epoch))
     states_glob = str(model_path(network_name, epoch=epoch, states=True))
     model_pattern = re.compile(r".*_([1-9][0-9]*)\.pt$")
-    states_pattern = re.compile(r".*_([1-9][0-9]*)_states$")
+    states_pattern = re.compile(r".*_([1-9][0-9]*)_states\.pt$")
 
     model_paths = list(settings.BASE_DIR.glob(models_glob))
     for p in sorted(model_paths, key=lambda p: p.stat().st_mtime)[:-keep_last]:
@@ -384,7 +384,7 @@ def load_checkpoint(
     )
 
     # Initialize network and restore checkpoint state
-    net = registry.net_factory(network,
+    net = registry.net_factory(network, num_classes, input_channels, config=config, size=size)
 
     # When a checkpoint was trained with EMA:
     # The primary weights in the checkpoint file are the EMA weights
@@ -437,7 +437,7 @@ def load_mim_checkpoint(
     size = lib.get_size_from_signature(signature)
 
     # Initialize network and restore checkpoint state
-    net_encoder = registry.net_factory(encoder,
+    net_encoder = registry.net_factory(encoder, num_classes, input_channels, config=encoder_config, size=size)
     net = registry.mim_net_factory(
         network, net_encoder, config=config, size=size, mask_ratio=mask_ratio, min_mask_size=min_mask_size
     )
@@ -488,7 +488,7 @@ def load_detection_checkpoint(
     size = lib.get_size_from_signature(signature)
 
     # Initialize network and restore checkpoint state
-    net_backbone = registry.net_factory(backbone,
+    net_backbone = registry.net_factory(backbone, num_classes, input_channels, config=backbone_config, size=size)
     net = registry.detection_net_factory(network, num_classes, net_backbone, config=config, size=size)
 
     # When a checkpoint was trained with EMA:
@@ -584,7 +584,7 @@ def load_model(
         merged_config = None  # type: ignore[assignment]
 
         model_state: dict[str, Any] = safetensors.torch.load_file(path, device=device.type)
-        net = registry.net_factory(network,
+        net = registry.net_factory(network, num_classes, input_channels, config=merged_config, size=size)
         if reparameterized is True:
            net.reparameterize_model()
 
@@ -611,7 +611,7 @@ def load_model(
         if len(merged_config) == 0:
             merged_config = None
 
-        net = registry.net_factory(network,
+        net = registry.net_factory(network, num_classes, input_channels, config=merged_config, size=size)
         if reparameterized is True:
             net.reparameterize_model()
 
@@ -733,7 +733,7 @@ def load_detection_model(
 
         model_state: dict[str, Any] = safetensors.torch.load_file(path, device=device.type)
         net_backbone = registry.net_factory(
-            backbone,
+            backbone, num_classes, input_channels, config=backbone_merged_config, size=size
         )
         if backbone_reparameterized is True:
             net_backbone.reparameterize_model()
@@ -776,7 +776,7 @@ def load_detection_model(
             merged_config = None
 
         net_backbone = registry.net_factory(
-            backbone,
+            backbone, num_classes, input_channels, config=backbone_merged_config, size=size
         )
         if backbone_reparameterized is True:
             net_backbone.reparameterize_model()
@@ -959,7 +959,7 @@ def load_model_with_cfg(
         encoder_name = cfg["encoder"]
 
         encoder_config = cfg.get("encoder_config", None)
-        encoder = registry.net_factory(encoder_name,
+        encoder = registry.net_factory(encoder_name, 0, input_channels, config=encoder_config, size=size)
         net = registry.mim_net_factory(name, encoder, config=model_config, size=size)
 
     elif cfg["task"] == Task.OBJECT_DETECTION:
@@ -969,14 +969,14 @@ def load_model_with_cfg(
         backbone_name = cfg["backbone"]
 
         backbone_config = cfg.get("backbone_config", None)
-        backbone = registry.net_factory(backbone_name,
+        backbone = registry.net_factory(backbone_name, num_classes, input_channels, config=backbone_config, size=size)
         if cfg.get("backbone_reparameterized", False) is True:
             backbone.reparameterize_model()
 
         net = registry.detection_net_factory(name, num_classes, backbone, config=model_config, size=size)
 
     elif cfg["task"] == Task.IMAGE_CLASSIFICATION:
-        net = registry.net_factory(name,
+        net = registry.net_factory(name, num_classes, input_channels, config=model_config, size=size)
 
     else:
         raise ValueError(f"Configuration not supported: {cfg['task']}")
@@ -1019,7 +1019,7 @@ def download_model_by_weights(
             f"Requested format '{file_format}' not available for {weights}, available formats are: {available_formats}"
         )
 
-
+    model_file, url = get_pretrained_model_url(weights, file_format)
    if dst is None:
        dst = settings.MODELS_DIR.joinpath(model_file)
 
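Across this file the `registry.net_factory(...)` call sites now pass `num_classes` and `input_channels` positionally after the network name. A usage sketch based only on the call shape visible in these hunks (the import path, network name and argument values are assumptions):

```python
from birder.model_registry import registry  # assumed import path

# Positional num_classes and input_channels, as in the updated call sites
net = registry.net_factory(
    "vit_b16",   # network name (hypothetical)
    100,         # num_classes
    3,           # input_channels
    config=None,
    size=(224, 224),
)
```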
birder/common/lib.py
CHANGED
@@ -157,6 +157,6 @@ def get_pretrained_model_url(weights: str, file_format: str) -> tuple[str, str]:
 
 def format_duration(seconds: float) -> str:
     s = int(seconds)
-
-
+    mm, ss = divmod(s, 60)
+    hh, mm = divmod(mm, 60)
     return f"{hh:d}:{mm:02d}:{ss:02d}"
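A worked example of the `divmod` chain that now implements `format_duration`:

```python
seconds = 3725.9
s = int(seconds)         # 3725
mm, ss = divmod(s, 60)   # (62, 5): total minutes, leftover seconds
hh, mm = divmod(mm, 60)  # (1, 2): hours, leftover minutes
assert f"{hh:d}:{mm:02d}:{ss:02d}" == "1:02:05"
```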
birder/common/masking.py
CHANGED
@@ -16,7 +16,7 @@ def _mask_token_omission(
     Parameters
     ----------
     x
-        Tensor of shape (N, L, D), where N is the batch size, L is the sequence length
+        Tensor of shape (N, L, D), where N is the batch size, L is the sequence length and D is the feature dimension.
     mask_ratio
         The ratio of the sequence length to be masked. This value should be between 0 and 1.
     kept_mask_ratio
@@ -48,7 +48,7 @@ def _mask_token_omission(
     # Masking: length -> length * mask_ratio
     # Perform per-sample random masking by per-sample shuffling.
     # Per-sample shuffling is done by argsort random noise.
-
+    N, L, D = x.size()  # batch, length, dim
     len_keep = int(L * (1 - mask_ratio))
     len_masked = int(L * (mask_ratio - kept_mask_ratio))
 
@@ -82,7 +82,7 @@ def mask_tensor(
     if channels_last is False:
         x = x.permute(0, 2, 3, 1)
 
-
+    B, H, W, _ = x.size()
 
     shaped_mask = mask.reshape(B, H // patch_factor, W // patch_factor)
     shaped_mask = shaped_mask.repeat_interleave(patch_factor, dim=1).repeat_interleave(patch_factor, dim=2)
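The restored comments describe MAE-style per-sample masking: shuffle each sample's tokens by argsort over random noise and keep the first `len_keep`. A minimal sketch of that trick (not birder's exact implementation, which additionally tracks `kept_mask_ratio`):

```python
import torch

def random_masking(x: torch.Tensor, mask_ratio: float) -> tuple[torch.Tensor, torch.Tensor]:
    N, L, D = x.size()
    len_keep = int(L * (1 - mask_ratio))

    noise = torch.rand(N, L, device=x.device)  # uniform noise per token
    ids_shuffle = torch.argsort(noise, dim=1)  # a random permutation per sample
    ids_keep = ids_shuffle[:, :len_keep]       # tokens with the smallest noise survive

    x_kept = torch.gather(x, dim=1, index=ids_keep.unsqueeze(-1).expand(-1, -1, D))

    mask = torch.ones(N, L, device=x.device)   # 1 = masked, 0 = kept
    mask.scatter_(1, ids_keep, 0.0)
    return x_kept, mask
```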
birder/common/training_cli.py
CHANGED
@@ -13,6 +13,7 @@ from birder.conf import settings
 from birder.data.datasets.coco import MosaicType
 from birder.data.transforms.classification import AugType
 from birder.data.transforms.classification import RGBMode
+from birder.data.transforms.detection import MULTISCALE_STEP
 from birder.data.transforms.detection import AugType as DetAugType
 
 logger = logging.getLogger(__name__)
@@ -199,10 +200,16 @@ def add_detection_input_args(parser: argparse.ArgumentParser) -> None:
         action="store_true",
         help="enable random square resize once per batch (capped by max(--size))",
     )
+    group.add_argument(
+        "--multiscale-step",
+        type=int,
+        default=MULTISCALE_STEP,
+        help="step size for multiscale size lists and collator padding divisibility (size_divisible)",
+    )
     group.add_argument(
         "--multiscale-min-size",
         type=int,
-        help="minimum short-edge size for multiscale lists (rounded up to nearest multiple of
+        help="minimum short-edge size for multiscale lists (rounded up to nearest multiple of --multiscale-step)",
     )
 
 
@@ -515,7 +522,10 @@ def add_distributed_args(parser: argparse.ArgumentParser) -> None:
 
 
 def add_logging_and_debug_args(
-    parser: argparse.ArgumentParser,
+    parser: argparse.ArgumentParser,
+    default_log_interval: int = 50,
+    fake_data: bool = True,
+    classification: bool = False,
 ) -> None:
     group = parser.add_argument_group("Logging and debugging parameters")
     group.add_argument(
@@ -525,6 +535,11 @@ def add_logging_and_debug_args(
         metavar="NAME",
         help="experiment name for logging (creates dedicated directory for the run)",
     )
+    if classification is True:
+        group.add_argument(
+            "--top-k", type=int, metavar="K", help="additional top-k accuracy value to track (top-1 is always tracked)"
+        )
+
     group.add_argument(
         "--log-interval",
         type=int,
@@ -746,3 +761,10 @@ def common_args_validation(args: argparse.Namespace) -> None:
     # Precision_args, shared by all scripts
     if args.amp is True and args.model_dtype != "float32":
         raise ValidationError("--amp can only be used with --model-dtype float32")
+
+    if hasattr(args, "top_k") is True and args.top_k is not None:
+        if args.top_k == 1:
+            raise ValidationError("Top-1 accuracy is tracked by default, please remove 1 from --top-k argument")
+
+        if args.top_k <= 0:
+            raise ValidationError("--top-k value must be a positive integer")
birder/common/training_utils.py
CHANGED
@@ -11,6 +11,7 @@ from collections import deque
 from collections.abc import Callable
 from collections.abc import Generator
 from collections.abc import Iterator
+from collections.abc import Sequence
 from datetime import datetime
 from pathlib import Path
 from typing import Any
@@ -361,7 +362,7 @@ def optimizer_parameter_groups(
     Return parameter groups for optimizers with per-parameter group weight decay.
 
     This function creates parameter groups with customizable weight decay, layer-wise
-    learning rate scaling
+    learning rate scaling and special handling for different parameter types. It supports
     advanced optimization techniques like layer decay and custom weight decay rules.
 
     Referenced from https://github.com/pytorch/vision/blob/main/references/classification/utils.py and from
@@ -450,7 +451,7 @@ def optimizer_parameter_groups(
     visited_modules = []
     while len(module_stack_with_prefix) > 0:  # pylint: disable=too-many-nested-blocks
         skip_module = False
-
+        module, prefix = module_stack_with_prefix.pop()
         if id(module) in visited_modules:
             skip_module = True
 
@@ -884,6 +885,11 @@ class SmoothedValue:
         self.total: torch.Tensor | float = 0.0
         self.count: int = 0
 
+    def clear(self) -> None:
+        self.deque.clear()
+        self.total = 0.0
+        self.count = 0
+
     def update(self, value: torch.Tensor | float, n: int = 1) -> None:
         self.deque.append(value)
         self.count += n
@@ -927,14 +933,32 @@ class SmoothedValue:
         return to_tensor(v, torch.device("cpu")).item()  # type: ignore[no-any-return]
 
 
-
+@torch.no_grad()  # type: ignore[untyped-decorator]
+def accuracy(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
     if y_pred.dim() > 1 and y_pred.size(1) > 1:
         y_pred = y_pred.argmax(dim=1)
 
     y_true = y_true.flatten()
     y_pred = y_pred.flatten()
 
-    return (y_true == y_pred).
+    return (y_true == y_pred).sum() / y_true.numel()
+
+
+@torch.no_grad()  # type: ignore[untyped-decorator]
+def topk_accuracy(y_true: torch.Tensor, y_pred: torch.Tensor, topk: Sequence[int]) -> list[torch.Tensor]:
+    maxk = min(max(topk), y_pred.size(1))
+    batch_size = y_true.size(0)
+
+    _, pred = y_pred.topk(maxk, dim=1, largest=True, sorted=True)
+    correct = pred.eq(y_true.unsqueeze(1))
+
+    res: list[torch.Tensor] = []
+    for k in topk:
+        k = min(k, maxk)
+        correct_k = correct[:, :k].any(dim=1).sum(dtype=torch.float32)
+        res.append(correct_k / batch_size)
+
+    return res
 
 
 ###############################################################################
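A quick sanity check of the new `topk_accuracy` helper with toy logits (batch of 3, three classes); per the hunk above it returns one tensor per requested k:

```python
import torch

y_true = torch.tensor([0, 1, 2])
y_pred = torch.tensor(
    [
        [0.9, 0.05, 0.05],  # true class ranked 1st
        [0.6, 0.3, 0.1],    # true class ranked 2nd: counted only for k >= 2
        [0.2, 0.5, 0.3],    # true class ranked 2nd: counted only for k >= 2
    ]
)
top1, top2 = topk_accuracy(y_true, y_pred, topk=(1, 2))
assert torch.isclose(top1, torch.tensor(1 / 3))
assert torch.isclose(top2, torch.tensor(1.0))
```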
birder/data/collators/detection.py
CHANGED
@@ -70,13 +70,21 @@ class BatchRandomResizeCollator(DetectionCollator):
         size: tuple[int, int],
         size_divisible: int = 32,
         multiscale_min_size: Optional[int] = None,
+        multiscale_step: Optional[int] = None,
     ) -> None:
         super().__init__(input_offset, size_divisible=size_divisible)
         if size is None:
             raise ValueError("size must be provided for batch multiscale")
 
         max_side = max(size)
-
+        if multiscale_step is None:
+            multiscale_step = size_divisible
+
+        sizes = []
+        for side in build_multiscale_sizes(multiscale_min_size, multiscale_step=multiscale_step):
+            if side <= max_side:
+                sizes.append(side)
+
         if len(sizes) == 0:
             sizes = [max_side]
 
birder/data/transforms/detection.py
CHANGED
@@ -17,17 +17,20 @@ DEFAULT_MULTISCALE_MAX_SIZE = 800
 
 
 def build_multiscale_sizes(
-    min_size: Optional[int] = None, max_size: int = DEFAULT_MULTISCALE_MAX_SIZE
+    min_size: Optional[int] = None, max_size: int = DEFAULT_MULTISCALE_MAX_SIZE, multiscale_step: int = MULTISCALE_STEP
 ) -> tuple[int, ...]:
+    if multiscale_step <= 0:
+        raise ValueError("multiscale_step must be positive")
+
     if min_size is None:
         min_size = DEFAULT_MULTISCALE_MIN_SIZE
 
-    start = int(math.ceil(min_size /
-    end = int(math.floor(max_size /
+    start = int(math.ceil(min_size / multiscale_step) * multiscale_step)
+    end = int(math.floor(max_size / multiscale_step) * multiscale_step)
     if end < start:
         return (start,)
 
-    return tuple(range(start, end + 1,
+    return tuple(range(start, end + 1, multiscale_step))
 
 
 class ResizeWithRandomInterpolation(nn.Module):
@@ -59,6 +62,7 @@ def get_birder_augment(
     multiscale: bool,
     max_size: Optional[int],
     multiscale_min_size: Optional[int],
+    multiscale_step: int = MULTISCALE_STEP,
     post_mosaic: bool = False,
 ) -> Callable[..., torch.Tensor]:
     if dynamic_size is True:
@@ -98,7 +102,10 @@ def get_birder_augment(
     # Resize
     if multiscale is True:
         transformations.append(
-            v2.RandomShortestSize(
+            v2.RandomShortestSize(
+                min_size=build_multiscale_sizes(multiscale_min_size, multiscale_step=multiscale_step),
+                max_size=max_size or 1333,
+            ),
         )
     else:
         transformations.append(
@@ -160,6 +167,7 @@ def training_preset(
     multiscale: bool = False,
     max_size: Optional[int] = None,
     multiscale_min_size: Optional[int] = None,
+    multiscale_step: int = MULTISCALE_STEP,
     post_mosaic: bool = False,
 ) -> Callable[..., torch.Tensor]:
     mean = rgv_values["mean"]
@@ -180,7 +188,15 @@ def training_preset(
         [
             v2.ToImage(),
             get_birder_augment(
-                size,
+                size,
+                level,
+                fill_value,
+                dynamic_size,
+                multiscale,
+                max_size,
+                multiscale_min_size,
+                multiscale_step,
+                post_mosaic,
             ),
             v2.ToDtype(torch.float32, scale=True),
             v2.Normalize(mean=mean, std=std),
@@ -212,7 +228,10 @@ def training_preset(
        return v2.Compose(  # type: ignore
            [
                v2.ToImage(),
-                v2.RandomShortestSize(
+                v2.RandomShortestSize(
+                    min_size=build_multiscale_sizes(multiscale_min_size, multiscale_step=multiscale_step),
+                    max_size=max_size or 1333,
+                ),
                v2.RandomHorizontalFlip(0.5),
                v2.SanitizeBoundingBoxes(),
                v2.ToDtype(torch.float32, scale=True),
@@ -284,7 +303,7 @@ def training_preset(
         )
 
     if aug_type == "detr":
-        multiscale_sizes = build_multiscale_sizes(multiscale_min_size)
+        multiscale_sizes = build_multiscale_sizes(multiscale_min_size, multiscale_step=multiscale_step)
         return v2.Compose(  # type: ignore
             [
                 v2.ToImage(),
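A worked example of the rounding logic now parameterized by `multiscale_step` (a standalone restatement of the lines shown above, not an import from birder):

```python
import math

min_size, max_size, multiscale_step = 480, 800, 32
start = int(math.ceil(min_size / multiscale_step) * multiscale_step)  # 480
end = int(math.floor(max_size / multiscale_step) * multiscale_step)   # 800
sizes = tuple(range(start, end + 1, multiscale_step))
assert sizes == (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
```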
birder/data/transforms/mosaic.py
CHANGED
@@ -19,7 +19,7 @@ def mosaic_random_center(
     Create a mosaic augmentation by combining 4 images into a single image.
 
     This augmentation places 4 images on a canvas, meeting at a randomly selected
-    center point. Each image is scaled to fit, cropped as needed
+    center point. Each image is scaled to fit, cropped as needed and their bounding
     boxes are transformed accordingly.
 
     Parameters
birder/datahub/classification.py
CHANGED
@@ -63,7 +63,7 @@ class TestDataset(ImageFolder):
         super().__init__(self._root.joinpath(split), transform, target_transform, loader, is_valid_file)
 
     def __getitem__(self, index: int) -> tuple[str, torch.Tensor, Any]:
-
+        path, target = self.samples[index]
         sample = self.loader(path)
         if self.transform is not None:
             sample = self.transform(sample)
@@ -122,7 +122,7 @@ class Flowers102(ImageFolder):
         super().__init__(self._root.joinpath(split), transform, target_transform, loader, is_valid_file)
 
     def __getitem__(self, index: int) -> tuple[str, torch.Tensor, Any]:
-
+        path, target = self.samples[index]
         sample = self.loader(path)
         if self.transform is not None:
             sample = self.transform(sample)
@@ -182,7 +182,7 @@ class CUB_200_2011(ImageFolder):
         super().__init__(self._root.joinpath(split), transform, target_transform, loader, is_valid_file)
 
     def __getitem__(self, index: int) -> tuple[str, torch.Tensor, Any]:
-
+        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
birder/inference/classification.py
CHANGED
@@ -75,7 +75,7 @@ def infer_batch(
         embedding = embedding_tensor.cpu().float().numpy()
 
     elif tta is True:
-
+        _, _, H, W = inputs.size()
         crop_h = int(H * 0.8)
         crop_w = int(W * 0.8)
         tta_inputs = five_crop(inputs, size=[crop_h, crop_w])
@@ -137,7 +137,7 @@ def infer_dataloader_iter(
         inputs = inputs.to(device, dtype=model_dtype)
 
         with torch.amp.autocast(device.type, enabled=amp, dtype=amp_dtype):
-
+            out, embedding = infer_batch(
                 net, inputs, return_embedding=return_embedding, tta=tta, return_logits=return_logits, **kwargs
             )
 
@@ -394,7 +394,7 @@ def evaluate(
     num_samples: Optional[int] = None,
     sparse: bool = False,
 ) -> Results | SparseResults:
-
+    sample_paths, outs, labels, _ = infer_dataloader(
        device, net, dataloader, tta=tta, model_dtype=model_dtype, amp=amp, amp_dtype=amp_dtype, num_samples=num_samples
    )
    if sparse is True:
birder/inference/data_parallel.py
CHANGED
@@ -253,7 +253,7 @@ class InferenceDataParallel(nn.Module):
 
         This allows custom methods (e.g., model.embedding()) to be called
         on the InferenceDataParallel instance, which then scatters inputs,
-        calls the method on each replica
+        calls the method on each replica and gathers the results.
 
         Parameters
         ----------
birder/inference/detection.py
CHANGED
@@ -20,7 +20,7 @@ def _normalize_image_sizes(inputs: torch.Tensor, image_sizes: Optional[list[list
     if image_sizes is not None:
         return image_sizes
 
-
+    _, _, height, width = inputs.shape
     return [[height, width] for _ in range(inputs.size(0))]
 
 
@@ -149,20 +149,20 @@ def infer_batch(
     **kwargs: Any,
 ) -> list[dict[str, torch.Tensor]]:
     if tta is False:
-
+        detections, _ = net(inputs, masks=masks, image_sizes=image_sizes, **kwargs)
         return detections  # type: ignore[no-any-return]
 
     normalized_sizes = _normalize_image_sizes(inputs, image_sizes)
     detections_list: list[list[dict[str, torch.Tensor]]] = []
 
     for scale in (0.8, 1.0, 1.2):
-
-
+        scaled_inputs, scaled_masks, scaled_sizes = _resize_batch(inputs, normalized_sizes, scale, size_divisible=32)
+        detections, _ = net(scaled_inputs, masks=scaled_masks, image_sizes=scaled_sizes, **kwargs)
         detections = _rescale_detections(detections, scaled_sizes, normalized_sizes)
         detections_list.append(detections)
 
         flipped_inputs = _hflip_inputs(scaled_inputs, scaled_sizes)
-
+        flipped_detections, _ = net(flipped_inputs, masks=scaled_masks, image_sizes=scaled_sizes, **kwargs)
         flipped_detections = _invert_detections(flipped_detections, scaled_sizes)
         flipped_detections = _rescale_detections(flipped_detections, scaled_sizes, normalized_sizes)
         detections_list.append(flipped_detections)
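The TTA path runs each scale twice, once plain and once horizontally flipped, then maps the flipped boxes back via `_invert_detections`. A sketch of the box inversion that step presumably performs (hypothetical helper; XYXY boxes, image width `width`):

```python
import torch

def hflip_boxes(boxes: torch.Tensor, width: int) -> torch.Tensor:
    # Mirror XYXY boxes around the vertical axis; x1 and x2 swap after reflection
    x1, y1, x2, y2 = boxes.unbind(dim=1)
    return torch.stack([width - x2, y1, width - x1, y2], dim=1)

boxes = torch.tensor([[10.0, 5.0, 30.0, 25.0]])
assert torch.equal(hflip_boxes(boxes, 100), torch.tensor([[70.0, 5.0, 90.0, 25.0]]))
```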
birder/inference/wbf.py
CHANGED
@@ -182,7 +182,7 @@ def fuse_detections_wbf_single(
     scores_list = [detection["scores"] for detection in detections]
     labels_list = [detection["labels"] for detection in detections]
 
-
+    boxes, scores, labels = weighted_boxes_fusion(
         boxes_list,
         scores_list,
         labels_list,
birder/introspection/attention_rollout.py
CHANGED
@@ -70,7 +70,7 @@ def compute_rollout(
     num_to_discard = int(num_allowed * discard_ratio)
     if num_to_discard > 0:
         # Drop the smallest allowed values
-
+        _, low_idx = torch.topk(allowed_values, num_to_discard, largest=False)
         allowed_values[low_idx] = 0
         attn[allow] = allowed_values
         attention_heads_fused[0] = attn
@@ -97,7 +97,7 @@ def compute_rollout(
 
     # Normalize and reshape to 2D map using actual patch grid dimensions
     mask = mask / (mask.max() + 1e-8)
-
+    grid_h, grid_w = patch_grid_shape
     mask = mask.reshape(grid_h, grid_w)
 
     return mask
@@ -141,7 +141,7 @@ class AttentionRollout:
         net: nn.Module,
         device: torch.device,
         transform: Callable[..., torch.Tensor],
-        attention_layer_name: str = "
+        attention_layer_name: str = "attn",
         discard_ratio: float = 0.9,
         head_fusion: Literal["mean", "max", "min"] = "max",
     ) -> None:
@@ -156,11 +156,11 @@ class AttentionRollout:
         self.attention_gatherer = AttentionGatherer(net, attention_layer_name)
 
     def __call__(self, image: str | Path | Image.Image, target_class: Optional[int] = None) -> InterpretabilityResult:
-
+        input_tensor, rgb_img = preprocess_image(image, self.transform, self.device)
 
-
+        attentions, logits = self.attention_gatherer(input_tensor)
 
-
+        _, _, H, W = input_tensor.shape
         patch_grid_shape = (H // self.net.stem_stride, W // self.net.stem_stride)
 
         attention_map = compute_rollout(
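For context, `compute_rollout` implements attention rollout (Abnar & Zuidema, 2020): fuse heads per layer, add the identity for the residual connection, re-normalize, and multiply across layers. A minimal sketch of that recurrence (not birder's exact code, which also applies the `discard_ratio` filtering shown above):

```python
import torch

def rollout(attentions: list[torch.Tensor]) -> torch.Tensor:
    # attentions: per-layer head-fused maps of shape (tokens, tokens)
    result = torch.eye(attentions[0].size(-1))
    for attn in attentions:
        attn = attn + torch.eye(attn.size(-1))        # identity models the residual path
        attn = attn / attn.sum(dim=-1, keepdim=True)  # re-normalize rows
        result = attn @ result                        # accumulate across layers
    return result
```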
birder/introspection/feature_pca.py
CHANGED
@@ -17,7 +17,7 @@ class FeaturePCA:
     Visualizes feature maps using Principal Component Analysis
 
     This method extracts feature maps from a specified stage of a DetectorBackbone model,
-    applies PCA to reduce the channel dimension to 3 components
+    applies PCA to reduce the channel dimension to 3 components and visualizes them as an RGB image where:
     - R channel = 1st principal component (most important)
     - G channel = 2nd principal component
     - B channel = 3rd principal component
@@ -40,7 +40,7 @@ class FeaturePCA:
         self.stage = stage
 
     def __call__(self, image: str | Path | Image.Image) -> InterpretabilityResult:
-
+        input_tensor, rgb_img = preprocess_image(image, self.transform, self.device)
 
         with torch.inference_mode():
             features_dict = self.net.detection_features(input_tensor)
@@ -54,11 +54,11 @@ class FeaturePCA:
 
         # Handle channels_last format (B, H, W, C) vs channels_first (B, C, H, W)
         if self.channels_last is True:
-
+            B, H, W, C = features_np.shape
             # Already in (B, H, W, C), just reshape to (B*H*W, C)
             features_reshaped = features_np.reshape(-1, C)
         else:
-
+            B, C, H, W = features_np.shape
             # Reshape to (spatial_points, channels) for PCA
             features_reshaped = features_np.reshape(B, C, -1)
             features_reshaped = features_reshaped.transpose(0, 2, 1)  # (B, H*W, C)
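The restored docstring describes mapping the top three principal components of a feature map to R, G and B. A minimal sketch of that mapping using numpy's SVD (an assumption; the actual implementation may use a different PCA routine):

```python
import numpy as np

def pca_rgb(features: np.ndarray) -> np.ndarray:
    # features: (H, W, C) feature map -> (H, W, 3) pseudo-color image
    H, W, C = features.shape
    flat = features.reshape(-1, C).astype(np.float64)
    flat -= flat.mean(axis=0)                    # center before PCA
    _, _, vt = np.linalg.svd(flat, full_matrices=False)
    components = flat @ vt[:3].T                 # project onto the top 3 components
    components -= components.min(axis=0)
    components /= components.max(axis=0) + 1e-8  # scale each channel to [0, 1]
    return components.reshape(H, W, 3)           # R, G, B = PC1, PC2, PC3
```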
birder/introspection/gradcam.py
CHANGED
@@ -98,7 +98,7 @@ class GradCAM:
         self.activation_capture = ActivationCapture(net, target_layer, reshape_transform)
 
     def __call__(self, image: str | Path | Image.Image, target_class: Optional[int] = None) -> InterpretabilityResult:
-
+        input_tensor, rgb_img = preprocess_image(image, self.transform, self.device)
         input_tensor.requires_grad_(True)
 
         # Forward pass
|