birder-0.2.2-py3-none-any.whl → birder-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- birder/common/lib.py +2 -9
- birder/common/training_cli.py +24 -0
- birder/common/training_utils.py +338 -41
- birder/data/collators/detection.py +11 -3
- birder/data/dataloader/webdataset.py +12 -2
- birder/data/datasets/coco.py +8 -10
- birder/data/transforms/detection.py +30 -13
- birder/inference/detection.py +108 -4
- birder/inference/wbf.py +226 -0
- birder/kernels/load_kernel.py +16 -11
- birder/kernels/soft_nms/soft_nms.cpp +17 -18
- birder/net/__init__.py +8 -0
- birder/net/cait.py +4 -3
- birder/net/convnext_v1.py +5 -0
- birder/net/crossformer.py +33 -30
- birder/net/crossvit.py +4 -3
- birder/net/deit.py +3 -3
- birder/net/deit3.py +3 -3
- birder/net/detection/deformable_detr.py +2 -5
- birder/net/detection/detr.py +2 -5
- birder/net/detection/efficientdet.py +67 -93
- birder/net/detection/fcos.py +2 -7
- birder/net/detection/retinanet.py +2 -7
- birder/net/detection/rt_detr_v1.py +2 -0
- birder/net/detection/yolo_anchors.py +205 -0
- birder/net/detection/yolo_v2.py +25 -24
- birder/net/detection/yolo_v3.py +39 -40
- birder/net/detection/yolo_v4.py +28 -26
- birder/net/detection/yolo_v4_tiny.py +24 -20
- birder/net/efficientformer_v1.py +15 -9
- birder/net/efficientformer_v2.py +39 -29
- birder/net/efficientvit_msft.py +9 -7
- birder/net/fasternet.py +1 -1
- birder/net/fastvit.py +1 -0
- birder/net/flexivit.py +5 -4
- birder/net/gc_vit.py +671 -0
- birder/net/hiera.py +12 -9
- birder/net/hornet.py +9 -7
- birder/net/iformer.py +8 -6
- birder/net/levit.py +42 -30
- birder/net/lit_v1.py +472 -0
- birder/net/lit_v1_tiny.py +357 -0
- birder/net/lit_v2.py +436 -0
- birder/net/maxvit.py +67 -55
- birder/net/mobilenet_v4_hybrid.py +1 -1
- birder/net/mobileone.py +1 -0
- birder/net/mvit_v2.py +13 -12
- birder/net/pit.py +4 -3
- birder/net/pvt_v1.py +4 -1
- birder/net/repghost.py +1 -0
- birder/net/repvgg.py +1 -0
- birder/net/repvit.py +1 -0
- birder/net/resnet_v1.py +1 -1
- birder/net/resnext.py +67 -25
- birder/net/rope_deit3.py +5 -3
- birder/net/rope_flexivit.py +7 -4
- birder/net/rope_vit.py +10 -5
- birder/net/se_resnet_v1.py +46 -0
- birder/net/se_resnext.py +3 -0
- birder/net/simple_vit.py +11 -8
- birder/net/swin_transformer_v1.py +71 -68
- birder/net/swin_transformer_v2.py +38 -31
- birder/net/tiny_vit.py +20 -10
- birder/net/transnext.py +38 -28
- birder/net/vit.py +5 -19
- birder/net/vit_parallel.py +5 -4
- birder/net/vit_sam.py +38 -37
- birder/net/vovnet_v1.py +15 -0
- birder/net/vovnet_v2.py +31 -1
- birder/ops/msda.py +108 -43
- birder/ops/swattention.py +124 -61
- birder/results/detection.py +4 -0
- birder/scripts/benchmark.py +110 -32
- birder/scripts/predict.py +8 -0
- birder/scripts/predict_detection.py +18 -11
- birder/scripts/train.py +48 -46
- birder/scripts/train_barlow_twins.py +44 -45
- birder/scripts/train_byol.py +44 -45
- birder/scripts/train_capi.py +50 -49
- birder/scripts/train_data2vec.py +45 -47
- birder/scripts/train_data2vec2.py +45 -47
- birder/scripts/train_detection.py +83 -50
- birder/scripts/train_dino_v1.py +60 -47
- birder/scripts/train_dino_v2.py +86 -52
- birder/scripts/train_dino_v2_dist.py +84 -50
- birder/scripts/train_franca.py +51 -52
- birder/scripts/train_i_jepa.py +45 -47
- birder/scripts/train_ibot.py +51 -53
- birder/scripts/train_kd.py +194 -76
- birder/scripts/train_mim.py +44 -45
- birder/scripts/train_mmcr.py +44 -45
- birder/scripts/train_rotnet.py +45 -46
- birder/scripts/train_simclr.py +44 -45
- birder/scripts/train_vicreg.py +44 -45
- birder/tools/auto_anchors.py +20 -1
- birder/tools/convert_model.py +18 -15
- birder/tools/det_results.py +114 -2
- birder/tools/pack.py +172 -103
- birder/tools/quantize_model.py +73 -67
- birder/tools/show_det_iterator.py +10 -1
- birder/version.py +1 -1
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/METADATA +4 -3
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/RECORD +107 -101
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/WHEEL +0 -0
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/entry_points.txt +0 -0
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/top_level.txt +0 -0
birder/net/detection/yolo_anchors.py
ADDED
@@ -0,0 +1,205 @@
+"""
+Shared YOLO anchor definitions and helpers.
+"""
+
+import json
+from collections.abc import Sequence
+from typing import Any
+from typing import Literal
+from typing import NotRequired
+from typing import TypedDict
+from typing import overload
+
+AnchorPair = tuple[float, float]
+AnchorGroup = list[AnchorPair]
+AnchorGroups = list[AnchorGroup]
+AnchorLike = AnchorGroups | AnchorGroup
+
+# Default anchors from yolo.cfg (COCO dataset), in grid units
+YOLO_V2_ANCHORS: AnchorGroup = [
+    (0.57273, 0.677385),
+    (1.87446, 2.06253),
+    (3.33843, 5.47434),
+    (7.88282, 3.52778),
+    (9.77052, 9.16828),
+]
+
+# Default anchors from YOLO v3 paper (sorted by area, small to large)
+# These values are in absolute pixels (width, height) computed using K-Means
+# on the COCO dataset with a reference input size of 416x416.
+YOLO_V3_ANCHORS: AnchorGroups = [
+    [(10.0, 13.0), (16.0, 30.0), (33.0, 23.0)],  # Small objects (stride 8)
+    [(30.0, 61.0), (62.0, 45.0), (59.0, 119.0)],  # Medium objects (stride 16)
+    [(116.0, 90.0), (156.0, 198.0), (373.0, 326.0)],  # Large objects (stride 32)
+]
+
+# Default anchors from YOLO v4 (COCO), in pixels
+YOLO_V4_ANCHORS: AnchorGroups = [
+    [(12.0, 16.0), (19.0, 36.0), (40.0, 28.0)],  # Small
+    [(36.0, 75.0), (76.0, 55.0), (72.0, 146.0)],  # Medium
+    [(142.0, 110.0), (192.0, 243.0), (459.0, 401.0)],  # Large
+]
+
+# Default anchors from YOLO v4 Tiny (COCO), in pixels
+YOLO_V4_TINY_ANCHORS: AnchorGroups = [
+    [(10.0, 14.0), (23.0, 27.0), (37.0, 58.0)],  # Medium
+    [(81.0, 82.0), (135.0, 169.0), (344.0, 319.0)],  # Large
+]
+
+
+class AnchorPreset(TypedDict):
+    anchors: AnchorLike
+    format: Literal["grid", "pixels"]
+    size: tuple[int, int]
+    strides: NotRequired[Sequence[int]]
+
+
+ANCHOR_PRESETS: dict[str, AnchorPreset] = {
+    "yolo_v2": {"anchors": YOLO_V2_ANCHORS, "format": "grid", "size": (416, 416), "strides": (32,)},
+    "yolo_v3": {"anchors": YOLO_V3_ANCHORS, "format": "pixels", "size": (416, 416)},
+    "yolo_v4": {"anchors": YOLO_V4_ANCHORS, "format": "pixels", "size": (608, 608)},
+    "yolo_v4_tiny": {"anchors": YOLO_V4_TINY_ANCHORS, "format": "pixels", "size": (416, 416)},
+}
+
+
+@overload
+def scale_anchors(anchors: AnchorGroup, from_size: tuple[int, int], to_size: tuple[int, int]) -> AnchorGroup: ...
+
+
+@overload
+def scale_anchors(anchors: AnchorGroups, from_size: tuple[int, int], to_size: tuple[int, int]) -> AnchorGroups: ...
+
+
+def scale_anchors(anchors: AnchorLike, from_size: tuple[int, int], to_size: tuple[int, int]) -> AnchorLike:
+    (anchor_groups, single) = _normalize_anchor_groups(anchors)
+
+    if from_size == to_size:
+        # Avoid aliasing default anchors in case they are mutated later
+        scaled: AnchorGroups = [list(group) for group in anchor_groups]
+        if single is True:
+            return scaled[0]
+
+        return scaled
+
+    scale_h = to_size[0] / from_size[0]
+    scale_w = to_size[1] / from_size[1]
+    scaled = [[(w * scale_w, h * scale_h) for (w, h) in group] for group in anchor_groups]
+
+    if single is True:
+        return scaled[0]
+
+    return scaled
+
+
+@overload
+def pixels_to_grid(anchors: AnchorGroup, strides: Sequence[int]) -> AnchorGroup: ...
+
+
+@overload
+def pixels_to_grid(anchors: AnchorGroups, strides: Sequence[int]) -> AnchorGroups: ...
+
+
+def pixels_to_grid(anchors: AnchorLike, strides: Sequence[int]) -> AnchorLike:
+    (anchor_groups, single) = _normalize_anchor_groups(anchors)
+    if len(anchor_groups) != len(strides):
+        raise ValueError("strides must provide one value per anchor scale")
+
+    converted: AnchorGroups = []
+    for group, stride in zip(anchor_groups, strides):
+        converted.append([(w / stride, h / stride) for (w, h) in group])
+
+    if single is True:
+        return converted[0]
+
+    return converted
+
+
+@overload
+def grid_to_pixels(anchors: AnchorGroup, strides: Sequence[int]) -> AnchorGroup: ...
+
+
+@overload
+def grid_to_pixels(anchors: AnchorGroups, strides: Sequence[int]) -> AnchorGroups: ...
+
+
+def grid_to_pixels(anchors: AnchorLike, strides: Sequence[int]) -> AnchorLike:
+    (anchor_groups, single) = _normalize_anchor_groups(anchors)
+    if len(anchor_groups) != len(strides):
+        raise ValueError("strides must provide one value per anchor scale")
+
+    converted: AnchorGroups = []
+    for group, stride in zip(anchor_groups, strides):
+        converted.append([(w * stride, h * stride) for (w, h) in group])
+
+    if single is True:
+        return converted[0]
+
+    return converted
+
+
+def _normalize_anchor_groups(anchors: AnchorLike) -> tuple[AnchorGroups, bool]:
+    if len(anchors) > 0 and _is_anchor_pair(anchors[0]) is True:
+        return ([anchors], True)  # type: ignore[list-item]
+
+    return (anchors, False)  # type: ignore[return-value]
+
+
+def _is_anchor_pair(value: Any) -> bool:
+    if not isinstance(value, Sequence) or len(value) != 2:
+        return False
+
+    return all(isinstance(item, (float, int)) for item in value)
+
+
+def _resolve_anchors(
+    preset: str, *, anchor_format: str, model_size: tuple[int, int], model_strides: Sequence[int]
+) -> AnchorLike:
+    if preset.endswith(".json") is True:
+        with open(preset, "r", encoding="utf-8") as handle:
+            preset_spec = json.load(handle)
+    else:
+        if preset not in ANCHOR_PRESETS:
+            raise ValueError(f"Unknown anchor preset: {preset}")
+
+        preset_spec = ANCHOR_PRESETS[preset]
+
+    anchors = preset_spec["anchors"]
+    preset_size = tuple(preset_spec["size"])
+    preset_format = preset_spec["format"]
+    if preset_format == "grid":
+        if "strides" not in preset_spec:
+            raise ValueError("Preset is missing strides required for grid anchors")
+
+        preset_strides = preset_spec["strides"]
+        anchors = grid_to_pixels(anchors, preset_strides)
+
+    anchors = scale_anchors(anchors, preset_size, model_size)
+    if anchor_format == "pixels":
+        return anchors
+
+    if anchor_format == "grid":
+        return pixels_to_grid(anchors, model_strides)
+
+    raise ValueError(f"Unsupported anchor format: {anchor_format}")
+
+
+def resolve_anchor_group(
+    preset: str, *, anchor_format: str, model_size: tuple[int, int], model_strides: Sequence[int]
+) -> AnchorGroup:
+    anchors = _resolve_anchors(preset, anchor_format=anchor_format, model_size=model_size, model_strides=model_strides)
+    (anchor_groups, single) = _normalize_anchor_groups(anchors)
+    if single is False:
+        raise ValueError("Expected a single anchor group for this model")
+
+    return anchor_groups[0]
+
+
+def resolve_anchor_groups(
+    preset: str, *, anchor_format: str, model_size: tuple[int, int], model_strides: Sequence[int]
+) -> AnchorGroups:
+    anchors = _resolve_anchors(preset, anchor_format=anchor_format, model_size=model_size, model_strides=model_strides)
+    (anchor_groups, single) = _normalize_anchor_groups(anchors)
+    if single is True:
+        raise ValueError("Expected multiple anchor groups for this model")
+
+    return anchor_groups
birder/net/detection/yolo_v2.py
CHANGED
@@ -17,18 +17,11 @@ from torch import nn
 from torchvision.ops import Conv2dNormActivation
 from torchvision.ops import boxes as box_ops
 
+from birder.model_registry import registry
 from birder.net.base import DetectorBackbone
 from birder.net.detection.base import DetectionBaseNet
 from birder.net.detection.base import ImageList
-
-# Default anchors from yolo.cfg (COCO dataset)
-DEFAULT_ANCHORS = [
-    (0.57273, 0.677385),
-    (1.87446, 2.06253),
-    (3.33843, 5.47434),
-    (7.88282, 3.52778),
-    (9.77052, 9.16828),
-]
+from birder.net.detection.yolo_anchors import resolve_anchor_group
 
 
 def decode_predictions(
@@ -102,8 +95,8 @@ def decode_predictions(
 class YOLOAnchorGenerator(nn.Module):
     def __init__(self, anchors: list[tuple[float, float]]) -> None:
         super().__init__()
-        self.anchors = anchors
-        self.num_anchors =
+        self.anchors = nn.Buffer(torch.tensor(anchors, dtype=torch.float32))
+        self.num_anchors: int = self.anchors.size(0)
 
     def num_anchors_per_location(self) -> int:
         return self.num_anchors
@@ -134,8 +127,7 @@ class YOLOAnchorGenerator(nn.Module):
         grid = torch.stack([grid_x, grid_y], dim=-1)
 
         # Scale anchors to feature map stride (anchors are in grid units)
-        anchors_tensor = torch.tensor(
-        anchors_tensor = anchors_tensor * torch.tensor([stride_w, stride_h], device=device, dtype=dtype)
+        anchors_tensor = self.anchors * torch.tensor([stride_w, stride_h], device=device, dtype=dtype)
 
         # Store strides as tensor
         stride = torch.tensor([stride_h, stride_w], device=device, dtype=dtype)
@@ -222,7 +214,6 @@ class YOLOHead(nn.Module):
 # pylint: disable=invalid-name
 class YOLO_v2(DetectionBaseNet):
     default_size = (416, 416)
-    auto_register = True
 
     def __init__(
         self,
@@ -234,7 +225,7 @@ class YOLO_v2(DetectionBaseNet):
         export_mode: bool = False,
     ) -> None:
         super().__init__(num_classes, backbone, config=config, size=size, export_mode=export_mode)
-        assert self.config is None, "
+        assert self.config is not None, "must set config"
 
         self.num_classes = self.num_classes - 1
 
@@ -242,13 +233,19 @@ class YOLO_v2(DetectionBaseNet):
         nms_thresh = 0.45
         detections_per_img = 300
         mid_channels = 1024
-
-
-
-
-
+        ignore_thresh = 0.5
+        noobj_coeff = 0.5
+        coord_coeff = 5.0
+        obj_coeff = 1.0
+        cls_coeff = 1.0
+        anchor_spec = self.config["anchors"]
+
+        self.ignore_thresh = ignore_thresh
+        self.noobj_coeff = noobj_coeff
+        self.coord_coeff = coord_coeff
+        self.obj_coeff = obj_coeff
+        self.cls_coeff = cls_coeff
 
-        self.anchors = DEFAULT_ANCHORS
         self.score_thresh = score_thresh
         self.nms_thresh = nms_thresh
         self.detections_per_img = detections_per_img
@@ -258,7 +255,8 @@ class YOLO_v2(DetectionBaseNet):
 
         self.neck = YOLONeck(self.backbone.return_channels, mid_channels)
 
-
+        anchors = resolve_anchor_group(anchor_spec, anchor_format="grid", model_size=self.size, model_strides=(32,))
+        self.anchor_generator = YOLOAnchorGenerator(anchors)
         num_anchors = self.anchor_generator.num_anchors_per_location()
         self.head = YOLOHead(self.neck.out_channels, num_anchors, self.num_classes)
 
@@ -319,7 +317,7 @@ class YOLO_v2(DetectionBaseNet):
         device = predictions.device
         dtype = predictions.dtype
         (batch_size, _, H, W) = predictions.size()
-        num_anchors =
+        num_anchors = self.anchor_generator.num_anchors
 
         stride_h = stride[0]
         stride_w = stride[1]
@@ -423,7 +421,7 @@ class YOLO_v2(DetectionBaseNet):
 
         device = predictions.device
         (N, _, H, W) = predictions.size()
-        num_anchors =
+        num_anchors = self.anchor_generator.num_anchors
 
         predictions = predictions.view(N, num_anchors, 5 + self.num_classes, H, W)
         predictions = predictions.permute(0, 1, 3, 4, 2).contiguous()
@@ -552,3 +550,6 @@ class YOLO_v2(DetectionBaseNet):
         detections = self.postprocess_detections(decoded_predictions, images.image_sizes)
 
         return (detections, losses)
+
+
+registry.register_model_config("yolo_v2", YOLO_v2, config={"anchors": "yolo_v2"})
birder/net/detection/yolo_v3.py
CHANGED
@@ -17,32 +17,11 @@ from torch import nn
 from torchvision.ops import Conv2dNormActivation
 from torchvision.ops import boxes as box_ops
 
+from birder.model_registry import registry
 from birder.net.base import DetectorBackbone
 from birder.net.detection.base import DetectionBaseNet
 from birder.net.detection.base import ImageList
-
-# Default anchors from YOLO v3 paper (sorted by area, small to large)
-# These values are in absolute pixels (width, height) computed using K-Means
-# on the COCO dataset with a reference input size of 416x416.
-DEFAULT_ANCHORS = [
-    [(10.0, 13.0), (16.0, 30.0), (33.0, 23.0)],  # Small objects (stride 8)
-    [(30.0, 61.0), (62.0, 45.0), (59.0, 119.0)],  # Medium objects (stride 16)
-    [(116.0, 90.0), (156.0, 198.0), (373.0, 326.0)],  # Large objects (stride 32)
-]
-
-
-def scale_anchors(
-    anchors: list[list[tuple[float, float]]],
-    from_size: tuple[int, int],
-    to_size: tuple[int, int],
-) -> list[list[tuple[float, float]]]:
-    if from_size == to_size:
-        # Avoid aliasing default anchors in case they are mutated later
-        return [list(scale) for scale in anchors]
-
-    scale_h = to_size[0] / from_size[0]
-    scale_w = to_size[1] / from_size[1]
-    return [[(w * scale_w, h * scale_h) for (w, h) in scale] for scale in anchors]
+from birder.net.detection.yolo_anchors import resolve_anchor_groups
 
 
 def decode_predictions(
@@ -116,11 +95,20 @@ def decode_predictions(
 class YOLOAnchorGenerator(nn.Module):
     def __init__(self, anchors: list[list[tuple[float, float]]]) -> None:
         super().__init__()
-        self.anchors = anchors
-        self.num_scales =
+        self.anchors = nn.Buffer(torch.tensor(anchors, dtype=torch.float32))
+        self.num_scales = self.anchors.size(0)
 
     def num_anchors_per_location(self) -> list[int]:
-        return [
+        return [a.size(0) for a in self.anchors]
+
+    def scale_anchors(self, from_size: tuple[int, int], to_size: tuple[int, int]) -> None:
+        if from_size == to_size:
+            return
+
+        scale_h = to_size[0] / from_size[0]
+        scale_w = to_size[1] / from_size[1]
+        self.anchors[..., 0].mul_(scale_w)
+        self.anchors[..., 1].mul_(scale_h)
 
     def forward(
         self, image_list: ImageList, feature_maps: list[torch.Tensor]
@@ -152,7 +140,7 @@ class YOLOAnchorGenerator(nn.Module):
         grid = torch.stack([grid_x, grid_y], dim=-1)
 
         # Select anchors for this scale
-        anchors_for_scale =
+        anchors_for_scale = self.anchors[idx]
 
         # Store strides as tensor
         strides = torch.tensor([stride_h, stride_w], device=device, dtype=dtype)
@@ -321,7 +309,6 @@ class YOLONeck(nn.Module):
 # pylint: disable=invalid-name
 class YOLO_v3(DetectionBaseNet):
     default_size = (416, 416)
-    auto_register = True
 
     def __init__(
         self,
@@ -333,20 +320,26 @@ class YOLO_v3(DetectionBaseNet):
         export_mode: bool = False,
     ) -> None:
         super().__init__(num_classes, backbone, config=config, size=size, export_mode=export_mode)
-        assert self.config is None, "
+        assert self.config is not None, "must set config"
 
         self.num_classes = self.num_classes - 1
 
         score_thresh = 0.05
         nms_thresh = 0.45
         detections_per_img = 300
-
-
-
-
-
+        ignore_thresh = 0.5
+        noobj_coeff = 0.2
+        coord_coeff = 5.0
+        obj_coeff = 1.0
+        cls_coeff = 1.0
+        anchor_spec = self.config["anchors"]
+
+        self.ignore_thresh = ignore_thresh
+        self.noobj_coeff = noobj_coeff
+        self.coord_coeff = coord_coeff
+        self.obj_coeff = obj_coeff
+        self.cls_coeff = cls_coeff
 
-        self.anchors = scale_anchors(DEFAULT_ANCHORS, self.default_size, self.size)
         self.score_thresh = score_thresh
         self.nms_thresh = nms_thresh
         self.detections_per_img = detections_per_img
@@ -356,7 +349,10 @@ class YOLO_v3(DetectionBaseNet):
 
         self.neck = YOLONeck(self.backbone.return_channels)
 
-
+        anchors = resolve_anchor_groups(
+            anchor_spec, anchor_format="pixels", model_size=self.size, model_strides=(8, 16, 32)
+        )
+        self.anchor_generator = YOLOAnchorGenerator(anchors)
         num_anchors = self.anchor_generator.num_anchors_per_location()
         self.head = YOLOHead(self.neck.out_channels, num_anchors, self.num_classes)
 
@@ -376,8 +372,7 @@ class YOLO_v3(DetectionBaseNet):
         super().adjust_size(new_size)
 
         if adjust_anchors is True:
-            self.
-            self.anchor_generator.anchors = self.anchors
+            self.anchor_generator.scale_anchors(old_size, new_size)
 
     def freeze(self, freeze_classifier: bool = True) -> None:
         for param in self.parameters():
@@ -435,7 +430,7 @@ class YOLO_v3(DetectionBaseNet):
 
         # Build flat list of all anchors with their scale indices
        all_anchors = torch.concat(anchors, dim=0)
-        anchors_per_scale =
+        anchors_per_scale = self.anchor_generator.num_anchors_per_location()
         cumsum_anchors = torch.tensor([0] + anchors_per_scale, device=device).cumsum(0)
 
         # Get grid sizes and strides for each scale
@@ -586,6 +581,7 @@ class YOLO_v3(DetectionBaseNet):
         (target_tensors, obj_masks, noobj_masks) = self._build_targets(predictions, targets, anchors, strides)
 
         device = predictions[0].device
+        anchors_per_scale = self.anchor_generator.num_anchors_per_location()
         coord_loss = torch.tensor(0.0, device=device)
         obj_loss = torch.tensor(0.0, device=device)
         noobj_loss = torch.tensor(0.0, device=device)
@@ -594,7 +590,7 @@ class YOLO_v3(DetectionBaseNet):
         num_obj = 0
         for scale_idx, pred in enumerate(predictions):
             (N, _, H, W) = pred.size()
-            num_anchors_scale =
+            num_anchors_scale = anchors_per_scale[scale_idx]
 
             pred = pred.view(N, num_anchors_scale, 5 + self.num_classes, H, W)
             pred = pred.permute(0, 1, 3, 4, 2).contiguous()
@@ -730,3 +726,6 @@ class YOLO_v3(DetectionBaseNet):
         detections = self.postprocess_detections(decoded_predictions, images.image_sizes)
 
         return (detections, losses)
+
+
+registry.register_model_config("yolo_v3", YOLO_v3, config={"anchors": "yolo_v3"})
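As a sanity check on the buffer-based rescaling added above, here is a standalone sketch of the same arithmetic (not from the package), using the YOLO v3 default anchors and a resize from the 416x416 reference to 640x640:

# Sketch only: mirrors YOLOAnchorGenerator.scale_anchors on a plain tensor
import torch

anchors = torch.tensor(
    [
        [(10.0, 13.0), (16.0, 30.0), (33.0, 23.0)],
        [(30.0, 61.0), (62.0, 45.0), (59.0, 119.0)],
        [(116.0, 90.0), (156.0, 198.0), (373.0, 326.0)],
    ]
)

(from_size, to_size) = ((416, 416), (640, 640))
scale_h = to_size[0] / from_size[0]
scale_w = to_size[1] / from_size[1]
anchors[..., 0].mul_(scale_w)  # widths scale with the horizontal resize factor
anchors[..., 1].mul_(scale_h)  # heights scale with the vertical resize factor

print(anchors[0, 0])  # tensor([15.3846, 20.0000]) -- 10*640/416 and 13*640/416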
birder/net/detection/yolo_v4.py
CHANGED
@@ -17,18 +17,12 @@ from torch import nn
 from torchvision.ops import Conv2dNormActivation
 from torchvision.ops import boxes as box_ops
 
+from birder.model_registry import registry
 from birder.net.base import DetectorBackbone
 from birder.net.detection.base import DetectionBaseNet
+from birder.net.detection.yolo_anchors import resolve_anchor_groups
 from birder.net.detection.yolo_v3 import YOLOAnchorGenerator
 from birder.net.detection.yolo_v3 import YOLOHead
-from birder.net.detection.yolo_v3 import scale_anchors
-
-# Default anchors from YOLO v4 (COCO)
-DEFAULT_ANCHORS = [
-    [(12.0, 16.0), (19.0, 36.0), (40.0, 28.0)],  # Small
-    [(36.0, 75.0), (76.0, 55.0), (72.0, 146.0)],  # Medium
-    [(142.0, 110.0), (192.0, 243.0), (459.0, 401.0)],  # Large
-]
 
 # Scale factors per detection scale to eliminate grid sensitivity
 DEFAULT_SCALE_XY = [1.2, 1.1, 1.05]  # [small, medium, large]
@@ -59,7 +53,6 @@ def decode_predictions(
         Number of classes.
     scale_xy
         Scale factor for grid sensitivity elimination.
-        YOLOv4 uses 1.05-1.2 depending on scale level.
 
     Returns
     -------
@@ -378,7 +371,6 @@ class YOLONeck(nn.Module):
 # pylint: disable=invalid-name
 class YOLO_v4(DetectionBaseNet):
     default_size = (608, 608)
-    auto_register = True
 
     def __init__(
         self,
@@ -390,22 +382,26 @@ class YOLO_v4(DetectionBaseNet):
         export_mode: bool = False,
     ) -> None:
         super().__init__(num_classes, backbone, config=config, size=size, export_mode=export_mode)
-        assert self.config is None, "
+        assert self.config is not None, "must set config"
 
         self.num_classes = self.num_classes - 1
 
         score_thresh = 0.05
         nms_thresh = 0.45
         detections_per_img = 300
-
-
-
-
-
-
-
-
-        self.
+        ignore_thresh = 0.7
+        noobj_coeff = 0.25
+        coord_coeff = 3.0
+        obj_coeff = 1.0
+        cls_coeff = 1.0
+        label_smoothing = 0.1
+        anchor_spec = self.config["anchors"]
+
+        self.ignore_thresh = ignore_thresh
+        self.noobj_coeff = noobj_coeff
+        self.coord_coeff = coord_coeff
+        self.obj_coeff = obj_coeff
+        self.cls_coeff = cls_coeff
         self.scale_xy = DEFAULT_SCALE_XY
         self.score_thresh = score_thresh
         self.nms_thresh = nms_thresh
@@ -414,13 +410,16 @@ class YOLO_v4(DetectionBaseNet):
         self.backbone.return_channels = self.backbone.return_channels[-3:]
         self.backbone.return_stages = self.backbone.return_stages[-3:]
 
-        self.label_smoothing =
+        self.label_smoothing = label_smoothing
         self.smooth_positive = 1.0 - self.label_smoothing
         self.smooth_negative = self.label_smoothing / self.num_classes
 
         self.neck = YOLONeck(self.backbone.return_channels)
 
-
+        anchors = resolve_anchor_groups(
+            anchor_spec, anchor_format="pixels", model_size=self.size, model_strides=(8, 16, 32)
+        )
+        self.anchor_generator = YOLOAnchorGenerator(anchors)
         num_anchors = self.anchor_generator.num_anchors_per_location()
         self.head = YOLOHead(self.neck.out_channels, num_anchors, self.num_classes)
 
@@ -441,8 +440,7 @@ class YOLO_v4(DetectionBaseNet):
         super().adjust_size(new_size)
 
         if adjust_anchors is True:
-            self.
-            self.anchor_generator = YOLOAnchorGenerator(self.anchors)
+            self.anchor_generator.scale_anchors(old_size, new_size)
 
     def freeze(self, freeze_classifier: bool = True) -> None:
         for param in self.parameters():
@@ -500,7 +498,7 @@ class YOLO_v4(DetectionBaseNet):
 
         # Build flat list of all anchors with their scale indices
         all_anchors = torch.concat(anchors, dim=0)
-        anchors_per_scale =
+        anchors_per_scale = self.anchor_generator.num_anchors_per_location()
         cumsum_anchors = torch.tensor([0] + anchors_per_scale, device=device).cumsum(0)
 
         # Get grid sizes and strides for each scale
@@ -651,6 +649,7 @@ class YOLO_v4(DetectionBaseNet):
         (target_tensors, obj_masks, noobj_masks) = self._build_targets(predictions, targets, anchors, strides)
 
         device = predictions[0].device
+        anchors_per_scale = self.anchor_generator.num_anchors_per_location()
         coord_loss = torch.tensor(0.0, device=device)
         obj_loss = torch.tensor(0.0, device=device)
         noobj_loss = torch.tensor(0.0, device=device)
@@ -659,7 +658,7 @@ class YOLO_v4(DetectionBaseNet):
         num_obj = 0
         for scale_idx, pred in enumerate(predictions):
             (N, _, H, W) = pred.size()
-            num_anchors_scale =
+            num_anchors_scale = anchors_per_scale[scale_idx]
             stride_h = strides[scale_idx][0]
             stride_w = strides[scale_idx][1]
             scale_xy = self.scale_xy[scale_idx]
@@ -829,3 +828,6 @@ class YOLO_v4(DetectionBaseNet):
         detections = self.postprocess_detections(decoded_predictions, images.image_sizes)
 
         return (detections, losses)
+
+
+registry.register_model_config("yolo_v4", YOLO_v4, config={"anchors": "yolo_v4"})