birder 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- birder/common/lib.py +2 -9
- birder/common/training_cli.py +24 -0
- birder/common/training_utils.py +338 -41
- birder/data/collators/detection.py +11 -3
- birder/data/dataloader/webdataset.py +12 -2
- birder/data/datasets/coco.py +8 -10
- birder/data/transforms/detection.py +30 -13
- birder/inference/detection.py +108 -4
- birder/inference/wbf.py +226 -0
- birder/kernels/load_kernel.py +16 -11
- birder/kernels/soft_nms/soft_nms.cpp +17 -18
- birder/net/__init__.py +8 -0
- birder/net/cait.py +4 -3
- birder/net/convnext_v1.py +5 -0
- birder/net/crossformer.py +33 -30
- birder/net/crossvit.py +4 -3
- birder/net/deit.py +3 -3
- birder/net/deit3.py +3 -3
- birder/net/detection/deformable_detr.py +2 -5
- birder/net/detection/detr.py +2 -5
- birder/net/detection/efficientdet.py +67 -93
- birder/net/detection/fcos.py +2 -7
- birder/net/detection/retinanet.py +2 -7
- birder/net/detection/rt_detr_v1.py +2 -0
- birder/net/detection/yolo_anchors.py +205 -0
- birder/net/detection/yolo_v2.py +25 -24
- birder/net/detection/yolo_v3.py +39 -40
- birder/net/detection/yolo_v4.py +28 -26
- birder/net/detection/yolo_v4_tiny.py +24 -20
- birder/net/efficientformer_v1.py +15 -9
- birder/net/efficientformer_v2.py +39 -29
- birder/net/efficientvit_msft.py +9 -7
- birder/net/fasternet.py +1 -1
- birder/net/fastvit.py +1 -0
- birder/net/flexivit.py +5 -4
- birder/net/gc_vit.py +671 -0
- birder/net/hiera.py +12 -9
- birder/net/hornet.py +9 -7
- birder/net/iformer.py +8 -6
- birder/net/levit.py +42 -30
- birder/net/lit_v1.py +472 -0
- birder/net/lit_v1_tiny.py +357 -0
- birder/net/lit_v2.py +436 -0
- birder/net/maxvit.py +67 -55
- birder/net/mobilenet_v4_hybrid.py +1 -1
- birder/net/mobileone.py +1 -0
- birder/net/mvit_v2.py +13 -12
- birder/net/pit.py +4 -3
- birder/net/pvt_v1.py +4 -1
- birder/net/repghost.py +1 -0
- birder/net/repvgg.py +1 -0
- birder/net/repvit.py +1 -0
- birder/net/resnet_v1.py +1 -1
- birder/net/resnext.py +67 -25
- birder/net/rope_deit3.py +5 -3
- birder/net/rope_flexivit.py +7 -4
- birder/net/rope_vit.py +10 -5
- birder/net/se_resnet_v1.py +46 -0
- birder/net/se_resnext.py +3 -0
- birder/net/simple_vit.py +11 -8
- birder/net/swin_transformer_v1.py +71 -68
- birder/net/swin_transformer_v2.py +38 -31
- birder/net/tiny_vit.py +20 -10
- birder/net/transnext.py +38 -28
- birder/net/vit.py +5 -19
- birder/net/vit_parallel.py +5 -4
- birder/net/vit_sam.py +38 -37
- birder/net/vovnet_v1.py +15 -0
- birder/net/vovnet_v2.py +31 -1
- birder/ops/msda.py +108 -43
- birder/ops/swattention.py +124 -61
- birder/results/detection.py +4 -0
- birder/scripts/benchmark.py +110 -32
- birder/scripts/predict.py +8 -0
- birder/scripts/predict_detection.py +18 -11
- birder/scripts/train.py +48 -46
- birder/scripts/train_barlow_twins.py +44 -45
- birder/scripts/train_byol.py +44 -45
- birder/scripts/train_capi.py +50 -49
- birder/scripts/train_data2vec.py +45 -47
- birder/scripts/train_data2vec2.py +45 -47
- birder/scripts/train_detection.py +83 -50
- birder/scripts/train_dino_v1.py +60 -47
- birder/scripts/train_dino_v2.py +86 -52
- birder/scripts/train_dino_v2_dist.py +84 -50
- birder/scripts/train_franca.py +51 -52
- birder/scripts/train_i_jepa.py +45 -47
- birder/scripts/train_ibot.py +51 -53
- birder/scripts/train_kd.py +194 -76
- birder/scripts/train_mim.py +44 -45
- birder/scripts/train_mmcr.py +44 -45
- birder/scripts/train_rotnet.py +45 -46
- birder/scripts/train_simclr.py +44 -45
- birder/scripts/train_vicreg.py +44 -45
- birder/tools/auto_anchors.py +20 -1
- birder/tools/convert_model.py +18 -15
- birder/tools/det_results.py +114 -2
- birder/tools/pack.py +172 -103
- birder/tools/quantize_model.py +73 -67
- birder/tools/show_det_iterator.py +10 -1
- birder/version.py +1 -1
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/METADATA +4 -3
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/RECORD +107 -101
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/WHEEL +0 -0
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/entry_points.txt +0 -0
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {birder-0.2.2.dist-info → birder-0.3.0.dist-info}/top_level.txt +0 -0
birder/data/dataloader/webdataset.py
CHANGED

@@ -22,9 +22,19 @@ def make_wds_loader(
     shuffle: bool = False,
     *,
     exact: bool = False,
+    infinite: bool = False,
 ) -> DataLoader:
+    assert exact is False or infinite is False
+
+    if infinite is True:
+        dataset_iterable = dataset.repeat()
+    elif exact is False:
+        dataset_iterable = dataset.repeat()
+    else:
+        dataset_iterable = dataset
+
     dataloader = wds.WebLoader(
-
+        dataset_iterable,
         batch_size=batch_size,
         num_workers=num_workers,
         prefetch_factor=prefetch_factor,
@@ -43,7 +53,7 @@ def make_wds_loader(
     epoch_size = math.ceil(len(dataset) / (batch_size * world_size))

     dataloader = dataloader.with_length(epoch_size, silent=True)
-    if exact is False:
+    if exact is False and infinite is False:
         dataloader = dataloader.with_epoch(epoch_size)

     return dataloader
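A minimal usage sketch of the new `infinite` mode (the dataset object and the other keyword arguments are placeholders; only the `exact`/`infinite` behaviour comes from the hunk above, and the full `make_wds_loader` signature is not shown here): `infinite=True` repeats the underlying WebDataset and skips the `with_epoch()` cap, `exact=True` keeps a single exact pass, and the new assert forbids combining the two.

```python
# Hedged sketch: streaming an "infinite" WebDataset loader. Arguments other than
# exact/infinite are assumptions; the real signature may require more parameters.
from birder.data.dataloader.webdataset import make_wds_loader

loader = make_wds_loader(
    dataset,            # a wds.WebDataset built elsewhere (placeholder)
    batch_size=32,
    num_workers=4,
    prefetch_factor=2,
    infinite=True,      # repeat forever, no with_epoch() truncation
)

for step, batch in enumerate(loader):
    ...                 # caller decides when to stop, e.g. after N optimizer steps
```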
birder/data/datasets/coco.py
CHANGED

@@ -98,10 +98,14 @@ class CocoTraining(CocoBase):
 class CocoInference(CocoBase):
     def __getitem__(self, index: int) -> tuple[str, torch.Tensor, Any, list[int]]:
         coco_id = self.dataset.ids[index]
-
+        img_info = self.dataset.coco.loadImgs(coco_id)[0]
+        path = img_info["file_name"]
         (sample, labels) = self.dataset[index]

-
+        # Get original image size (height, width) before transforms
+        orig_size = [img_info["height"], img_info["width"]]
+
+        return (path, sample, labels, orig_size)


 class CocoMosaicTraining(CocoBase):
@@ -127,9 +131,7 @@ class CocoMosaicTraining(CocoBase):
         self._mosaic_decay_epochs: Optional[int] = None
         self._mosaic_decay_start: Optional[int] = None

-    def configure_mosaic_linear_decay(
-        self, base_prob: float, total_epochs: int, decay_fraction: float = 0.1
-    ) -> None:
+    def configure_mosaic_linear_decay(self, base_prob: float, total_epochs: int, decay_fraction: float = 0.1) -> None:
         if total_epochs <= 0:
             raise ValueError("total_epochs must be positive")
         if decay_fraction <= 0.0 or decay_fraction > 1.0:
@@ -141,11 +143,7 @@ class CocoMosaicTraining(CocoBase):
         self._mosaic_decay_start = max(1, total_epochs - decay_epochs + 1)

     def update_mosaic_prob(self, epoch: int) -> Optional[float]:
-        if (
-            self._mosaic_base_prob is None
-            or self._mosaic_decay_epochs is None
-            or self._mosaic_decay_start is None
-        ):
+        if self._mosaic_base_prob is None or self._mosaic_decay_epochs is None or self._mosaic_decay_start is None:
             return None

         if epoch >= self._mosaic_decay_start:
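In short, `CocoInference.__getitem__` now also looks up the COCO image record so it can return the original `[height, width]` alongside the sample; downstream code can use it to map predictions back to source-image coordinates. A minimal consumption sketch (the dataset constructor arguments are omitted placeholders; only the 4-tuple layout comes from the hunk above):

```python
# Hedged sketch: reading the new CocoInference item layout.
from birder.data.datasets.coco import CocoInference

dataset = CocoInference(...)  # root / annotation / transform arguments omitted
(path, sample, labels, orig_size) = dataset[0]
(height, width) = orig_size   # original image size, taken from the COCO image info
```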
birder/data/transforms/detection.py
CHANGED

@@ -1,3 +1,4 @@
+import math
 import random
 from collections.abc import Callable
 from typing import Any
@@ -10,6 +11,24 @@ from torchvision.transforms import v2

 from birder.data.transforms.classification import RGBType

+MULTISCALE_STEP = 32
+DEFAULT_MULTISCALE_MIN_SIZE = 480
+DEFAULT_MULTISCALE_MAX_SIZE = 800
+
+
+def build_multiscale_sizes(
+    min_size: Optional[int] = None, max_size: int = DEFAULT_MULTISCALE_MAX_SIZE
+) -> tuple[int, ...]:
+    if min_size is None:
+        min_size = DEFAULT_MULTISCALE_MIN_SIZE
+
+    start = int(math.ceil(min_size / MULTISCALE_STEP) * MULTISCALE_STEP)
+    end = int(math.floor(max_size / MULTISCALE_STEP) * MULTISCALE_STEP)
+    if end < start:
+        return (start,)
+
+    return tuple(range(start, end + 1, MULTISCALE_STEP))
+

 class ResizeWithRandomInterpolation(nn.Module):
     def __init__(
@@ -39,6 +58,7 @@ def get_birder_augment(
     dynamic_size: bool,
     multiscale: bool,
     max_size: Optional[int],
+    multiscale_min_size: Optional[int],
     post_mosaic: bool = False,
 ) -> Callable[..., torch.Tensor]:
     if dynamic_size is True:
@@ -78,9 +98,7 @@ def get_birder_augment(
     # Resize
     if multiscale is True:
         transformations.append(
-            v2.RandomShortestSize(
-                min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=max_size or 1333
-            ),
+            v2.RandomShortestSize(min_size=build_multiscale_sizes(multiscale_min_size), max_size=max_size or 1333),
         )
     else:
         transformations.append(
@@ -132,6 +150,7 @@ def get_birder_augment(
 AugType = Literal["birder", "lsj", "multiscale", "ssd", "ssdlite", "yolo", "detr"]


+# pylint: disable=too-many-return-statements
 def training_preset(
     size: tuple[int, int],
     aug_type: AugType,
@@ -140,6 +159,7 @@ def training_preset(
     dynamic_size: bool = False,
     multiscale: bool = False,
     max_size: Optional[int] = None,
+    multiscale_min_size: Optional[int] = None,
     post_mosaic: bool = False,
 ) -> Callable[..., torch.Tensor]:
     mean = rgv_values["mean"]
@@ -159,7 +179,9 @@ def training_preset(
         return v2.Compose(  # type:ignore
             [
                 v2.ToImage(),
-                get_birder_augment(
+                get_birder_augment(
+                    size, level, fill_value, dynamic_size, multiscale, max_size, multiscale_min_size, post_mosaic
+                ),
                 v2.ToDtype(torch.float32, scale=True),
                 v2.Normalize(mean=mean, std=std),
                 v2.ToPureTensor(),
@@ -190,9 +212,7 @@ def training_preset(
         return v2.Compose(  # type: ignore
             [
                 v2.ToImage(),
-                v2.RandomShortestSize(
-                    min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=max_size or 1333
-                ),
+                v2.RandomShortestSize(min_size=build_multiscale_sizes(multiscale_min_size), max_size=max_size or 1333),
                 v2.RandomHorizontalFlip(0.5),
                 v2.SanitizeBoundingBoxes(),
                 v2.ToDtype(torch.float32, scale=True),
@@ -264,21 +284,18 @@ def training_preset(
         )

     if aug_type == "detr":
+        multiscale_sizes = build_multiscale_sizes(multiscale_min_size)
         return v2.Compose(  # type: ignore
             [
                 v2.ToImage(),
                 v2.RandomChoice(
                     [
-                        v2.RandomShortestSize(
-                            (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=max_size or 1333
-                        ),
+                        v2.RandomShortestSize(min_size=multiscale_sizes, max_size=max_size or 1333),
                         v2.Compose(
                             [
                                 v2.RandomShortestSize((400, 500, 600)),
                                 v2.RandomIoUCrop() if post_mosaic is False else v2.Identity(),  # RandomSizeCrop
-                                v2.RandomShortestSize(
-                                    (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=max_size or 1333
-                                ),
+                                v2.RandomShortestSize(min_size=multiscale_sizes, max_size=max_size or 1333),
                             ]
                         ),
                     ]
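Because `build_multiscale_sizes` is shown in full above, its behaviour can be stated directly: it rounds the minimum size up and the maximum size down to multiples of `MULTISCALE_STEP` (32) and returns every step in between; the defaults reproduce the previously hard-coded `(480, ..., 800)` tuple, and an empty range falls back to a single size.

```python
# Expected outputs of build_multiscale_sizes() as defined in the hunk above.
from birder.data.transforms.detection import build_multiscale_sizes

print(build_multiscale_sizes())          # (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
print(build_multiscale_sizes(600, 704))  # (608, 640, 672, 704)
print(build_multiscale_sizes(700, 600))  # (704,) -- end < start, single-size fallback
```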
birder/inference/detection.py
CHANGED

@@ -5,17 +5,99 @@ from typing import Optional
 import torch
 import torch.amp
 from PIL import Image
+from torch.nn import functional as F
 from torch.utils.data import DataLoader
 from tqdm import tqdm

 from birder.conf import settings
+from birder.data.collators.detection import batch_images
 from birder.data.transforms.detection import InferenceTransform
+from birder.inference.wbf import fuse_detections_wbf
+from birder.net.base import make_divisible
+
+
+def _normalize_image_sizes(inputs: torch.Tensor, image_sizes: Optional[list[list[int]]]) -> list[list[int]]:
+    if image_sizes is not None:
+        return image_sizes
+
+    (_, _, height, width) = inputs.shape
+    return [[height, width] for _ in range(inputs.size(0))]
+
+
+def _hflip_inputs(inputs: torch.Tensor, image_sizes: list[list[int]]) -> torch.Tensor:
+    # Detection collator pads on the right/bottom, so flip only the valid region to keep padding aligned.
+    flipped = inputs.clone()
+    for idx, (height, width) in enumerate(image_sizes):
+        flipped[idx, :, :height, :width] = torch.flip(inputs[idx, :, :height, :width], dims=[2])
+
+    return flipped
+
+
+def _resize_batch(
+    inputs: torch.Tensor, image_sizes: list[list[int]], scale: float, size_divisible: int
+) -> tuple[torch.Tensor, torch.Tensor, list[list[int]]]:
+    resized_images: list[torch.Tensor] = []
+    for idx, (height, width) in enumerate(image_sizes):
+        target_h = make_divisible(height * scale, size_divisible)
+        target_w = make_divisible(width * scale, size_divisible)
+        image = inputs[idx, :, :height, :width]
+        resized = F.interpolate(image.unsqueeze(0), size=(target_h, target_w), mode="bilinear", align_corners=False)
+        resized_images.append(resized.squeeze(0))
+
+    return batch_images(resized_images, size_divisible)
+
+
+def _rescale_boxes(boxes: torch.Tensor, from_size: list[int], to_size: list[int]) -> torch.Tensor:
+    scale_w = to_size[1] / from_size[1]
+    scale_h = to_size[0] / from_size[0]
+    scale = boxes.new_tensor([scale_w, scale_h, scale_w, scale_h])
+    return boxes * scale
+
+
+def _rescale_detections(
+    detections: list[dict[str, torch.Tensor]],
+    from_sizes: list[list[int]],
+    to_sizes: list[list[int]],
+) -> list[dict[str, torch.Tensor]]:
+    for idx, (detection, from_size, to_size) in enumerate(zip(detections, from_sizes, to_sizes)):
+        boxes = detection["boxes"]
+        if boxes.numel() == 0:
+            continue
+
+        detections[idx]["boxes"] = _rescale_boxes(boxes, from_size, to_size)
+
+    return detections
+
+
+def _invert_hflip_boxes(boxes: torch.Tensor, image_size: list[int]) -> torch.Tensor:
+    width = boxes.new_tensor(image_size[1])
+    x1 = boxes[:, 0]
+    x2 = boxes[:, 2]
+    flipped = boxes.clone()
+    flipped[:, 0] = width - x2
+    flipped[:, 2] = width - x1
+
+    return flipped
+
+
+def _invert_detections(
+    detections: list[dict[str, torch.Tensor]], image_sizes: list[list[int]]
+) -> list[dict[str, torch.Tensor]]:
+    for idx, (detection, image_size) in enumerate(zip(detections, image_sizes)):
+        boxes = detection["boxes"]
+        if boxes.numel() == 0:
+            continue
+
+        detections[idx]["boxes"] = _invert_hflip_boxes(boxes, image_size)
+
+    return detections


 def infer_image(
     net: torch.nn.Module | torch.ScriptModule,
     sample: Image.Image | str,
     transform: Callable[..., torch.Tensor],
+    tta: bool = False,
     device: Optional[torch.device] = None,
     score_threshold: Optional[float] = None,
     **kwargs: Any,
@@ -43,7 +125,7 @@ def infer_image(
         device = torch.device("cpu")

     input_tensor = transform(image).unsqueeze(dim=0).to(device)
-    detections = infer_batch(net, input_tensor, **kwargs)
+    detections = infer_batch(net, input_tensor, tta=tta, **kwargs)
     if score_threshold is not None:
         for i, detection in enumerate(detections):
             idxs = torch.where(detection["scores"] > score_threshold)
@@ -63,16 +145,36 @@ def infer_batch(
     inputs: torch.Tensor,
     masks: Optional[torch.Tensor] = None,
     image_sizes: Optional[list[list[int]]] = None,
+    tta: bool = False,
     **kwargs: Any,
 ) -> list[dict[str, torch.Tensor]]:
-
-
+    if tta is False:
+        (detections, _) = net(inputs, masks=masks, image_sizes=image_sizes, **kwargs)
+        return detections  # type: ignore[no-any-return]
+
+    normalized_sizes = _normalize_image_sizes(inputs, image_sizes)
+    detections_list: list[list[dict[str, torch.Tensor]]] = []
+
+    for scale in (0.8, 1.0, 1.2):
+        (scaled_inputs, scaled_masks, scaled_sizes) = _resize_batch(inputs, normalized_sizes, scale, size_divisible=32)
+        (detections, _) = net(scaled_inputs, masks=scaled_masks, image_sizes=scaled_sizes, **kwargs)
+        detections = _rescale_detections(detections, scaled_sizes, normalized_sizes)
+        detections_list.append(detections)
+
+        flipped_inputs = _hflip_inputs(scaled_inputs, scaled_sizes)
+        (flipped_detections, _) = net(flipped_inputs, masks=scaled_masks, image_sizes=scaled_sizes, **kwargs)
+        flipped_detections = _invert_detections(flipped_detections, scaled_sizes)
+        flipped_detections = _rescale_detections(flipped_detections, scaled_sizes, normalized_sizes)
+        detections_list.append(flipped_detections)
+
+    return fuse_detections_wbf(detections_list, iou_thr=0.55, conf_type="avg")


 def infer_dataloader(
     device: torch.device,
     net: torch.nn.Module | torch.ScriptModule,
     dataloader: DataLoader,
+    tta: bool = False,
     model_dtype: torch.dtype = torch.float32,
     amp: bool = False,
     amp_dtype: Optional[torch.dtype] = None,
@@ -97,6 +199,8 @@ def infer_dataloader(
         The model to use for inference.
     dataloader
         The DataLoader containing the dataset to perform inference on.
+    tta
+        Run inference with multi-scale and horizontal flip test time augmentation and fuse results with WBF.
     model_dtype
         The base dtype to use.
     amp
@@ -142,7 +246,7 @@ def infer_dataloader(
                 masks = masks.to(device, non_blocking=True)

             with torch.amp.autocast(device.type, enabled=amp, dtype=amp_dtype):
-                detections = infer_batch(net, inputs, masks, image_sizes)
+                detections = infer_batch(net, inputs, masks=masks, image_sizes=image_sizes, tta=tta)

             detections = InferenceTransform.postprocess(detections, image_sizes, orig_sizes)
             if targets[0] != settings.NO_LABEL:
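A minimal sketch of exercising the new `tta` path (the model and transform objects are assumed to come from birder's usual setup and are not part of the diff): with `tta=True`, `infer_batch` runs the network at scales 0.8/1.0/1.2, each with and without a horizontal flip, maps all boxes back to the original resolution, and fuses the six result sets with Weighted Boxes Fusion.

```python
# Hedged sketch: calling detection inference with the new test-time augmentation flag.
# `net` and `transform` are placeholders built elsewhere.
import torch

from birder.inference.detection import infer_batch, infer_image

detections = infer_image(net, "example.jpeg", transform, tta=True, score_threshold=0.5)

# Or on an already-transformed batch:
batch = torch.randn(2, 3, 640, 640)
detections = infer_batch(net, batch, tta=True)  # list of {"boxes", "scores", "labels"} dicts
```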
birder/inference/wbf.py
ADDED

@@ -0,0 +1,226 @@
+"""
+Weighted Boxes Fusion, adapted from
+https://github.com/ZFTurbo/Weighted-Boxes-Fusion
+
+Paper "Weighted boxes fusion: Ensembling boxes from different object detection models",
+https://arxiv.org/abs/1910.13302
+"""
+
+# Reference license: MIT
+
+from dataclasses import dataclass
+from typing import Literal
+from typing import Optional
+
+import torch
+from torchvision.ops import box_iou
+
+ConfType = Literal["avg", "max", "box_and_model_avg", "absent_model_aware_avg"]
+
+
+@dataclass
+class BoxCluster:
+    box: torch.Tensor
+    score_weight_sum: torch.Tensor
+    weight_sum: torch.Tensor
+    max_score: torch.Tensor
+    boxes_count: int
+
+    @classmethod
+    def from_entry(cls, box: torch.Tensor, score: torch.Tensor, weight: torch.Tensor) -> "BoxCluster":
+        score_weight = score * weight
+        return cls(
+            box=box.clone(),
+            score_weight_sum=score_weight,
+            weight_sum=weight,
+            max_score=score,
+            boxes_count=1,
+        )
+
+    def add(self, box: torch.Tensor, score: torch.Tensor, weight: torch.Tensor) -> None:
+        score_weight = score * weight
+        total_weight = self.score_weight_sum + score_weight
+        self.box = (self.box * self.score_weight_sum + box * score_weight) / total_weight
+        self.score_weight_sum = total_weight
+        self.weight_sum += weight
+        self.max_score = torch.maximum(self.max_score, score)
+        self.boxes_count += 1
+
+
+# pylint: disable=too-many-locals,too-many-branches
+def weighted_boxes_fusion(
+    boxes_list: list[torch.Tensor],
+    scores_list: list[torch.Tensor],
+    labels_list: list[torch.Tensor],
+    weights: Optional[list[float]] = None,
+    iou_thr: float = 0.55,
+    skip_box_thr: float = 0.0,
+    conf_type: ConfType = "avg",
+    allows_overflow: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    if weights is None:
+        weights = [1.0] * len(boxes_list)
+    if len(weights) != len(boxes_list):
+        raise ValueError("weights must match number of box sets")
+
+    if len(boxes_list) > 0:
+        device = boxes_list[0].device
+    else:
+        device = torch.device("cpu")
+
+    boxes_all: list[torch.Tensor] = []
+    scores_all: list[torch.Tensor] = []
+    labels_all: list[torch.Tensor] = []
+    weights_all: list[torch.Tensor] = []
+    for boxes, scores, labels, weight in zip(boxes_list, scores_list, labels_list, weights):
+        if boxes.numel() == 0:
+            continue
+
+        boxes_tensor = boxes.detach().to(dtype=torch.float32)
+        scores_tensor = scores.detach().to(dtype=torch.float32)
+        labels_tensor = labels.detach().to(dtype=torch.int64)
+
+        keep = scores_tensor >= skip_box_thr
+        if not keep.any():
+            continue
+
+        boxes_tensor = boxes_tensor[keep]
+        scores_tensor = scores_tensor[keep]
+        labels_tensor = labels_tensor[keep]
+        weights_tensor = scores_tensor.new_full(scores_tensor.shape, weight)
+
+        boxes_all.append(boxes_tensor)
+        scores_all.append(scores_tensor)
+        labels_all.append(labels_tensor)
+        weights_all.append(weights_tensor)
+
+    if len(boxes_all) == 0:
+        empty_boxes = torch.zeros((0, 4), dtype=torch.float32, device=device)
+        empty_scores = torch.zeros((0,), dtype=torch.float32, device=device)
+        empty_labels = torch.zeros((0,), dtype=torch.int64, device=device)
+        return (empty_boxes, empty_scores, empty_labels)
+
+    boxes_tensor = torch.concat(boxes_all, dim=0)
+    scores_tensor = torch.concat(scores_all, dim=0)
+    labels_tensor = torch.concat(labels_all, dim=0)
+    weights_tensor = torch.concat(weights_all, dim=0)
+    labels_unique = torch.unique(labels_tensor)
+
+    total_weight = float(sum(weights))
+    num_models = len(weights)
+    fused_boxes: list[torch.Tensor] = []
+    fused_scores: list[torch.Tensor] = []
+    fused_labels: list[torch.Tensor] = []
+
+    for label in labels_unique:
+        label_mask = labels_tensor == label
+        label_boxes = boxes_tensor[label_mask]
+        label_scores = scores_tensor[label_mask]
+        label_weights = weights_tensor[label_mask]
+        order = torch.argsort(label_scores, descending=True)
+        clusters: list[BoxCluster] = []
+        for idx in order:
+            box = label_boxes[idx]
+            score = label_scores[idx]
+            weight = label_weights[idx]
+            if len(clusters) == 0:
+                clusters.append(BoxCluster.from_entry(box, score, weight))
+                continue
+
+            cluster_boxes = torch.stack([cluster.box for cluster in clusters], dim=0)
+            ious = box_iou(box.unsqueeze(0), cluster_boxes).squeeze(0)
+            max_iou, max_idx = torch.max(ious, dim=0)
+            if max_iou > iou_thr:
+                clusters[int(max_idx)].add(box, score, weight)
+            else:
+                clusters.append(BoxCluster.from_entry(box, score, weight))
+
+        for cluster in clusters:
+            if conf_type == "avg":
+                score = cluster.score_weight_sum / cluster.weight_sum
+            elif conf_type == "max":
+                score = cluster.max_score
+            elif conf_type == "box_and_model_avg":
+                score = (cluster.score_weight_sum / cluster.weight_sum) * (cluster.boxes_count / num_models)
+            elif conf_type == "absent_model_aware_avg":
+                score = cluster.score_weight_sum / total_weight
+            else:
+                raise ValueError(f"Unsupported conf_type: {conf_type}")
+
+            if allows_overflow is False:
+                score = score.clamp(max=1.0)
+
+            fused_boxes.append(cluster.box)
+            fused_scores.append(score)
+            fused_labels.append(label)
+
+    fused_scores_tensor = torch.stack(fused_scores)
+    order = torch.argsort(fused_scores_tensor, descending=True)
+    fused_boxes_tensor = torch.stack(fused_boxes, dim=0)[order]
+    fused_scores_tensor = fused_scores_tensor[order]
+    fused_labels_tensor = torch.stack(fused_labels)[order]
+
+    return (fused_boxes_tensor, fused_scores_tensor, fused_labels_tensor)
+
+
+def fuse_detections_wbf_single(
+    detections: list[dict[str, torch.Tensor]],
+    weights: Optional[list[float]] = None,
+    iou_thr: float = 0.55,
+    skip_box_thr: float = 0.0,
+    conf_type: ConfType = "avg",
+    allows_overflow: bool = False,
+) -> dict[str, torch.Tensor]:
+    if len(detections) == 0:
+        return {
+            "boxes": torch.zeros((0, 4)),
+            "scores": torch.zeros((0,)),
+            "labels": torch.zeros((0,), dtype=torch.int64),
+        }
+
+    boxes_list = [detection["boxes"] for detection in detections]
+    scores_list = [detection["scores"] for detection in detections]
+    labels_list = [detection["labels"] for detection in detections]
+
+    (boxes, scores, labels) = weighted_boxes_fusion(
+        boxes_list,
+        scores_list,
+        labels_list,
+        weights=weights,
+        iou_thr=iou_thr,
+        skip_box_thr=skip_box_thr,
+        conf_type=conf_type,
+        allows_overflow=allows_overflow,
+    )
+
+    return {"boxes": boxes, "scores": scores, "labels": labels}
+
+
+def fuse_detections_wbf(
+    detections_list: list[list[dict[str, torch.Tensor]]],
+    weights: Optional[list[float]] = None,
+    iou_thr: float = 0.55,
+    skip_box_thr: float = 0.0,
+    conf_type: ConfType = "avg",
+    allows_overflow: bool = False,
+) -> list[dict[str, torch.Tensor]]:
+    if len(detections_list) == 0:
+        return []
+
+    # Outer list is the augmentations, inner is the batch
+    batch_size = len(detections_list[0])
+    fused: list[dict[str, torch.Tensor]] = []
+    for idx in range(batch_size):
+        per_image = [detections[idx] for detections in detections_list]
+        fused.append(
+            fuse_detections_wbf_single(
+                per_image,
+                weights=weights,
+                iou_thr=iou_thr,
+                skip_box_thr=skip_box_thr,
+                conf_type=conf_type,
+                allows_overflow=allows_overflow,
+            )
+        )
+
+    return fused
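A small worked example of the fusion math in `weighted_boxes_fusion` above: two overlapping boxes of the same label form one cluster, the fused box is the score-weighted average of the member boxes, and with `conf_type="avg"` the fused score is the weight-normalized mean.

```python
# Worked example for weighted_boxes_fusion() as defined above (single box set, equal weights).
import torch

from birder.inference.wbf import weighted_boxes_fusion

boxes = [torch.tensor([[0.0, 0.0, 10.0, 10.0], [1.0, 1.0, 11.0, 11.0]])]
scores = [torch.tensor([0.9, 0.6])]
labels = [torch.tensor([1, 1])]

(fused_boxes, fused_scores, fused_labels) = weighted_boxes_fusion(boxes, scores, labels, iou_thr=0.55)
# IoU of the two boxes is ~0.68 > 0.55, so they merge into one cluster:
#   fused box   = (0.9 * [0, 0, 10, 10] + 0.6 * [1, 1, 11, 11]) / 1.5 = [0.4, 0.4, 10.4, 10.4]
#   fused score = (0.9 + 0.6) / 2 = 0.75  (conf_type="avg")
```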
birder/kernels/load_kernel.py
CHANGED

@@ -14,11 +14,24 @@ logger = logging.getLogger(__name__)


 _CACHED_KERNELS: dict[str, ModuleType] = {}
+_CUSTOM_KERNELS_ENABLED = True
+
+
+def set_custom_kernels_enabled(enabled: bool) -> None:
+    global _CUSTOM_KERNELS_ENABLED  # pylint: disable=global-statement
+    _CUSTOM_KERNELS_ENABLED = enabled
+
+
+def is_custom_kernels_enabled() -> bool:
+    if os.environ.get("DISABLE_CUSTOM_KERNELS", "0") == "1":
+        return False
+
+    return _CUSTOM_KERNELS_ENABLED


 def load_msda() -> Optional[ModuleType]:
     name = "msda"
-    if torch.cuda.is_available() is False or
+    if torch.cuda.is_available() is False or is_custom_kernels_enabled() is False:
         return None

     if name in _CACHED_KERNELS:
@@ -60,7 +73,7 @@ def load_msda() -> Optional[ModuleType]:

 def load_swattention() -> Optional[ModuleType]:
     name = "swattention"
-    if torch.cuda.is_available() is False or
+    if torch.cuda.is_available() is False or is_custom_kernels_enabled() is False:
         return None

     if name in _CACHED_KERNELS:
@@ -103,7 +116,7 @@ def load_swattention() -> Optional[ModuleType]:

 def load_soft_nms() -> Optional[ModuleType]:
     name = "soft_nms"
-    if
+    if is_custom_kernels_enabled() is False:
         return None

     if name in _CACHED_KERNELS:
@@ -120,14 +133,6 @@ def load_soft_nms() -> Optional[ModuleType]:
     soft_nms: Optional[ModuleType] = load(
         "soft_nms",
         src_files,
-        with_cuda=True,
-        extra_cflags=["-DWITH_CUDA=1"],
-        extra_cuda_cflags=[
-            "-DCUDA_HAS_FP16=1",
-            "-D__CUDA_NO_HALF_OPERATORS__",
-            "-D__CUDA_NO_HALF_CONVERSIONS__",
-            "-D__CUDA_NO_HALF2_OPERATORS__",
-        ],
     )

     if soft_nms is not None:
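Both new toggles gate the JIT-compiled extensions the same way; a short sketch of combining them (only `set_custom_kernels_enabled`, `is_custom_kernels_enabled`, and the `DISABLE_CUSTOM_KERNELS` environment variable appear in the diff, the rest is illustrative):

```python
# Sketch: disabling the custom CUDA/C++ kernels programmatically or via the environment.
import os

from birder.kernels.load_kernel import is_custom_kernels_enabled, set_custom_kernels_enabled

set_custom_kernels_enabled(False)            # force pure-PyTorch fallbacks
assert is_custom_kernels_enabled() is False

set_custom_kernels_enabled(True)
os.environ["DISABLE_CUSTOM_KERNELS"] = "1"   # the env var wins even when the flag is True
assert is_custom_kernels_enabled() is False
```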
birder/kernels/soft_nms/soft_nms.cpp
CHANGED

@@ -61,24 +61,23 @@ void update_sorting_order(torch::Tensor& boxes, torch::Tensor& scores, torch::Te
     std::tie(max_score, t_max_idx) = torch::max(scores.index({Slice(idx + 1, None)}), 0);

     // max_idx is computed from sliced data, therefore need to convert it to "global" max idx
-    auto max_idx = t_max_idx
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    }
+    auto max_idx = t_max_idx + (idx + 1);
+    auto should_swap = scores.index({idx}) < max_score;
+
+    auto boxes_idx = boxes.index({idx}).clone();
+    auto boxes_max = boxes.index({max_idx}).clone();
+    boxes.index_put_({idx}, torch::where(should_swap, boxes_max, boxes_idx));
+    boxes.index_put_({max_idx}, torch::where(should_swap, boxes_idx, boxes_max));
+
+    auto scores_idx = scores.index({idx}).clone();
+    auto scores_max = scores.index({max_idx}).clone();
+    scores.index_put_({idx}, torch::where(should_swap, scores_max, scores_idx));
+    scores.index_put_({max_idx}, torch::where(should_swap, scores_idx, scores_max));
+
+    auto areas_idx = areas.index({idx}).clone();
+    auto areas_max = areas.index({max_idx}).clone();
+    areas.index_put_({idx}, torch::where(should_swap, areas_max, areas_idx));
+    areas.index_put_({max_idx}, torch::where(should_swap, areas_idx, areas_max));
 }

 std::tuple<torch::Tensor, torch::Tensor> soft_nms(
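The rewritten C++ above replaces an explicit branch with a `torch::where`-based conditional swap, so the box, score, and area rows at `idx` and `max_idx` are exchanged only when a higher-scoring candidate exists further down the sort order. An equivalent Python/PyTorch sketch of that swap (names chosen for illustration):

```python
# Equivalent of the conditional swap in update_sorting_order(), sketched in Python.
import torch


def conditional_swap(values: torch.Tensor, idx: int, max_idx: torch.Tensor, should_swap: torch.Tensor) -> None:
    row_idx = values[idx].clone()
    row_max = values[max_idx].clone()
    values[idx] = torch.where(should_swap, row_max, row_idx)
    values[max_idx] = torch.where(should_swap, row_idx, row_max)


scores = torch.tensor([0.2, 0.9, 0.5])
idx = 0
(max_score, t_max_idx) = torch.max(scores[idx + 1:], dim=0)
max_idx = t_max_idx + (idx + 1)          # convert the sliced index back to a global index
should_swap = scores[idx] < max_score
conditional_swap(scores, idx, max_idx, should_swap)
# scores is now [0.9, 0.2, 0.5]
```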