inference-models 0.18.3 (inference_models-0.18.3-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inference_models/__init__.py +36 -0
- inference_models/configuration.py +72 -0
- inference_models/constants.py +2 -0
- inference_models/entities.py +5 -0
- inference_models/errors.py +137 -0
- inference_models/logger.py +52 -0
- inference_models/model_pipelines/__init__.py +0 -0
- inference_models/model_pipelines/auto_loaders/__init__.py +0 -0
- inference_models/model_pipelines/auto_loaders/core.py +120 -0
- inference_models/model_pipelines/auto_loaders/pipelines_registry.py +36 -0
- inference_models/model_pipelines/face_and_gaze_detection/__init__.py +0 -0
- inference_models/model_pipelines/face_and_gaze_detection/mediapipe_l2cs.py +200 -0
- inference_models/models/__init__.py +0 -0
- inference_models/models/auto_loaders/__init__.py +0 -0
- inference_models/models/auto_loaders/access_manager.py +168 -0
- inference_models/models/auto_loaders/auto_negotiation.py +1329 -0
- inference_models/models/auto_loaders/auto_resolution_cache.py +129 -0
- inference_models/models/auto_loaders/constants.py +7 -0
- inference_models/models/auto_loaders/core.py +1341 -0
- inference_models/models/auto_loaders/dependency_models.py +52 -0
- inference_models/models/auto_loaders/entities.py +57 -0
- inference_models/models/auto_loaders/models_registry.py +497 -0
- inference_models/models/auto_loaders/presentation_utils.py +333 -0
- inference_models/models/auto_loaders/ranking.py +413 -0
- inference_models/models/auto_loaders/utils.py +31 -0
- inference_models/models/base/__init__.py +0 -0
- inference_models/models/base/classification.py +123 -0
- inference_models/models/base/depth_estimation.py +62 -0
- inference_models/models/base/documents_parsing.py +111 -0
- inference_models/models/base/embeddings.py +66 -0
- inference_models/models/base/instance_segmentation.py +87 -0
- inference_models/models/base/keypoints_detection.py +93 -0
- inference_models/models/base/object_detection.py +143 -0
- inference_models/models/base/semantic_segmentation.py +74 -0
- inference_models/models/base/types.py +5 -0
- inference_models/models/clip/__init__.py +0 -0
- inference_models/models/clip/clip_onnx.py +148 -0
- inference_models/models/clip/clip_pytorch.py +104 -0
- inference_models/models/clip/preprocessing.py +162 -0
- inference_models/models/common/__init__.py +0 -0
- inference_models/models/common/cuda.py +30 -0
- inference_models/models/common/model_packages.py +25 -0
- inference_models/models/common/onnx.py +379 -0
- inference_models/models/common/roboflow/__init__.py +0 -0
- inference_models/models/common/roboflow/model_packages.py +361 -0
- inference_models/models/common/roboflow/post_processing.py +436 -0
- inference_models/models/common/roboflow/pre_processing.py +1332 -0
- inference_models/models/common/torch.py +20 -0
- inference_models/models/common/trt.py +266 -0
- inference_models/models/deep_lab_v3_plus/__init__.py +0 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py +282 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_torch.py +264 -0
- inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_trt.py +313 -0
- inference_models/models/depth_anything_v2/__init__.py +0 -0
- inference_models/models/depth_anything_v2/depth_anything_v2_hf.py +77 -0
- inference_models/models/dinov3/__init__.py +0 -0
- inference_models/models/dinov3/dinov3_classification_onnx.py +348 -0
- inference_models/models/dinov3/dinov3_classification_torch.py +323 -0
- inference_models/models/doctr/__init__.py +0 -0
- inference_models/models/doctr/doctr_torch.py +304 -0
- inference_models/models/easy_ocr/__init__.py +0 -0
- inference_models/models/easy_ocr/easy_ocr_torch.py +222 -0
- inference_models/models/florence2/__init__.py +0 -0
- inference_models/models/florence2/florence2_hf.py +897 -0
- inference_models/models/grounding_dino/__init__.py +0 -0
- inference_models/models/grounding_dino/grounding_dino_torch.py +227 -0
- inference_models/models/l2cs/__init__.py +0 -0
- inference_models/models/l2cs/l2cs_onnx.py +216 -0
- inference_models/models/mediapipe_face_detection/__init__.py +0 -0
- inference_models/models/mediapipe_face_detection/face_detection.py +203 -0
- inference_models/models/moondream2/__init__.py +0 -0
- inference_models/models/moondream2/moondream2_hf.py +281 -0
- inference_models/models/owlv2/__init__.py +0 -0
- inference_models/models/owlv2/cache.py +182 -0
- inference_models/models/owlv2/entities.py +112 -0
- inference_models/models/owlv2/owlv2_hf.py +695 -0
- inference_models/models/owlv2/reference_dataset.py +291 -0
- inference_models/models/paligemma/__init__.py +0 -0
- inference_models/models/paligemma/paligemma_hf.py +209 -0
- inference_models/models/perception_encoder/__init__.py +0 -0
- inference_models/models/perception_encoder/perception_encoder_pytorch.py +197 -0
- inference_models/models/perception_encoder/vision_encoder/__init__.py +0 -0
- inference_models/models/perception_encoder/vision_encoder/config.py +160 -0
- inference_models/models/perception_encoder/vision_encoder/pe.py +742 -0
- inference_models/models/perception_encoder/vision_encoder/rope.py +344 -0
- inference_models/models/perception_encoder/vision_encoder/tokenizer.py +342 -0
- inference_models/models/perception_encoder/vision_encoder/transforms.py +33 -0
- inference_models/models/qwen25vl/__init__.py +1 -0
- inference_models/models/qwen25vl/qwen25vl_hf.py +285 -0
- inference_models/models/resnet/__init__.py +0 -0
- inference_models/models/resnet/resnet_classification_onnx.py +330 -0
- inference_models/models/resnet/resnet_classification_torch.py +305 -0
- inference_models/models/resnet/resnet_classification_trt.py +369 -0
- inference_models/models/rfdetr/__init__.py +0 -0
- inference_models/models/rfdetr/backbone_builder.py +101 -0
- inference_models/models/rfdetr/class_remapping.py +41 -0
- inference_models/models/rfdetr/common.py +115 -0
- inference_models/models/rfdetr/default_labels.py +108 -0
- inference_models/models/rfdetr/dinov2_with_windowed_attn.py +1330 -0
- inference_models/models/rfdetr/misc.py +26 -0
- inference_models/models/rfdetr/ms_deform_attn.py +180 -0
- inference_models/models/rfdetr/ms_deform_attn_func.py +60 -0
- inference_models/models/rfdetr/position_encoding.py +166 -0
- inference_models/models/rfdetr/post_processor.py +83 -0
- inference_models/models/rfdetr/projector.py +373 -0
- inference_models/models/rfdetr/rfdetr_backbone_pytorch.py +394 -0
- inference_models/models/rfdetr/rfdetr_base_pytorch.py +807 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_onnx.py +206 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_pytorch.py +373 -0
- inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py +227 -0
- inference_models/models/rfdetr/rfdetr_object_detection_onnx.py +244 -0
- inference_models/models/rfdetr/rfdetr_object_detection_pytorch.py +470 -0
- inference_models/models/rfdetr/rfdetr_object_detection_trt.py +270 -0
- inference_models/models/rfdetr/segmentation_head.py +273 -0
- inference_models/models/rfdetr/transformer.py +767 -0
- inference_models/models/roboflow_instant/__init__.py +0 -0
- inference_models/models/roboflow_instant/roboflow_instant_hf.py +141 -0
- inference_models/models/sam/__init__.py +0 -0
- inference_models/models/sam/cache.py +147 -0
- inference_models/models/sam/entities.py +25 -0
- inference_models/models/sam/sam_torch.py +675 -0
- inference_models/models/sam2/__init__.py +0 -0
- inference_models/models/sam2/cache.py +162 -0
- inference_models/models/sam2/entities.py +43 -0
- inference_models/models/sam2/sam2_torch.py +905 -0
- inference_models/models/sam2_rt/__init__.py +0 -0
- inference_models/models/sam2_rt/sam2_pytorch.py +119 -0
- inference_models/models/smolvlm/__init__.py +0 -0
- inference_models/models/smolvlm/smolvlm_hf.py +245 -0
- inference_models/models/trocr/__init__.py +0 -0
- inference_models/models/trocr/trocr_hf.py +53 -0
- inference_models/models/vit/__init__.py +0 -0
- inference_models/models/vit/vit_classification_huggingface.py +319 -0
- inference_models/models/vit/vit_classification_onnx.py +326 -0
- inference_models/models/vit/vit_classification_trt.py +365 -0
- inference_models/models/yolact/__init__.py +1 -0
- inference_models/models/yolact/yolact_instance_segmentation_onnx.py +336 -0
- inference_models/models/yolact/yolact_instance_segmentation_trt.py +361 -0
- inference_models/models/yolo_world/__init__.py +1 -0
- inference_models/models/yolonas/__init__.py +0 -0
- inference_models/models/yolonas/nms.py +44 -0
- inference_models/models/yolonas/yolonas_object_detection_onnx.py +204 -0
- inference_models/models/yolonas/yolonas_object_detection_trt.py +230 -0
- inference_models/models/yolov10/__init__.py +0 -0
- inference_models/models/yolov10/yolov10_object_detection_onnx.py +187 -0
- inference_models/models/yolov10/yolov10_object_detection_trt.py +215 -0
- inference_models/models/yolov11/__init__.py +0 -0
- inference_models/models/yolov11/yolov11_onnx.py +28 -0
- inference_models/models/yolov11/yolov11_torch_script.py +25 -0
- inference_models/models/yolov11/yolov11_trt.py +21 -0
- inference_models/models/yolov12/__init__.py +0 -0
- inference_models/models/yolov12/yolov12_onnx.py +7 -0
- inference_models/models/yolov12/yolov12_torch_script.py +7 -0
- inference_models/models/yolov12/yolov12_trt.py +7 -0
- inference_models/models/yolov5/__init__.py +0 -0
- inference_models/models/yolov5/nms.py +99 -0
- inference_models/models/yolov5/yolov5_instance_segmentation_onnx.py +225 -0
- inference_models/models/yolov5/yolov5_instance_segmentation_trt.py +255 -0
- inference_models/models/yolov5/yolov5_object_detection_onnx.py +192 -0
- inference_models/models/yolov5/yolov5_object_detection_trt.py +218 -0
- inference_models/models/yolov7/__init__.py +0 -0
- inference_models/models/yolov7/yolov7_instance_segmentation_onnx.py +226 -0
- inference_models/models/yolov7/yolov7_instance_segmentation_trt.py +253 -0
- inference_models/models/yolov8/__init__.py +0 -0
- inference_models/models/yolov8/yolov8_classification_onnx.py +181 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_onnx.py +239 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_torch_script.py +201 -0
- inference_models/models/yolov8/yolov8_instance_segmentation_trt.py +268 -0
- inference_models/models/yolov8/yolov8_key_points_detection_onnx.py +263 -0
- inference_models/models/yolov8/yolov8_key_points_detection_torch_script.py +218 -0
- inference_models/models/yolov8/yolov8_key_points_detection_trt.py +287 -0
- inference_models/models/yolov8/yolov8_object_detection_onnx.py +213 -0
- inference_models/models/yolov8/yolov8_object_detection_torch_script.py +166 -0
- inference_models/models/yolov8/yolov8_object_detection_trt.py +231 -0
- inference_models/models/yolov9/__init__.py +0 -0
- inference_models/models/yolov9/yolov9_onnx.py +7 -0
- inference_models/models/yolov9/yolov9_torch_script.py +7 -0
- inference_models/models/yolov9/yolov9_trt.py +7 -0
- inference_models/runtime_introspection/__init__.py +0 -0
- inference_models/runtime_introspection/core.py +410 -0
- inference_models/utils/__init__.py +0 -0
- inference_models/utils/download.py +608 -0
- inference_models/utils/environment.py +28 -0
- inference_models/utils/file_system.py +51 -0
- inference_models/utils/hashing.py +7 -0
- inference_models/utils/imports.py +48 -0
- inference_models/utils/onnx_introspection.py +17 -0
- inference_models/weights_providers/__init__.py +0 -0
- inference_models/weights_providers/core.py +20 -0
- inference_models/weights_providers/entities.py +159 -0
- inference_models/weights_providers/roboflow.py +601 -0
- inference_models-0.18.3.dist-info/METADATA +466 -0
- inference_models-0.18.3.dist-info/RECORD +195 -0
- inference_models-0.18.3.dist-info/WHEEL +5 -0
- inference_models-0.18.3.dist-info/top_level.txt +1 -0
inference_models/models/rfdetr/projector.py

@@ -0,0 +1,373 @@
+# ------------------------------------------------------------------------
+# RF-DETR
+# Copyright (c) 2025 Roboflow. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+# Modified from LW-DETR (https://github.com/Atten4Vis/LW-DETR)
+# Copyright (c) 2024 Baidu. All Rights Reserved.
+# ------------------------------------------------------------------------
+# Modified from ViTDet (https://github.com/facebookresearch/detectron2/tree/main/projects/ViTDet)
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# ------------------------------------------------------------------------
+
+"""
+Projector
+"""
+import math
+import random
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class LayerNorm(nn.Module):
+    """
+    A LayerNorm variant, popularized by Transformers, that performs point-wise mean and
+    variance normalization over the channel dimension for inputs that have shape
+    (batch_size, channels, height, width).
+    https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119
+    """
+
+    def __init__(self, normalized_shape, eps=1e-6):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(normalized_shape))
+        self.bias = nn.Parameter(torch.zeros(normalized_shape))
+        self.eps = eps
+        self.normalized_shape = (normalized_shape,)
+
+    def forward(self, x):
+        """
+        LayerNorm forward
+        TODO: this is a hack to avoid overflow when using fp16
+        """
+        x = x.permute(0, 2, 3, 1)
+        x = F.layer_norm(x, (x.size(3),), self.weight, self.bias, self.eps)
+        x = x.permute(0, 3, 1, 2)
+        return x
+
+
+def get_norm(norm, out_channels):
+    """
+    Args:
+        norm (str or callable): either one of BN, SyncBN, FrozenBN, GN;
+            or a callable that takes a channel number and returns
+            the normalization layer as a nn.Module.
+    Returns:
+        nn.Module or None: the normalization layer
+    """
+    if norm is None:
+        return None
+    if isinstance(norm, str):
+        if len(norm) == 0:
+            return None
+        norm = {
+            "LN": lambda channels: LayerNorm(channels),
+        }[norm]
+    return norm(out_channels)
+
+
+def get_activation(name, inplace=False):
+    """get activation"""
+    if name == "silu":
+        module = nn.SiLU(inplace=inplace)
+    elif name == "relu":
+        module = nn.ReLU(inplace=inplace)
+    elif name in ["LeakyReLU", "leakyrelu", "lrelu"]:
+        module = nn.LeakyReLU(0.1, inplace=inplace)
+    elif name is None:
+        module = nn.Identity()
+    else:
+        raise AttributeError("Unsupported act type: {}".format(name))
+    return module
+
+
+class ConvX(nn.Module):
+    """Conv-bn module"""
+
+    def __init__(
+        self,
+        in_planes,
+        out_planes,
+        kernel=3,
+        stride=1,
+        groups=1,
+        dilation=1,
+        act="relu",
+        layer_norm=False,
+        rms_norm=False,
+    ):
+        super(ConvX, self).__init__()
+        if not isinstance(kernel, tuple):
+            kernel = (kernel, kernel)
+        padding = (kernel[0] // 2, kernel[1] // 2)
+        self.conv = nn.Conv2d(
+            in_planes,
+            out_planes,
+            kernel_size=kernel,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            dilation=dilation,
+            bias=False,
+        )
+        if rms_norm:
+            self.bn = nn.RMSNorm(out_planes)
+        else:
+            self.bn = (
+                get_norm("LN", out_planes) if layer_norm else nn.BatchNorm2d(out_planes)
+            )
+        self.act = get_activation(act, inplace=True)
+
+    def forward(self, x):
+        """forward"""
+        out = self.act(self.bn(self.conv(x.contiguous())))
+        return out
+
+
+class Bottleneck(nn.Module):
+    """Standard bottleneck."""
+
+    def __init__(
+        self,
+        c1,
+        c2,
+        shortcut=True,
+        g=1,
+        k=(3, 3),
+        e=0.5,
+        act="silu",
+        layer_norm=False,
+        rms_norm=False,
+    ):
+        """ch_in, ch_out, shortcut, groups, kernels, expand"""
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = ConvX(
+            c1, c_, k[0], 1, act=act, layer_norm=layer_norm, rms_norm=rms_norm
+        )
+        self.cv2 = ConvX(
+            c_, c2, k[1], 1, groups=g, act=act, layer_norm=layer_norm, rms_norm=rms_norm
+        )
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        """'forward()' applies the YOLOv5 FPN to input data."""
+        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class C2f(nn.Module):
+    """Faster Implementation of CSP Bottleneck with 2 convolutions."""
+
+    def __init__(
+        self,
+        c1,
+        c2,
+        n=1,
+        shortcut=False,
+        g=1,
+        e=0.5,
+        act="silu",
+        layer_norm=False,
+        rms_norm=False,
+    ):
+        """ch_in, ch_out, number, shortcut, groups, expansion"""
+        super().__init__()
+        self.c = int(c2 * e)  # hidden channels
+        self.cv1 = ConvX(
+            c1, 2 * self.c, 1, 1, act=act, layer_norm=layer_norm, rms_norm=rms_norm
+        )
+        self.cv2 = ConvX(
+            (2 + n) * self.c, c2, 1, act=act, layer_norm=layer_norm, rms_norm=rms_norm
+        )  # optional act=FReLU(c2)
+        self.m = nn.ModuleList(
+            Bottleneck(
+                self.c,
+                self.c,
+                shortcut,
+                g,
+                k=(3, 3),
+                e=1.0,
+                act=act,
+                layer_norm=layer_norm,
+                rms_norm=rms_norm,
+            )
+            for _ in range(n)
+        )
+
+    def forward(self, x):
+        """Forward pass using split() instead of chunk()."""
+        y = list(self.cv1(x).split((self.c, self.c), 1))
+        y.extend(m(y[-1]) for m in self.m)
+        return self.cv2(torch.cat(y, 1))
+
+
+class MultiScaleProjector(nn.Module):
+    """
+    This module implements MultiScaleProjector in :paper:`lwdetr`.
+    It creates pyramid features built on top of the input feature map.
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        scale_factors,
+        num_blocks=3,
+        layer_norm=False,
+        rms_norm=False,
+        survival_prob=1.0,
+        force_drop_last_n_features=0,
+    ):
+        """
+        Args:
+            net (Backbone): module representing the subnetwork backbone.
+                Must be a subclass of :class:`Backbone`.
+            out_channels (int): number of channels in the output feature maps.
+            scale_factors (list[float]): list of scaling factors to upsample or downsample
+                the input features for creating pyramid features.
+        """
+        super(MultiScaleProjector, self).__init__()
+
+        self.scale_factors = scale_factors
+        self.survival_prob = survival_prob
+        self.force_drop_last_n_features = force_drop_last_n_features
+
+        stages_sampling = []
+        stages = []
+        # use_bias = norm == ""
+        use_bias = False
+        self.use_extra_pool = False
+        for scale in scale_factors:
+            stages_sampling.append([])
+            for in_dim in in_channels:
+                out_dim = in_dim
+                layers = []
+
+                # if in_dim > 512:
+                #     layers.append(ConvX(in_dim, in_dim // 2, kernel=1))
+                #     in_dim = in_dim // 2
+
+                if scale == 4.0:
+                    layers.extend(
+                        [
+                            nn.ConvTranspose2d(
+                                in_dim, in_dim // 2, kernel_size=2, stride=2
+                            ),
+                            get_norm("LN", in_dim // 2),
+                            nn.GELU(),
+                            nn.ConvTranspose2d(
+                                in_dim // 2, in_dim // 4, kernel_size=2, stride=2
+                            ),
+                        ]
+                    )
+                    out_dim = in_dim // 4
+                elif scale == 2.0:
+                    # a hack to reduce the FLOPs and Params when the dimention of output feature is too large
+                    # if in_dim > 512:
+                    #     layers = [
+                    #         ConvX(in_dim, in_dim // 2, kernel=1),
+                    #         nn.ConvTranspose2d(in_dim // 2, in_dim // 4, kernel_size=2, stride=2),
+                    #     ]
+                    #     out_dim = in_dim // 4
+                    # else:
+                    layers.extend(
+                        [
+                            nn.ConvTranspose2d(
+                                in_dim, in_dim // 2, kernel_size=2, stride=2
+                            ),
+                        ]
+                    )
+                    out_dim = in_dim // 2
+                elif scale == 1.0:
+                    pass
+                elif scale == 0.5:
+                    layers.extend(
+                        [
+                            ConvX(in_dim, in_dim, 3, 2, layer_norm=layer_norm),
+                        ]
+                    )
+                elif scale == 0.25:
+                    self.use_extra_pool = True
+                    continue
+                else:
+                    raise NotImplementedError(
+                        "Unsupported scale_factor:{}".format(scale)
+                    )
+                layers = nn.Sequential(*layers)
+                stages_sampling[-1].append(layers)
+            stages_sampling[-1] = nn.ModuleList(stages_sampling[-1])
+
+            in_dim = int(sum(in_channel // max(1, scale) for in_channel in in_channels))
+            layers = [
+                C2f(in_dim, out_channels, num_blocks, layer_norm=layer_norm),
+                get_norm("LN", out_channels),
+            ]
+            layers = nn.Sequential(*layers)
+            stages.append(layers)
+
+        self.stages_sampling = nn.ModuleList(stages_sampling)
+        self.stages = nn.ModuleList(stages)
+
+    def forward(self, x):
+        """
+        Args:
+            x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``.
+        Returns:
+            dict[str->Tensor]:
+                mapping from feature map name to pyramid feature map tensor
+                in high to low resolution order. Returned feature names follow the FPN
+                convention: "p<stage>", where stage has stride = 2 ** stage e.g.,
+                ["p2", "p3", ..., "p6"].
+        """
+        num_features = len(x)
+        if self.survival_prob < 1.0 and self.training:
+            final_drop_prob = 1 - self.survival_prob
+            drop_p = np.random.uniform()
+            for i in range(1, num_features):
+                critical_drop_prob = i * (final_drop_prob / (num_features - 1))
+                if drop_p < critical_drop_prob:
+                    x[i][:] = 0
+        elif self.force_drop_last_n_features > 0:
+            for i in range(self.force_drop_last_n_features):
+                # don't do it inplace to ensure the compiler can optimize out the backbone layers
+                x[-(i + 1)] = torch.zeros_like(x[-(i + 1)])
+
+        results = []
+        # x list of len(out_features_indexes)
+        for i, stage in enumerate(self.stages):
+            feat_fuse = []
+            for j, stage_sampling in enumerate(self.stages_sampling[i]):
+                feat_fuse.append(stage_sampling(x[j]))
+            if len(feat_fuse) > 1:
+                feat_fuse = torch.cat(feat_fuse, dim=1)
+            else:
+                feat_fuse = feat_fuse[0]
+            results.append(stage(feat_fuse))
+        if self.use_extra_pool:
+            results.append(
+                F.max_pool2d(results[-1], kernel_size=1, stride=2, padding=0)
+            )
+        return results
+
+
+class SimpleProjector(nn.Module):
+    def __init__(self, in_dim, out_dim, factor_kernel=False):
+        super(SimpleProjector, self).__init__()
+        if not factor_kernel:
+            self.convx1 = ConvX(in_dim, in_dim * 2, layer_norm=True, act="silu")
+            self.convx2 = ConvX(in_dim * 2, out_dim, layer_norm=True, act="silu")
+        else:
+            self.convx1 = ConvX(
+                in_dim, out_dim, kernel=(3, 1), layer_norm=True, act="silu"
+            )
+            self.convx2 = ConvX(
+                out_dim, out_dim, kernel=(1, 3), layer_norm=True, act="silu"
+            )
+        self.ln = get_norm("LN", out_dim)
+
+    def forward(self, x):
+        """forward"""
+        out = self.ln(self.convx2(self.convx1(x[0])))
+        return [out]
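For orientation, below is a small, hypothetical usage sketch of the projector module shown in the diff above. It is not part of the package: the import path simply follows the file layout listed at the top of this page, and the channel counts, spatial sizes, and scale factors are made-up values chosen to illustrate how a list of backbone feature maps is turned into a feature pyramid.

import torch

from inference_models.models.rfdetr.projector import MultiScaleProjector

# Two hypothetical backbone feature maps, both 384 channels at 1/16 resolution
# of a 640x640 input (40x40 spatial size).
features = [torch.randn(1, 384, 40, 40), torch.randn(1, 384, 40, 40)]

projector = MultiScaleProjector(
    in_channels=[384, 384],         # channels of each incoming feature map
    out_channels=256,               # channels of every pyramid level
    scale_factors=[2.0, 1.0, 0.5],  # upsample x2, keep resolution, downsample x2
    num_blocks=3,                   # bottlenecks per C2f fusion block
)

pyramid = projector(features)
for level in pyramid:
    print(tuple(level.shape))
# Expected: (1, 256, 80, 80), (1, 256, 40, 40), (1, 256, 20, 20)

Each scale factor produces one pyramid level: the per-input sampling stages resize every feature map, the resized maps are concatenated along the channel dimension, and a C2f block followed by LayerNorm projects the result to out_channels. A scale factor of 0.25 would instead enable use_extra_pool, appending one extra max-pooled level after the last stage.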