potnn-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
potnn/fuse.py ADDED
@@ -0,0 +1,167 @@
1
+ """BatchNorm fusion into Conv/Linear layers."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from typing import Dict, List, Tuple, Optional
6
+
7
+
8
+ def fuse_batchnorm(model: nn.Module) -> nn.Module:
9
+ """Fuse BatchNorm layers into preceding Conv/Linear layers.
10
+
11
+ This absorbs BatchNorm parameters (γ, β, μ, σ) into the weight and bias
12
+ of the preceding convolution or linear layer, eliminating the need for
13
+ separate BatchNorm computation at inference time.
14
+
15
+ Formula:
16
+ y = γ * (x - μ) / √(σ² + ε) + β
17
+
18
+ For Conv/Linear followed by BatchNorm:
19
+ out = BN(W*x + b)
20
+ = scale * (W*x + b) + bias'
21
+ = (scale * W) * x + (scale * b + bias')
22
+
23
+ where:
24
+ scale = γ / √(σ² + ε)
25
+ bias' = β - γ * μ / √(σ² + ε)
26
+
27
+ Therefore:
28
+ W_fused = W * scale
29
+ b_fused = b * scale + bias' = (b - μ) * scale + β
30
+
31
+ Args:
32
+ model: Model with Conv/Linear + BatchNorm sequences
33
+
34
+ Returns:
35
+ Model with BatchNorm fused (BatchNorm layers become identity)
36
+ """
37
+ print("Fusing BatchNorm layers...")
38
+
39
+ # Find Conv/Linear -> BatchNorm pairs
40
+ pairs = _find_bn_pairs(model)
41
+
42
+ if not pairs:
43
+ print(" No BatchNorm layers found to fuse.")
44
+ return model
45
+
46
+ # Fuse each pair
47
+ for conv_name, bn_name, conv_module, bn_module in pairs:
48
+ _fuse_single_bn(conv_module, bn_module)
49
+ print(f" Fused: {conv_name} <- {bn_name}")
50
+
51
+ # Replace BatchNorm layers with Identity
52
+ _replace_bn_with_identity(model, [bn_name for _, bn_name, _, _ in pairs])
53
+
54
+ print(f" Total {len(pairs)} BatchNorm layers fused.")
55
+
56
+ return model
57
+
58
+
59
+ def _find_bn_pairs(model: nn.Module) -> List[Tuple[str, str, nn.Module, nn.Module]]:
60
+ """Find Conv/Linear -> BatchNorm pairs in the model.
61
+
62
+ Returns:
63
+ List of (conv_name, bn_name, conv_module, bn_module) tuples
64
+ """
65
+ pairs = []
66
+ prev_name = None
67
+ prev_module = None
68
+
69
+ for name, module in model.named_modules():
70
+ if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)):
71
+ # Check if previous layer is Conv or Linear
72
+ if prev_module is not None:
73
+ if isinstance(prev_module, (nn.Conv2d, nn.Linear)):
74
+ pairs.append((prev_name, name, prev_module, module))
75
+ elif hasattr(prev_module, 'weight'):
76
+ # PoTConv2d or PoTLinear
77
+ pairs.append((prev_name, name, prev_module, module))
78
+
79
+ # Track previous layer (skip non-compute layers)
80
+ if isinstance(module, (nn.Conv2d, nn.Linear)) or hasattr(module, 'weight'):
81
+ if not isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)):
82
+ prev_name = name
83
+ prev_module = module
84
+
85
+ return pairs
86
+
87
+
88
+ def _fuse_single_bn(conv: nn.Module, bn: nn.Module):
89
+ """Fuse a single BatchNorm into its preceding Conv/Linear.
90
+
91
+ Modifies conv.weight and conv.bias in-place.
92
+ """
93
+ with torch.no_grad():
94
+ # Get BatchNorm parameters
95
+ gamma = bn.weight # γ (scale)
96
+ beta = bn.bias # β (shift)
97
+ mean = bn.running_mean # μ
98
+ var = bn.running_var # σ²
99
+ eps = bn.eps
100
+
101
+ # Compute scale factor: γ / √(σ² + ε)
102
+ std = torch.sqrt(var + eps)
103
+ scale = gamma / std
104
+
105
+ # Compute bias adjustment: β - γ * μ / √(σ² + ε)
106
+ bias_adjust = beta - gamma * mean / std
107
+
108
+ # Get conv weight shape
109
+ weight = conv.weight
110
+
111
+ if weight.dim() == 4:
112
+ # Conv2d: weight shape is [out_ch, in_ch, kH, kW]
113
+ # Scale each output channel
114
+ scale_shape = scale.view(-1, 1, 1, 1)
115
+ conv.weight.data = weight * scale_shape
116
+ else:
117
+ # Linear [out, in] or Conv1d [out, in, k]: scale each output channel/row
118
+ scale_shape = scale.view(-1, *([1] * (weight.dim() - 1)))
119
+ conv.weight.data = weight * scale_shape
120
+
121
+ # Handle bias
122
+ if conv.bias is not None:
123
+ # Existing bias: b_fused = b * scale + bias_adjust
124
+ conv.bias.data = conv.bias * scale + bias_adjust
125
+ else:
126
+ # No existing bias: create one with just bias_adjust
127
+ conv.bias = nn.Parameter(bias_adjust.clone())
128
+
129
+
130
+ def _replace_bn_with_identity(model: nn.Module, bn_names: List[str]):
131
+ """Replace BatchNorm layers with Identity.
132
+
133
+ This ensures the fused BatchNorm layers don't affect forward pass.
134
+ """
135
+ for bn_name in bn_names:
136
+ # Navigate to parent and replace
137
+ parts = bn_name.split('.')
138
+
139
+ if len(parts) == 1:
140
+ # Top-level module
141
+ setattr(model, bn_name, nn.Identity())
142
+ else:
143
+ # Nested module
144
+ parent = model
145
+ for part in parts[:-1]:
146
+ if part.isdigit():
147
+ parent = parent[int(part)]
148
+ else:
149
+ parent = getattr(parent, part)
150
+
151
+ child_name = parts[-1]
152
+ if child_name.isdigit():
153
+ parent[int(child_name)] = nn.Identity()
154
+ else:
155
+ setattr(parent, child_name, nn.Identity())
156
+
157
+
158
+ def check_bn_fused(model: nn.Module) -> bool:
159
+ """Check if all BatchNorm layers have been fused.
160
+
161
+ Returns:
162
+ True if no BatchNorm layers remain (or all are Identity)
163
+ """
164
+ for module in model.modules():
165
+ if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)):
166
+ return False
167
+ return True
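
Illustration (not part of the package diff): a minimal numerical sanity check of the fusion above. It assumes only the potnn.fuse module shown in this diff; the toy Conv+BN model, the random statistics, and the tolerance are arbitrary choices for the example.

    import torch
    import torch.nn as nn
    from potnn.fuse import fuse_batchnorm, check_bn_fused

    # Toy Conv -> BN -> ReLU model (hypothetical, for illustration only)
    model = nn.Sequential(
        nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(8),
        nn.ReLU(),
    )

    # Give the BatchNorm non-trivial statistics so the check is meaningful
    bn = model[1]
    bn.running_mean.uniform_(-1.0, 1.0)
    bn.running_var.uniform_(0.5, 2.0)
    bn.weight.data.uniform_(0.5, 1.5)
    bn.bias.data.uniform_(-0.5, 0.5)

    model.eval()  # fusion folds the *running* statistics, so use eval mode
    x = torch.randn(1, 3, 16, 16)

    with torch.no_grad():
        y_ref = model(x)        # Conv -> BN -> ReLU
        fuse_batchnorm(model)   # BN absorbed into the Conv, BN replaced by Identity
        y_fused = model(x)      # Conv(fused) -> Identity -> ReLU

    print(check_bn_fused(model))                       # True
    print(torch.allclose(y_ref, y_fused, atol=1e-4))   # True, up to float rounding
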
potnn/modules/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """Neural network modules for potnn."""
2
+
3
+ from .base import PoTLayerBase
4
+ from .linear import PoTLinear
5
+ from .conv import PoTConv2d
6
+ from .conv1d import PoTConv1d
7
+ from .depthwise import PoTDepthwiseConv2d
8
+ from .add import PoTAdd
9
+ from .avgpool import PoTGlobalAvgPool
10
+
11
+ __all__ = ['PoTLayerBase', 'PoTLinear', 'PoTConv2d', 'PoTConv1d', 'PoTDepthwiseConv2d', 'PoTAdd', 'PoTGlobalAvgPool']
potnn/modules/add.py ADDED
@@ -0,0 +1,114 @@
1
+ """PoT Add layer for skip/residual connections."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import math
6
+
7
+
8
+ class PoTAdd(nn.Module):
9
+ """Add layer for residual/skip connections with scale alignment.
10
+
11
+ In a skip connection the two branches may be on different scales:
12
+ - x: the original input (scale_x)
13
+ - y: the output of the conv path (scale_y)
14
+
15
+ This layer aligns the scales and then adds:
16
+ output = rescale(x) + y
17
+
18
+ rescale is implemented with an integer MUL + shift:
19
+ x_aligned = (x * rescale_mult) >> rescale_shift
20
+
21
+ rescale_mult and rescale_shift are computed at compile time;
22
+ no float operations are performed at runtime.
23
+
24
+ Usage example:
25
+ # ResNet block
26
+ identity = x
27
+ out = conv2(relu(conv1(x)))
28
+ out = add_layer(identity, out) # identity + out with scale alignment
29
+ out = relu(out)
30
+ """
31
+
32
+ def __init__(self):
33
+ """Initialize PoTAdd layer."""
34
+ super().__init__()
35
+
36
+ # Scale alignment: x_aligned = (x * rescale_mult) >> rescale_shift
37
+ self.register_buffer('rescale_mult', torch.tensor(128))  # default: 1.0 * 128
38
+ self.register_buffer('rescale_shift', torch.tensor(7))   # default: >> 7
39
+
40
+ # Activation scale for output (set during calibration)
41
+ self.register_buffer('act_scale', None)
42
+
43
+ # Scale info for the two inputs (set during calibration)
44
+ self.register_buffer('scale_x', None) # scale of first input (skip)
45
+ self.register_buffer('scale_y', None) # scale of second input (conv output)
46
+
47
+ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
48
+ """Forward pass: aligned add.
49
+
50
+ Args:
51
+ x: First input (typically skip/identity branch)
52
+ y: Second input (typically conv output)
53
+
54
+ Returns:
55
+ x + y with scale alignment applied to x
56
+ """
57
+ # QAT mode: simulate integer rescale
58
+ if getattr(self, 'quantize', False) and self.scale_x is not None:
59
+ # Simulate: x_aligned = (x * rescale_mult) >> rescale_shift
60
+ # In float: x_aligned = x * (rescale_mult / 2^rescale_shift)
61
+ ratio = self.rescale_mult.float() / (1 << self.rescale_shift.item())
62
+ x = x * ratio
63
+
64
+ return x + y
65
+
66
+ def set_scales(self, scale_x: float, scale_y: float):
67
+ """Set input scales and compute rescale_mult/rescale_shift.
68
+
69
+ C code: skip_rescaled = (skip_int * mult) >> shift
70
+ To express the skip branch in the conv branch's scale:
71
+ ratio = scale_y / scale_x (conv/skip)
72
+
73
+ Args:
74
+ scale_x: Activation scale of first input (skip branch)
75
+ scale_y: Activation scale of second input (conv branch)
76
+ """
77
+ self.scale_x = torch.tensor(scale_x)
78
+ self.scale_y = torch.tensor(scale_y)
79
+
80
+ # Match the C code: convert the skip branch onto the conv branch's scale
81
+ ratio = scale_y / scale_x
82
+
83
+ # Integer quantization: ratio ≈ rescale_mult / 2^rescale_shift
84
+ # mult = ratio * 2^shift; adjust shift so that mult lands in the 1-255 range
85
+ base_shift = 7
86
+ mult = round(ratio * (1 << base_shift))
87
+
88
+ # If mult is too large, decrease shift (mult = ratio * 2^shift)
89
+ while mult > 255 and base_shift > 0:
90
+ base_shift -= 1
91
+ mult = round(ratio * (1 << base_shift))
92
+
93
+ # If mult is too small, increase shift
94
+ while mult < 32 and base_shift < 15:
95
+ base_shift += 1
96
+ mult = round(ratio * (1 << base_shift))
97
+
98
+ # clamp mult to safe range
99
+ mult = max(1, min(255, mult))
100
+
101
+ self.rescale_mult = torch.tensor(mult)
102
+ self.rescale_shift = torch.tensor(base_shift)
103
+
104
+ # Output scale is same as y's scale (after alignment)
105
+ self.act_scale = torch.tensor(scale_y)
106
+
107
+ def extra_repr(self) -> str:
108
+ """String representation."""
109
+ s = f"rescale_mult={self.rescale_mult.item()}, rescale_shift={self.rescale_shift.item()}"
110
+ if self.scale_x is not None:
111
+ ratio = self.scale_y.item() / self.scale_x.item()  # same convention as set_scales
112
+ approx = self.rescale_mult.item() / (1 << self.rescale_shift.item())
113
+ s += f", ratio={ratio:.3f}, approx={approx:.3f}"
114
+ return s
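
Illustration (not part of the package diff): how set_scales above maps a scale ratio onto an integer multiply-and-shift. The import path follows this diff; the scale values and the sample activation are made up.

    import torch
    from potnn.modules.add import PoTAdd

    add = PoTAdd()
    add.set_scales(scale_x=23.7, scale_y=41.2)   # hypothetical activation scales

    mult = int(add.rescale_mult.item())
    shift = int(add.rescale_shift.item())
    ratio = 41.2 / 23.7                          # scale_y / scale_x, as in set_scales

    print(mult, shift)                  # e.g. 223, 7
    print(mult / (1 << shift), ratio)   # 1.742... vs 1.738...: close approximation

    # The rescale the C code would apply to a quantized skip value
    # (accumulation happens in a wider integer type than int8):
    skip_int = 93
    print((skip_int * mult) >> shift, round(skip_int * ratio))   # both 162
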
potnn/modules/avgpool.py ADDED
@@ -0,0 +1,173 @@
1
+ """PoT Global Average Pooling layer."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import math
6
+
7
+
8
+ from ..quantize.integer_ops import round_half_up_ste, floor_ste
9
+
10
+ class PoTGlobalAvgPool(nn.Module):
11
+ """Global Average Pooling with PoT-compatible quantization.
12
+
13
+ [Integer-Only QAT Mode]
14
+ Forward pass simulates C integer arithmetic:
15
+ - Power of 2 size: (sum + (size//2)) >> log2(size)
16
+ - Generic size: (sum * div_mult + round_const) >> div_shift
17
+ """
18
+
19
+ def __init__(self):
20
+ """Initialize PoTGlobalAvgPool."""
21
+ super().__init__()
22
+
23
+ # Division parameters
24
+ self.register_buffer('div_mult', torch.tensor(1))
25
+ self.register_buffer('div_shift', torch.tensor(0))
26
+ self.register_buffer('pool_size', torch.tensor(0))
27
+
28
+ # Activation scale (passed from previous layer)
29
+ self.register_buffer('act_scale', None)
30
+
31
+ self.quantize = False
32
+
33
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
34
+ """Forward pass: global average pooling.
35
+
36
+ Args:
37
+ x: Input tensor of shape (N, C, H, W)
38
+
39
+ Returns:
40
+ Output tensor of shape (N, C)
41
+ """
42
+ if not self.quantize:
43
+ # Float mode
44
+ return x.mean(dim=(2, 3))
45
+
46
+ if not getattr(self, 'integer_sim_enabled', False):
47
+ # Float QAT mode
48
+ return x.mean(dim=(2, 3))
49
+
50
+ # Calculate pool size dynamically if not set
51
+ current_pool_size = x.shape[2] * x.shape[3]
52
+
53
+ if self.pool_size.item() != current_pool_size:
54
+ self.set_pool_size(x.shape[2], x.shape[3])
55
+
56
+ # Integer mode
57
+ # x is float but represents integer values (from previous layer)
58
+ # We assume input is already scaled by act_scale of previous layer?
59
+ # No, in our new design, previous layer output is "dequantized" float.
60
+ # So x is float.
61
+ # But GAP in C operates on the accumulated integer values?
62
+ # Wait, C GAP input is the output of the previous layer *before* requantization?
63
+ # No, usually GAP follows a Conv/ReLU layer.
64
+ # The previous layer output is int8 (requantized).
65
+ # So x here is int8 values (represented as float).
66
+ # But wait, our Conv layer returns `out / act_scale`.
67
+ # So x is float.
68
+ # We need to recover the int8 values: `x_int = round(x * prev_scale)`
69
+ # But `prev_scale` is `act_scale` of previous layer.
70
+ # If we assume `act_scale` is passed to this layer, we can use it.
71
+
72
+ # However, for GAP, usually we just average the values.
73
+ # mean(x) = sum(x) / N
74
+ # If x = x_int / scale, then mean(x) = sum(x_int) / N / scale
75
+ # = (sum(x_int) / N) / scale
76
+ # So we can just compute mean(x) in float?
77
+ # NO! The rounding behavior of `sum(x_int) / N` in integer arithmetic is different from float mean.
78
+ # C: `(sum(x_int) + N//2) >> log2(N)`
79
+ # Python float: `mean(x_int)` (exact)
80
+ # We must simulate the integer division on `x_int`.
81
+
82
+ # So:
83
+ # 1. Recover x_int: x_int = round(x * act_scale)
84
+ # 2. Compute sum(x_int)
85
+ # 3. Integer division
86
+ # 4. Convert back to float: result / act_scale
87
+
88
+ # We need `act_scale` of the input.
89
+ # Usually this is passed or stored.
90
+ # Let's assume `act_scale` is available (set by `set_prev_act_scale` or similar mechanism).
91
+ # But `PoTGlobalAvgPool` doesn't inherit `PoTLayerBase` currently.
92
+ # Let's assume for now we just operate on `x` assuming it's `x_int` if `act_scale` is 1.0.
93
+ # But wait, if we don't know `act_scale`, we can't recover `x_int`.
94
+
95
+ # In the user's specific case (SimpleNet), GAP follows Conv2.
96
+ # Conv2 output is `out / act_scale`.
97
+ # So GAP input is float.
98
+ # If we want to match C, we need to know `act_scale`.
99
+
100
+ # Let's check how `PoTGlobalAvgPool` is used.
101
+ # It seems it's used in `SimpleNet`.
102
+ # We should probably add `act_scale` management to `PoTGlobalAvgPool`.
103
+
104
+ # For now, let's implement the integer logic assuming `x` is `x_int`?
105
+ # No, `PoTConv2d` divides by scale.
106
+
107
+ # Solution: `PoTGlobalAvgPool` needs `act_scale`.
108
+ # We'll add `set_act_scale` method.
109
+
110
+ scale = self.act_scale if self.act_scale is not None else torch.tensor(1.0)
111
+
112
+ # Input should be integer values from previous layer
113
+ # Round to ensure exact integer (may have floating point precision errors)
114
+ # Use STE to maintain gradient flow during training
115
+ x_int = round_half_up_ste(x)
116
+
119
+ # 2. Sum over H, W
120
+ sum_val = x_int.sum(dim=(2, 3))
121
+
122
+
123
+ # 3. Integer Division
124
+ pool_size = int(self.pool_size.item())
125
+ if (pool_size & (pool_size - 1)) == 0:
126
+ # Power of 2
127
+ shift = int(math.log2(pool_size))
128
+ round_const = 1 << (shift - 1)
132
+ # (sum + round) >> shift
133
+ out_int = floor_ste((sum_val + round_const) / (1 << shift))
134
+
135
+ else:
136
+ # Generic
137
+ mult = self.div_mult.item()
138
+ shift = self.div_shift.item()
139
+ # (sum * mult + round) >> shift
140
+ # round_const for the shift is 1 << (shift - 1),
141
+ # matching the C generic implementation:
142
+ # avg = (sum * div_mult + (1 << (div_shift - 1))) >> div_shift
143
+ round_const = 1 << (shift - 1) if shift > 0 else 0
144
+ val = sum_val * mult + round_const
145
+ out_int = floor_ste(val / (1 << shift))
146
+
147
+ # Output stays on the integer grid (int8 values represented as float); no dequantization here
148
+ return out_int
149
+
150
+ def set_pool_size(self, h: int, w: int):
151
+ """Set pool size and compute div_mult/div_shift."""
152
+ pool_size = h * w
153
+ self.pool_size = torch.tensor(pool_size)
154
+
155
+ if pool_size > 0 and (pool_size & (pool_size - 1)) == 0:
156
+ self.div_mult = torch.tensor(1)
157
+ self.div_shift = torch.tensor(int(math.log2(pool_size)))
158
+ else:
159
+ base_shift = 15
160
+ mult = round((1 << base_shift) / pool_size)
161
+ while mult > 255 and base_shift > 8:
162
+ base_shift -= 1
163
+ mult = round((1 << base_shift) / pool_size)
164
+ self.div_mult = torch.tensor(max(1, min(65535, mult)))
165
+ self.div_shift = torch.tensor(base_shift)
166
+
167
+ def prepare_qat(self, act_scale=None):
168
+ self.quantize = True
169
+ if act_scale is not None:
170
+ self.act_scale = torch.tensor(act_scale)
171
+
172
+ def extra_repr(self) -> str:
173
+ return f"pool_size={self.pool_size.item()}, quantize={self.quantize}"
potnn/modules/base.py ADDED
@@ -0,0 +1,225 @@
1
+ """Base class for all PoT (Power-of-Two) quantized layers."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ class PoTLayerBase(nn.Module):
9
+ """Base class for all PoT layers with alpha scaling and activation quantization.
10
+
11
+ This class provides:
12
+ - Alpha scaling parameter (learnable)
13
+ - Activation scale (fixed after calibration)
14
+ - QAT (Quantization-Aware Training) mode management
15
+ - Alpha regularization loss
16
+ - Integer simulation mode for C-compatible inference
17
+ """
18
+
19
+ def __init__(self, encoding='unroll'):
20
+ """Initialize PoT layer base.
21
+
22
+ Args:
23
+ encoding: Encoding type for weight quantization
24
+ - 'unroll': 17 levels {0, ±1, ±2, ±4, ..., ±128} (default)
26
+ - 'fp130': 16 levels {±1, ±2, ±4, ..., ±128} (no zero)
27
+ - '5level': 5 levels {-8, -1, 0, 1, 8}
28
+ - '2bit': 4 levels {-2, -1, 1, 2} (no zero)
29
+ - 'ternary': 3 levels {-1, 0, 1}
29
+ """
30
+ super().__init__()
31
+ self.encoding = encoding
32
+
33
+ # Alpha scaling parameter (learnable)
34
+ # raw_alpha → softplus → clamp(0.01) → alpha
35
+ self.raw_alpha = nn.Parameter(torch.tensor(0.5))
36
+
37
+ # Alpha initial value for regularization
38
+ # This will be updated during calibration to match the initialized alpha
39
+ self.register_buffer('alpha_init', torch.tensor(0.5))
40
+
41
+ # Activation scale (fixed after calibration)
42
+ self.register_buffer('act_scale', None)
43
+
44
+ # QAT mode flag
45
+ self.quantize = False
46
+
47
+ # === Integer Simulation Parameters ===
48
+ # These enable C-compatible integer arithmetic simulation
49
+
50
+ # Layer position flags
51
+ self.register_buffer('is_first_layer', torch.tensor(False))
52
+ self.register_buffer('is_last_layer', torch.tensor(False))
53
+
54
+ # Previous layer's act_scale (for scale chain)
55
+ self.register_buffer('prev_act_scale', None)
56
+
57
+ # Input std (for first layer standardization absorption)
58
+ # Per-channel tensor [in_ch] or None
59
+ self.register_buffer('input_std', None)
60
+
61
+ # Input mean (for first layer bias adjustment)
62
+ # Per-channel tensor [in_ch] or None
63
+ self.register_buffer('input_mean', None)
64
+
65
+ # Pre-computed integer scale parameters
66
+ self.register_buffer('scale_int', None)
67
+ self.register_buffer('shift', None)
68
+
69
+ # Integer simulation mode flag
70
+ self.use_integer_sim = False
71
+
72
+ # 5level encoding constraint flag
73
+ # When True, enforces max 3 consecutive zeros (skip field is 2 bits)
74
+ self.enforce_5level_constraint = False
75
+
76
+ @property
77
+ def alpha(self):
78
+ """Get positive alpha value using softplus + clamp.
79
+
80
+ Returns:
81
+ Positive alpha value for scaling PoT weights.
82
+ """
83
+ return F.softplus(self.raw_alpha).clamp(min=0.01)
84
+
85
+ def calibrate(self, act_max):
86
+ """Set activation scale based on calibration.
87
+
88
+ Args:
89
+ act_max: Maximum activation value from calibration.
90
+ """
91
+ if act_max > 0:
92
+ self.act_scale = torch.tensor(127.0 / act_max)
93
+ else:
94
+ self.act_scale = torch.tensor(1.0)
95
+
96
+ def prepare_qat(self):
97
+ """Enable QAT (Quantization-Aware Training) mode."""
98
+ self.quantize = True
99
+
100
+ def alpha_reg_loss(self, lambda_reg=0.01):
101
+ """Calculate alpha regularization loss.
102
+
103
+ This loss encourages alpha to stay close to its initial value,
104
+ preventing it from drifting too far during training.
105
+
106
+ Args:
107
+ lambda_reg: Regularization strength (default: 0.01)
108
+
109
+ Returns:
110
+ Alpha regularization loss value.
111
+ """
112
+ # Use the stored alpha_init which is set during calibration
113
+ return lambda_reg * (self.alpha - self.alpha_init) ** 2
114
+
115
+ # === Integer Simulation Methods ===
116
+
117
+ def set_layer_position(self, is_first: bool, is_last: bool):
118
+ """Set layer position in the network.
119
+
120
+ Args:
121
+ is_first: True if this is the first PoT layer (input is uint8)
122
+ is_last: True if this is the last PoT layer (no ReLU)
123
+ """
124
+ self.is_first_layer = torch.tensor(is_first)
125
+ self.is_last_layer = torch.tensor(is_last)
126
+
127
+ def set_prev_act_scale(self, prev_scale: float):
128
+ """Set previous layer's activation scale.
129
+
130
+ Args:
131
+ prev_scale: Previous layer's act_scale value
132
+ """
133
+ if prev_scale is not None:
134
+ self.prev_act_scale = torch.tensor(prev_scale)
135
+ else:
136
+ self.prev_act_scale = None
137
+
138
+ def set_input_std(self, std, mean=None):
139
+ """Set input statistics for first layer.
140
+
141
+ Args:
142
+ std: Standard deviation - float (single channel) or List[float] (multi-channel)
143
+ mean: Mean values - float (single channel) or List[float] (multi-channel)
144
+ """
145
+ # Convert to per-channel tensor
146
+ if isinstance(std, (int, float)):
147
+ self.input_std = torch.tensor([float(std)])
148
+ else:
149
+ self.input_std = torch.tensor([float(s) for s in std])
150
+
151
+ if mean is not None:
152
+ if isinstance(mean, (int, float)):
153
+ self.input_mean = torch.tensor([float(mean)])
154
+ else:
155
+ self.input_mean = torch.tensor([float(m) for m in mean])
156
+ else:
157
+ self.input_mean = None
158
+
159
+ def compute_integer_params(self):
160
+ """Compute integer scale parameters for C-compatible inference.
161
+
162
+ MUST match export.py calculate_combined_scales() exactly!
163
+
164
+ Returns:
165
+ (scale_int, shift) tuple
166
+ """
167
+ scale_int, shift, _ = self._compute_scale_and_shift()
168
+
169
+ self.scale_int = torch.tensor(scale_int, device=self.raw_alpha.device)
170
+ self.shift = torch.tensor(shift, device=self.raw_alpha.device)
171
+
172
+ return scale_int, shift
173
+
174
+ def _compute_scale_and_shift(self):
175
+ """Internal method to compute scale_int and shift dynamically.
176
+
177
+ Returns:
178
+ (scale_int, shift, combined_scale)
179
+ """
180
+ # self.alpha is already softplus(raw_alpha).clamp(0.01) via property
181
+ alpha = self.alpha.item()
182
+ act_scale = self.act_scale.item() if self.act_scale is not None else None
183
+
184
+ is_first = self.is_first_layer.item()
185
+
186
+ # Calculate combined_scale - EXACTLY like export.py
187
+ if is_first:
188
+ # Use average std for combined_scale (matches export.py)
189
+ if self.input_std is not None:
190
+ input_std = self.input_std.mean().item()
191
+ else:
192
+ input_std = 1.0
193
+ if act_scale is not None:
194
+ combined_scale = alpha * act_scale / input_std
195
+ else:
196
+ combined_scale = alpha / input_std
197
+ else:
198
+ prev_scale = self.prev_act_scale.item() if self.prev_act_scale is not None else 1.0
199
+ if act_scale is not None:
200
+ combined_scale = alpha * act_scale / prev_scale
201
+ else:
202
+ combined_scale = alpha / prev_scale
203
+
204
+ # Determine shift - EXACTLY like export.py
205
+ base_shift = 0
206
+ scale_magnitude = abs(combined_scale)
207
+
208
+ # Target: scale_int around 64-512 for precision (export.py uses 64-512)
209
+ while scale_magnitude < 64 and base_shift < 24:
210
+ scale_magnitude *= 2
211
+ base_shift += 1
212
+ while scale_magnitude > 512 and base_shift > 0:
213
+ scale_magnitude /= 2
214
+ base_shift -= 1
215
+
216
+ # For first layer, add +8 for /256 absorption
217
+ if is_first:
218
+ combined_shift = base_shift + 8
219
+ else:
220
+ combined_shift = base_shift
221
+
222
+ # Calculate integer scale
223
+ scale_int = round(combined_scale * (1 << base_shift))
224
+
225
+ return scale_int, combined_shift, combined_scale
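
Illustration (not part of the package diff): how a float combined_scale is approximated as scale_int / 2^shift, mirroring _compute_scale_and_shift above for a non-first layer (a first layer additionally adds 8 to the shift to absorb the /256). scale_to_int and the numeric values are made up for the example.

    def scale_to_int(combined_scale: float):
        """Approximate combined_scale as scale_int / 2**shift, with scale_int in ~[64, 512]."""
        shift = 0
        mag = abs(combined_scale)
        while mag < 64 and shift < 24:
            mag *= 2
            shift += 1
        while mag > 512 and shift > 0:
            mag /= 2
            shift -= 1
        return round(combined_scale * (1 << shift)), shift

    combined_scale = 0.8371              # e.g. alpha * act_scale / prev_act_scale
    scale_int, shift = scale_to_int(combined_scale)
    print(scale_int, shift)              # 107, 7  ->  107 / 128 ≈ 0.8359

    # C-style requantization of an int32 accumulator with the integer scale:
    acc = 5213
    print((acc * scale_int) >> shift, acc * combined_scale)   # 4357 vs ~4363.8
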