potnn 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
potnn/utils/memory.py ADDED
@@ -0,0 +1,158 @@
+ """Memory estimation and validation utilities."""
+
+ import torch
+ import torch.nn as nn
+ from typing import Any, Dict, Tuple
+
+
+ def estimate_layer_size(module: nn.Module) -> int:
+     """Estimate the size of a layer in bytes.
+
+     Args:
+         module: Neural network module
+
+     Returns:
+         Estimated size in bytes (one byte per parameter, assuming int8)
+     """
+     param_count = 0
+
+     # Count weight parameters
+     if hasattr(module, 'weight') and module.weight is not None:
+         param_count += module.weight.numel()
+
+     # Count bias parameters
+     if hasattr(module, 'bias') and module.bias is not None:
+         param_count += module.bias.numel()
+
+     return param_count
+
+
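+ # Example: estimate_layer_size(nn.Conv2d(1, 16, 3)) counts 16*1*3*3 = 144
+ # weights plus 16 biases, i.e. 160 parameters = 160 bytes at int8.
+
+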
+ def estimate_activation_size(model: nn.Module, input_shape: Tuple) -> int:
+     """Estimate maximum activation buffer size needed.
+
+     Args:
+         model: Neural network model
+         input_shape: Shape of input tensor (without batch dimension)
+
+     Returns:
+         Maximum activation size in bytes
+     """
+     # Create dummy input
+     dummy_input = torch.zeros(1, *input_shape)
+     device = next(model.parameters()).device
+     dummy_input = dummy_input.to(device)
+
+     max_size = 0
+     hooks = []
+
+     def hook_fn(module, input, output):
+         nonlocal max_size
+         if isinstance(output, torch.Tensor):
+             size = output.numel()  # int8 = 1 byte per element
+             max_size = max(max_size, size)
+
+     # Register hooks
+     for module in model.modules():
+         if isinstance(module, (nn.Conv2d, nn.Linear, nn.MaxPool2d)):
+             hooks.append(module.register_forward_hook(hook_fn))
+
+     # Run forward pass
+     model.eval()
+     with torch.no_grad():
+         model(dummy_input)
+
+     # Clean up hooks
+     for hook in hooks:
+         hook.remove()
+
+     return max_size
+
+
+ def estimate_memory_usage(model: nn.Module, input_shape: Tuple, mode: str = 'all') -> Dict[str, Any]:
+     """Estimate memory usage of the model.
+
+     Args:
+         model: Neural network model
+         input_shape: Shape of input tensor (without batch dimension)
+         mode: 'all', 'weights', or 'activations'
+
+     Returns:
+         Dictionary with memory estimates in bytes
+     """
+     result = {}
+
+     if mode in ['all', 'weights']:
+         # Estimate weight memory
+         total_weights = 0
+         layer_weights = {}
+
+         for name, module in model.named_modules():
+             if isinstance(module, (nn.Conv2d, nn.Linear)):
+                 size = estimate_layer_size(module)
+                 layer_weights[name] = size
+                 total_weights += size
+
+         result['total_weights'] = total_weights
+         result['layer_weights'] = layer_weights
+
+     if mode in ['all', 'activations']:
+         # Estimate activation memory
+         result['max_activation'] = estimate_activation_size(model, input_shape)
+
+     if mode == 'all':
+         # Input buffer size
+         input_size = torch.zeros(1, *input_shape).numel()
+         result['input_buffer'] = input_size
+
+         # Total RAM needed (input + activations + some weights in loop mode)
+         result['estimated_ram'] = result['input_buffer'] + result['max_activation']
+
+         # Total Flash needed (mainly for unrolled weights as code)
+         # This is a rough estimate - actual size depends on unroll/loop decisions
+         result['estimated_flash'] = total_weights * 4  # Rough estimate for unrolled code
+
+     return result
+
+
+ def validate_memory(model: nn.Module, flash_budget: int, ram_budget: int,
+                     input_shape: Tuple = (1, 16, 16)) -> bool:
+     """Validate if model fits within memory budgets.
+
+     Args:
+         model: Neural network model
+         flash_budget: Flash memory budget in bytes
+         ram_budget: RAM budget in bytes
+         input_shape: Input tensor shape (default for 16x16 grayscale)
+
+     Returns:
+         True if the model fits
+
+     Raises:
+         ValueError: If the model does not fit, with details in the message
+     """
+     estimates = estimate_memory_usage(model, input_shape)
+
+     # Check RAM budget
+     min_ram_needed = estimates['input_buffer'] + estimates['max_activation']
+     if min_ram_needed > ram_budget:
+         raise ValueError(
+             f"Model requires at least {min_ram_needed} bytes of RAM "
+             f"(input: {estimates['input_buffer']}, activation: {estimates['max_activation']}), "
+             f"but only {ram_budget} bytes available."
+         )
+
+     # Check if we can fit weights either in Flash (unrolled) or RAM (loop)
+     # This is a simplified check - actual allocation is done by allocate_hybrid
+     # For unrolled code, estimate ~4 bytes per weight
+     # For loop with packing, estimate ~0.25 bytes per weight (2-bit packing)
+     unrolled_size = estimates['total_weights'] * 4
+     packed_size = estimates['total_weights'] // 4  # 2-bit packing
+
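+     # Worked example: 10,000 weights → unrolled ≈ 40,000 bytes of Flash;
+     # 2-bit packed (four weights per byte) = 2,500 bytes of RAM on top of
+     # min_ram_needed.
+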
+     if unrolled_size > flash_budget and packed_size > (ram_budget - min_ram_needed):
+         raise ValueError(
+             f"Model weights ({estimates['total_weights']} parameters) too large. "
+             f"Unrolled: {unrolled_size} bytes > Flash {flash_budget} bytes. "
+             f"Packed: {packed_size} bytes > Available RAM {ram_budget - min_ram_needed} bytes."
+         )
+
+     return True
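+
+
+ # Illustrative usage (a sketch; the budgets and model below are made up for
+ # illustration). The estimators only inspect nn.Conv2d/nn.Linear layers, so a
+ # plain torch.nn model is enough to exercise them:
+ #
+ #     model = nn.Sequential(
+ #         nn.Conv2d(1, 8, 3), nn.ReLU(), nn.Flatten(),
+ #         nn.Linear(8 * 14 * 14, 10),
+ #     )
+ #     est = estimate_memory_usage(model, input_shape=(1, 16, 16))
+ #     # est['total_weights'], est['max_activation'], est['estimated_ram'], ...
+ #     validate_memory(model, flash_budget=128 * 1024, ram_budget=4 * 1024)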
potnn/wrapper.py ADDED
@@ -0,0 +1,304 @@
+ """Model wrapper for potnn conversion."""
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from typing import Callable, List, Optional, Union
+ from torch.utils.data import DataLoader
+
+ from .config import Config
+ from .modules import PoTLinear, PoTConv2d
+ from .quantize.calibration import calibrate_model
+ from .quantize.qat import prepare_qat, alpha_reg_loss, enable_integer_sim
+ from .utils import validate_memory, allocate_hybrid
+ from .fuse import fuse_batchnorm
+
+
+ def _normalize_data(data: torch.Tensor,
+                     mean: Union[float, List[float]],
+                     std: Union[float, List[float]]) -> torch.Tensor:
+     """Normalize input data to match C inference exactly.
+
+     C inference uses:
+     - /256 (via shift+8) instead of /255
+     - avg_std (a single value) instead of per-channel std
+
+     QAT must match this for training = deployment consistency.
+
+     Args:
+         data: Input tensor [N, C, H, W] or [N, C, L], expected range [0, 1] from torchvision
+         mean: Dataset mean (float or per-channel List[float])
+         std: Dataset std (float or per-channel List[float])
+
+     Returns:
+         Normalized tensor matching C inference behavior
+     """
+     # Calculate average std (C uses a single scale value)
+     if isinstance(std, (list, tuple)):
+         avg_std = sum(std) / len(std)
+     else:
+         avg_std = std
+
+     # Convert mean to a tensor with the proper shape for broadcasting:
+     # [1, C, 1] for 3D input (Conv1d), [1, C, 1, 1] for 4D input (Conv2d)
+     if isinstance(mean, (list, tuple)):
+         mean = torch.tensor(mean, dtype=data.dtype, device=data.device)
+         if data.dim() == 3:  # Conv1d: (B, C, L)
+             mean = mean.view(1, -1, 1)
+         else:  # Conv2d: (B, C, H, W)
+             mean = mean.view(1, -1, 1, 1)
+
+     # Match C inference:
+     # - data comes as [0,1] from torchvision (raw/255)
+     # - C uses raw/256, so multiply by 256/255 to compensate
+     # - C uses avg_std in scale, so divide by avg_std
+     return (data * (256.0 / 255.0) - mean) / avg_std
+
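+ # Worked example (values assumed for illustration, MNIST-style mean=0.1307,
+ # std=0.3081): a white pixel arrives from torchvision as 1.0, is rescaled to
+ # 256/255 ≈ 1.0039, and normalizes to (1.0039 - 0.1307) / 0.3081 ≈ 2.834.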
+
+ def _validate_model(model: nn.Module) -> None:
+     """Check the model for plain nn.Conv2d / nn.Linear layers.
+
+     potnn only supports PoTConv2d and PoTLinear. If plain layers are mixed
+     in, export will fail, so warn up front.
+     """
+     from .modules.conv import PoTConv2d
+     from .modules.depthwise import PoTDepthwiseConv2d
+     from .modules.linear import PoTLinear
+
+     errors = []
+
+     for name, module in model.named_modules():
+         # nn.Conv2d that is not a PoTConv2d/PoTDepthwiseConv2d
+         if isinstance(module, nn.Conv2d) and not isinstance(module, (PoTConv2d, PoTDepthwiseConv2d)):
+             errors.append(f" - {name}: replace nn.Conv2d with potnn.PoTConv2d")
+
+         # nn.Linear that is not a PoTLinear
+         if isinstance(module, nn.Linear) and not isinstance(module, PoTLinear):
+             errors.append(f" - {name}: replace nn.Linear with potnn.PoTLinear")
+
+     if errors:
+         error_msg = "\n".join(errors)
+         raise ValueError(
+             f"potnn supports PoT layers only. Replace the following layers:\n{error_msg}\n\n"
+             f"Example:\n"
+             f" nn.Conv2d(1, 16, 3) → potnn.PoTConv2d(1, 16, 3)\n"
+             f" nn.Linear(256, 10) → potnn.PoTLinear(256, 10)"
+         )
+
+
+ def train(model: nn.Module,
+           train_loader: DataLoader,
+           test_loader: DataLoader,
+           config: Config,
+           float_epochs: int = 15,
+           qat_epochs: int = 50,
+           float_lr: float = 1e-3,
+           qat_lr: float = 1e-4,
+           device: str = 'cuda',
+           fuse_bn: bool = True,
+           verbose: bool = True) -> nn.Module:
+     """Complete training pipeline: Float → (BN Fusion) → Calibration → QAT → Integer Sim
+
+     Args:
+         model: PoT model (must use PoTConv2d, PoTLinear)
+         train_loader: Training data loader (raw [0,1] input, NO Normalize transform needed)
+         test_loader: Test data loader (raw [0,1] input, NO Normalize transform needed)
+         config: potnn Config (mean/std used for automatic normalization)
+         float_epochs: Float training epochs (default: 15)
+         qat_epochs: QAT training epochs (default: 50)
+         float_lr: Float training learning rate (default: 1e-3)
+         qat_lr: QAT training learning rate (default: 1e-4)
+         device: 'cuda' or 'cpu' (default: 'cuda')
+         fuse_bn: Fuse BatchNorm layers after float training (default: True)
+         verbose: Print progress (default: True)
+
+     Returns:
+         Trained model with Integer Simulation enabled.
+         - model.train(): uses Float QAT for fine-tuning
+         - model.eval(): uses Integer Simulation (matches C exactly)
+
+     Note:
+         Input normalization is handled automatically using config.mean/std.
+         Do NOT add transforms.Normalize() to your DataLoader.
+     """
+     # Validate the model: fail fast if plain nn.Conv2d / nn.Linear layers are used
+     _validate_model(model)
+
+     model = model.to(device)
+
+     # Get normalization params from config
+     mean = config.mean if config.mean is not None else 0.0
+     std = config.std if config.std is not None else 1.0
+
+     # Phase 1: Float Training
+     if verbose:
+         print(f"\n[Phase 1] Float Training ({float_epochs} epochs)...")
+
+     optimizer = torch.optim.AdamW(model.parameters(), lr=float_lr)
+     scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float_epochs)
+
+     best_float_acc = 0
+     for epoch in range(float_epochs):
+         model.train()
+         for data, target in train_loader:
+             data, target = data.to(device), target.to(device)
+             data = _normalize_data(data, mean, std)  # Auto normalize
+             optimizer.zero_grad()
+             output = model(data)
+             loss = F.cross_entropy(output, target)
+             loss.backward()
+             optimizer.step()
+
+         scheduler.step()
+         acc = _evaluate(model, test_loader, device, mean, std)
+         best_float_acc = max(best_float_acc, acc)
+
+         if verbose and (epoch % 5 == 0 or epoch == float_epochs - 1):
+             print(f" Epoch {epoch+1}/{float_epochs}: {acc:.2f}%")
+
+     if verbose:
+         print(f" Best Float: {best_float_acc:.2f}%")
+
+     # Phase 1.5: BatchNorm Fusion (optional)
+     if fuse_bn:
+         if verbose:
+             print(f"\n[Phase 1.5] BatchNorm Fusion...")
+         model = fuse_batchnorm(model)
+
+     # Phase 2: Calibration
+     if verbose:
+         print(f"\n[Phase 2] Calibration...")
+
+     calibrate_model(model, train_loader, mean=mean, std=std)
+
+     # Phase 3: QAT Preparation
+     if verbose:
+         print(f"\n[Phase 3] Preparing QAT...")
+
+     prepare_qat(model, config)
+
+     # Set up first-layer info for mean absorption during QAT
+     # This ensures QAT uses the same bias as Integer Sim
+     from .modules.base import PoTLayerBase
+     pot_layers = [(name, m) for name, m in model.named_modules() if isinstance(m, PoTLayerBase)]
+     for i, (name, layer) in enumerate(pot_layers):
+         is_first = (i == 0)
+         is_last = (i == len(pot_layers) - 1)
+         layer.set_layer_position(is_first, is_last)
+         if is_first:
+             layer.set_input_std(config.std, config.mean)
+
+     # Phase 4: QAT Training (hybrid: float QAT first, integer sim for the last 40%)
+     if verbose:
+         print(f"\n[Phase 4] QAT Training ({qat_epochs} epochs)...")
+
+     optimizer = torch.optim.Adam(model.parameters(), lr=qat_lr)
+     scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, qat_epochs)
+
+     # Phase-switch epochs
+     constraint_start_epoch = int(qat_epochs * 0.8)  # 5-level constraint for the last 20%
+     integer_sim_start_epoch = int(qat_epochs * 0.6)  # Float QAT 60% → Integer sim fine-tune 40%
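+     # e.g. with qat_epochs=50: epochs 0-29 run float QAT, integer simulation
+     # starts at epoch 30, and the 5-level constraint kicks in at epoch 40.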
+
+     best_qat_acc = 0
+     best_state = None
+
+     for epoch in range(qat_epochs):
+         # Enable the 5-level constraint for the last 20% of epochs
+         if epoch == constraint_start_epoch:
+             for name, module in model.named_modules():
+                 if hasattr(module, 'enforce_5level_constraint'):
+                     if hasattr(module, 'encoding') and module.encoding == '5level':
+                         module.enforce_5level_constraint = True
+                         if verbose:
+                             print(f" [{name}] 5level constraint enabled (epoch {epoch+1})")
+
+         # Enable integer sim for the last 40% of epochs (fine-tune phase)
+         if epoch == integer_sim_start_epoch:
+             if verbose:
+                 print(f" [Integer Sim] Enabled for fine-tuning (epoch {epoch+1})")
+             enable_integer_sim(model, input_std=config.std, input_mean=config.mean, verbose=False)
+             # Lower the learning rate for fine-tuning
+             for param_group in optimizer.param_groups:
+                 param_group['lr'] = param_group['lr'] * 0.1
+
+         # Update integer params each epoch if using integer sim
+         if epoch >= integer_sim_start_epoch:
+             for name, module in model.named_modules():
+                 if isinstance(module, PoTLayerBase) and module.use_integer_sim:
+                     module.compute_integer_params()
+
+         model.train()
+         for data, target in train_loader:
+             data, target = data.to(device), target.to(device)
+             data = _normalize_data(data, mean, std)  # Auto normalize
+             optimizer.zero_grad()
+             output = model(data)
+             loss = F.cross_entropy(output, target) + alpha_reg_loss(model, 0.01)
+             loss.backward()
+             optimizer.step()
+
+         scheduler.step()
+         acc = _evaluate(model, test_loader, device, mean, std)
+
+         # Only update best after integer sim starts (to ensure C-compatible weights)
+         if epoch >= integer_sim_start_epoch and acc > best_qat_acc:
+             best_qat_acc = acc
+             best_state = {k: v.clone() for k, v in model.state_dict().items()}
+
+         if verbose and (epoch % 10 == 0 or epoch == qat_epochs - 1):
+             print(f" Epoch {epoch+1}/{qat_epochs}: {acc:.2f}%")
+
+     # Restore the best model
+     if best_state is not None:
+         model.load_state_dict(best_state, strict=False)
+
+     if verbose:
+         print(f" Best QAT: {best_qat_acc:.2f}%")
+
+     # Ensure integer sim is enabled for the final model
+     enable_integer_sim(model, input_std=config.std, input_mean=config.mean, verbose=verbose)
+
+     # Final integer params update
+     for name, module in model.named_modules():
+         if isinstance(module, PoTLayerBase) and module.use_integer_sim:
+             module.compute_integer_params()
+
+     # Final accuracy (with integer sim)
+     final_acc = _evaluate(model, test_loader, device, mean, std)
+
+     if verbose:
+         print(f"\n[Summary] Float: {best_float_acc:.2f}% → QAT: {best_qat_acc:.2f}% → C-Ready: {final_acc:.2f}%")
+
+     # Attach stats to the model for reporting
+     model.train_stats = {
+         'float_acc': best_float_acc,
+         'qat_acc': best_qat_acc,
+         'final_acc': final_acc
+     }
+
+     return model
+
+
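+ # Illustrative call (a sketch; the Config constructor arguments shown are
+ # assumed — only config.mean/config.std usage is confirmed by the code above):
+ #
+ #     config = Config(mean=0.1307, std=0.3081)
+ #     model = train(model, train_loader, test_loader, config, device='cuda')
+ #     print(model.train_stats)  # {'float_acc': ..., 'qat_acc': ..., 'final_acc': ...}
+
+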
+ def _evaluate(model: nn.Module, test_loader: DataLoader, device: str,
+               mean: float = 0.0, std: float = 1.0) -> float:
+     """Evaluate model accuracy with automatic normalization."""
+     model.eval()
+     correct = 0
+     total = 0
+
+     with torch.no_grad():
+         for data, target in test_loader:
+             data, target = data.to(device), target.to(device)
+             data = _normalize_data(data, mean, std)  # Auto normalize
+             output = model(data)
+             _, predicted = output.max(1)
+             total += target.size(0)
+             correct += predicted.eq(target).sum().item()
+
+     return 100. * correct / total
+
+
+ # Note: wrap() function has been removed.
+ # Users must define models using PoT layers directly:
+ # potnn.PoTConv2d, potnn.PoTConv1d, potnn.PoTLinear, etc.
+ # This ensures proper initialization of alpha, QAT parameters, and encoding.
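+ #
+ # Illustrative definition (a sketch; PoT layer signatures assumed to mirror
+ # their torch.nn counterparts, as in the _validate_model error message):
+ #
+ #     class TinyNet(nn.Module):
+ #         def __init__(self):
+ #             super().__init__()
+ #             self.conv = potnn.PoTConv2d(1, 16, 3)    # 16x16 input → 16x14x14
+ #             self.fc = potnn.PoTLinear(16 * 14 * 14, 10)
+ #
+ #         def forward(self, x):
+ #             x = F.relu(self.conv(x))
+ #             return self.fc(torch.flatten(x, 1))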