PyPI - pychop - Versions diffs - 0.5.2__py3-none-any.whl - Mend

pychop 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

pychop/__init__.py +87 -0
pychop/bfp_formats.py +390 -0
pychop/bitchop.py +79 -0
pychop/blas.py +74 -0
pychop/builtin/__init__.py +4 -0
pychop/builtin/cparray.py +68 -0
pychop/builtin/cparray_jax.py +202 -0
pychop/builtin/cpfloat.py +87 -0
pychop/builtin/cptensor.py +106 -0
pychop/chop.py +151 -0
pychop/demo_harmonic.py +56 -0
pychop/faultchop.py +222 -0
pychop/fixed_point.py +123 -0
pychop/float_params.py +123 -0
pychop/integer.py +170 -0
pychop/jx/__init__.py +36 -0
pychop/jx/bfp_formats.py +236 -0
pychop/jx/bitchop.py +262 -0
pychop/jx/blas_jx.py +1591 -0
pychop/jx/fixed_point.py +726 -0
pychop/jx/float_point.py +1160 -0
pychop/jx/integer.py +134 -0
pychop/jx/layers.py +4609 -0
pychop/jx/lightchop.py +785 -0
pychop/jx/mx_formats.py +262 -0
pychop/jx/squeeze.py +202 -0
pychop/layers.py +339 -0
pychop/math_func.py +324 -0
pychop/mx_formats.py +344 -0
pychop/np/__init__.py +5 -0
pychop/np/bfp_formats.py +205 -0
pychop/np/bitchop.py +222 -0
pychop/np/blas_np.py +1584 -0
pychop/np/fixed_point.py +560 -0
pychop/np/float_point.py +1180 -0
pychop/np/integer.py +91 -0
pychop/np/lightchop.py +624 -0
pychop/np/mx_formats.py +361 -0
pychop/np/roundit.py +1 -0
pychop/np/squeeze.py +180 -0
pychop/optimizers.py +453 -0
pychop/set_backend.py +80 -0
pychop/simulate.py +196 -0
pychop/tch/__init__.py +22 -0
pychop/tch/bfp_formats.py +278 -0
pychop/tch/bitchop.py +240 -0
pychop/tch/blas_th.py +1648 -0
pychop/tch/fixed_point.py +541 -0
pychop/tch/float_point.py +991 -0
pychop/tch/integer.py +166 -0
pychop/tch/layers.py +2366 -0
pychop/tch/lightchop.py +816 -0
pychop/tch/mx_formats.py +438 -0
pychop/tch/squeeze.py +173 -0
pychop/utils.py +415 -0
pychop-0.5.2.dist-info/METADATA +425 -0
pychop-0.5.2.dist-info/RECORD +60 -0
pychop-0.5.2.dist-info/WHEEL +5 -0
pychop-0.5.2.dist-info/licenses/LICENSE +21 -0
pychop-0.5.2.dist-info/top_level.txt +1 -0

pychop/__init__.py ADDED Viewed

@@ -0,0 +1,87 @@
+"""
+Pychop: Precision Simulation for Low-Precision Arithmetic
+A comprehensive Python package for simulating low-precision arithmetic in
+scientific computing and machine learning, with support for multiple backends
+(NumPy, JAX, PyTorch).
+Supported formats:
+- Floating-point (Chop): IEEE 754 and custom formats
+- Fixed-point (Chopf): Integer and fractional bits
+- Integer quantization (Chopi): Symmetric and asymmetric
+- Block Floating Point (BFP): Shared exponent per block
+- Microscaling (MX): OCP standard with block-level scaling
+Backends:
+- NumPy: Pure numerical computation
+- JAX: Custom VJP for differentiation
+- PyTorch: Straight-Through Estimator (STE) for QAT
+Author: Erin Carson, Xinye Chen
+"""
+from .chop import Chop
+from .integer import Chopi
+from .fixed_point import Chopf
+from .simulate import Simulate
+from .float_params import float_params
+from .bitchop import Bitchop
+from .faultchop import FaultChop
+from .layers import ChopSTE, ChopfSTE, ChopiSTE
+from .math_func import *
+__version__ = '0.5.2'
+import os
+if 'chop_backend' not in os.environ:
+    os.environ['chop_backend'] = 'auto'
+from .set_backend import backend
+from dataclasses import dataclass
+from typing import Optional
+from .bfp_formats import (
+    BFPSpec,
+    BFPTensor,
+    BFP_FORMATS,
+    create_bfp_spec,
+    bfp_quantize,
+    print_bfp_format_table,
+)
+# MX Formats
+from .mx_formats import (
+    MXSpec,
+    MXTensor,
+    MX_FORMATS,
+    create_mx_spec,
+    mx_quantize,
+    compare_mx_formats,
+    print_mx_format_table,
+)
+@dataclass
+class Customs:
+    # Container for the parameters of a user-defined floating-point format.
+    # Every field is optional and defaults to None; which combinations are
+    # required is decided by the code that consumes this object (not shown here).
+    emax: Optional[int] = None # the maximum value of the exponent.
+    t: Optional[int] = None # the number of bits in the significand (including the hidden bit)
+    exp_bits: Optional[int] = None # the exponent bits
+    sig_bits: Optional[int] = None  # the significand bits (not including the hidden bit)
+@dataclass
+class Options:
+    # Plain record of rounding/format options; all fields are required
+    # (no defaults), so they must be passed in declaration order or by keyword.
+    # The field meanings below are inferred from the names and from `Customs`
+    # in this module — TODO confirm against the code that consumes Options.
+    t: int  # presumably significand bits including the hidden bit (cf. Customs.t)
+    emax: int  # presumably the maximum exponent (cf. Customs.emax)
+    prec: int
+    subnormal: bool
+    # NOTE(review): Bitchop documents rounding modes as int or str values;
+    # confirm that `bool` is really the intended annotation here.
+    rmode: bool
+    flip: bool
+    explim: bool
+    p: float

pychop/bfp_formats.py ADDED Viewed

@@ -0,0 +1,390 @@
+"""
+Block Floating Point (BFP) Format - Backend Agnostic Entry Point
+This module provides automatic backend detection and routing for BFP quantization.
+Supports NumPy, JAX, and PyTorch backends with automatic detection.
+Usage:
+    >>> import pychop
+    >>> pychop.backend('auto')  # Auto-detect from input
+    >>>
+    >>> # NumPy
+    >>> import numpy as np
+    >>> X = np.random.randn(1024, 768)
+    >>> X_q = bfp_quantize(X, format='bfp8')
+    >>>
+    >>> # PyTorch (with STE for training)
+    >>> import torch
+    >>> X = torch.randn(128, 768, requires_grad=True)
+    >>> X_q = bfp_quantize(X, format='bfp8')  # Automatic STE!
+    >>>
+    >>> # JAX
+    >>> import jax.numpy as jnp
+    >>> X = jnp.array(np.random.randn(512, 512))
+    >>> X_q = bfp_quantize(X, format='bfp8')
+Author: Xinye Chen
+"""
+import os
+from typing import Union, Tuple, Optional, Any
+from dataclasses import dataclass
+# ============================================================================
+# Backend Detection (inline to avoid import issues)
+# ============================================================================
+def _detect_array_type(x: Any) -> str:
+    """
+    Detect backend from input array type.
+    Parameters
+    ----------
+    x : Any
+        Input array or scalar
+    Returns
+    -------
+    str
+        Backend name: 'numpy', 'torch', or 'jax'
+    """
+    module = type(x).__module__
+    if "torch" in module:
+        return "torch"
+    if "jax" in module:
+        return "jax"
+    return "numpy"
+def _get_backend_env() -> str:
+    """Get backend from environment variable."""
+    return os.environ.get('chop_backend', 'auto')
+# ============================================================================
+# BFP Format Specification (Backend-Independent)
+# ============================================================================
+@dataclass
+class BFPSpec:
+    """
+    Block Floating Point format specification.
+    This is backend-independent and shared across all implementations.
+    Attributes
+    ----------
+    name : str
+        Format name
+    mantissa_bits : int
+        Number of mantissa bits per element (including sign)
+    block_size : int
+        Number of elements sharing same exponent
+    exponent_bits : int
+        Number of bits for shared exponent
+    has_sign : bool
+        Whether elements have sign bits
+    use_subnormals : bool
+        Whether to support subnormal numbers
+    """
+    name: str
+    mantissa_bits: int
+    block_size: int
+    exponent_bits: int = 8
+    has_sign: bool = True
+    use_subnormals: bool = False
+    @property
+    def total_bits_per_block(self) -> int:
+        """Total bits for entire block."""
+        return self.exponent_bits + (self.mantissa_bits * self.block_size)
+    @property
+    def compression_vs_fp32(self) -> float:
+        """Compression ratio vs FP32."""
+        fp32_bits = 32 * self.block_size
+        return fp32_bits / self.total_bits_per_block
+    @property
+    def compression_vs_fp16(self) -> float:
+        """Compression ratio vs FP16."""
+        fp16_bits = 16 * self.block_size
+        return fp16_bits / self.total_bits_per_block
+    def __repr__(self):
+        return (f"BFPSpec(name='{self.name}', mantissa={self.mantissa_bits}b, "
+                f"block_size={self.block_size}, exponent={self.exponent_bits}b)")
+# Predefined BFP formats (shared across all backends)
+BFP_FORMATS = {
+    'bfp16': BFPSpec('bfp16', mantissa_bits=16, block_size=16, exponent_bits=8),
+    'bfp12': BFPSpec('bfp12', mantissa_bits=12, block_size=16, exponent_bits=8),
+    'bfp8': BFPSpec('bfp8', mantissa_bits=8, block_size=32, exponent_bits=8),
+    'bfp6': BFPSpec('bfp6', mantissa_bits=6, block_size=32, exponent_bits=8),
+    'bfp4': BFPSpec('bfp4', mantissa_bits=4, block_size=32, exponent_bits=8),
+    'bfp3': BFPSpec('bfp3', mantissa_bits=3, block_size=64, exponent_bits=8),
+    'bfp2': BFPSpec('bfp2', mantissa_bits=2, block_size=128, exponent_bits=8),
+    'flexpoint16': BFPSpec('flexpoint16', mantissa_bits=16, block_size=16, exponent_bits=5),
+    'flexpoint8': BFPSpec('flexpoint8', mantissa_bits=8, block_size=32, exponent_bits=5),
+}
+def create_bfp_spec(
+    mantissa_bits: int,
+    block_size: int,
+    exponent_bits: int = 8,
+    name: Optional[str] = None
+) -> BFPSpec:
+    """
+    Create custom BFP format specification.
+    Parameters
+    ----------
+    mantissa_bits : int
+        Number of mantissa bits (1-32)
+    block_size : int
+        Elements per block
+    exponent_bits : int
+        Bits for shared exponent
+    name : str, optional
+        Custom name
+    Returns
+    -------
+    BFPSpec
+        BFP format specification
+    """
+    if name is None:
+        name = f"custom_bfp{mantissa_bits}"
+    return BFPSpec(
+        name=name,
+        mantissa_bits=mantissa_bits,
+        block_size=block_size,
+        exponent_bits=exponent_bits
+    )
+# ============================================================================
+# Backend Detection and Routing
+# ============================================================================
+def _resolve_backend(X: Any = None) -> str:
+    """
+    Resolve which backend to use.
+    Parameters
+    ----------
+    X : Any, optional
+        Input array (if provided, used for auto-detection)
+    Returns
+    -------
+    str
+        Backend name: 'numpy', 'jax', or 'torch'
+    """
+    env_backend = _get_backend_env()
+    if env_backend == 'auto':
+        if X is not None:
+            return _detect_array_type(X)
+        else:
+            # Default to numpy if no input provided
+            return 'numpy'
+    if env_backend not in {'numpy', 'jax', 'torch'}:
+        raise ValueError(
+            f"Invalid backend: {env_backend}. "
+            "Must be 'numpy', 'jax', 'torch', or 'auto'."
+        )
+    return env_backend
+def _get_backend_module(backend: str):
+    """
+    Get backend-specific BFP implementation.
+    Parameters
+    ----------
+    backend : str
+        Backend name
+    Returns
+    -------
+    module
+        Backend-specific BFP module
+    """
+    if backend == 'torch':
+        try:
+            from .tch import bfp_formats as backend_module
+        except ImportError:
+            raise ImportError(
+                "PyTorch backend not available. "
+                "Install with: pip install torch"
+            )
+    elif backend == 'jax':
+        try:
+            from .jx import bfp_formats as backend_module
+        except ImportError:
+            raise ImportError(
+                "JAX backend not available. "
+                "Install with: pip install jax jaxlib flax"
+            )
+    elif backend == 'numpy':
+        from .np import bfp_formats as backend_module
+    else:
+        raise ValueError(f"Unsupported backend: {backend}")
+    return backend_module
+# ============================================================================
+# User-Facing Functions
+# ============================================================================
+def bfp_quantize(
+    data: Any,
+    format: Union[str, BFPSpec, Tuple[int, int]] = 'bfp8',
+    backend: Optional[str] = None
+) -> Any:
+    """
+    Quantize array to BFP format.
+    Automatically detects backend from input type or uses specified backend.
+    Parameters
+    ----------
+    data : array-like
+        Input data (numpy.ndarray, torch.Tensor, or jax.Array)
+    format : str, BFPSpec, or tuple
+        BFP format specification
+    backend : str, optional
+        Force specific backend ('numpy', 'jax', or 'torch')
+        If None, auto-detects from input
+    Returns
+    -------
+    array-like
+        Quantized data (same type as input)
+    Examples
+    --------
+    >>> # NumPy
+    >>> import numpy as np
+    >>> X = np.random.randn(1024, 768)
+    >>> X_q = bfp_quantize(X, format='bfp8')
+    >>>
+    >>> # PyTorch (with automatic STE if requires_grad=True)
+    >>> import torch
+    >>> X = torch.randn(128, 768, requires_grad=True)
+    >>> X_q = bfp_quantize(X, format='bfp8')
+    >>> loss = X_q.sum()
+    >>> loss.backward()  # Gradients flow through!
+    >>>
+    >>> # Custom format
+    >>> X_q = bfp_quantize(X, format=(4, 32))  # 4-bit mantissa, 32 elem/block
+    """
+    # Resolve backend
+    if backend is None:
+        backend = _resolve_backend(data)
+    # Get backend module
+    backend_module = _get_backend_module(backend)
+    # Call backend-specific quantization
+    return backend_module.bfp_quantize(data, format=format)
+class BFPTensor:
+    """
+    Backend-agnostic BFP tensor wrapper.
+    Automatically routes to appropriate backend implementation.
+    Parameters
+    ----------
+    data : array-like
+        Input tensor
+    format : str, BFPSpec, or tuple
+        BFP format
+    backend : str, optional
+        Force specific backend
+    Examples
+    --------
+    >>> # NumPy backend
+    >>> import numpy as np
+    >>> X = np.random.randn(1024, 768)
+    >>> bfp = BFPTensor(X, format='bfp8')
+    >>> X_reconstructed = bfp.dequantize()
+    >>> stats = bfp.statistics()
+    """
+    def __init__(
+        self,
+        data: Any,
+        format: Union[str, BFPSpec, Tuple[int, int]] = 'bfp8',
+        backend: Optional[str] = None
+    ):
+        # Resolve backend
+        if backend is None:
+            self.backend = _resolve_backend(data)
+        else:
+            self.backend = backend
+        # Get backend module
+        backend_module = _get_backend_module(self.backend)
+        # Create backend-specific tensor
+        self._impl = backend_module.BFPTensor_(data, format=format)
+    def dequantize(self) -> Any:
+        """Dequantize to original data type."""
+        return self._impl.dequantize()
+    def statistics(self) -> dict:
+        """Get quantization statistics."""
+        return self._impl.statistics()
+    def __repr__(self):
+        return f"BFPTensor(backend={self.backend}, impl={self._impl})"
+def print_bfp_format_table():
+    """Print table of predefined BFP formats."""
+    print("="*90)
+    print("Predefined BFP Formats")
+    print("="*90)
+    header = (f"{'Name':<15} {'Mantissa':<10} {'Block Size':<12} "
+              f"{'Exponent':<10} {'Compress FP16':<15} {'Total Bits':<12}")
+    print(header)
+    print("-"*90)
+    for name, spec in BFP_FORMATS.items():
+        row = (f"{spec.name:<15} "
+               f"{spec.mantissa_bits:<10} "
+               f"{spec.block_size:<12} "
+               f"{spec.exponent_bits:<10} "
+               f"{spec.compression_vs_fp16:.2f}x{'':>11} "
+               f"{spec.total_bits_per_block}")
+        print(row)
+    print("="*90)
+__all__ = [
+    'BFPSpec',
+    'BFPTensor',
+    'BFP_FORMATS',
+    'create_bfp_spec',
+    'bfp_quantize',
+    'print_bfp_format_table',
+]

pychop/bitchop.py ADDED Viewed

@@ -0,0 +1,79 @@
+import os
+import numpy as np
+def Bitchop(exp_bits, sig_bits, rmode="nearest_even", subnormal=True, random_state=42, device="cpu", verbose=0):
+    """
+    Parameters
+    ----------
+    exp_bits : int
+        Number of bits for the exponent in the target format. Determines the range
+        of representable values (e.g., 5 bits gives a bias of 15, range -14 to 15).
+    sig_bits : int
+        Number of bits for the significand (mantissa) in the target format, excluding
+        the implicit leading 1 for normalized numbers (e.g., 4 bits allows 0 to 15 plus implicit 1).
+    subnormal : boolean
+        Whether or not support subnormal numbers are supported.
+        If set `subnormal=False`, subnormals are flushed to zero.
+    rmode : int or str, default="nearest_even"
+        Rounding mode to use when quantizing the significand. Options are:
+        - 1 or "nearest_even": Round to nearest value, ties to even (IEEE 754 default).
+        - 0 or "nearest_odd": Round to nearest value, ties to odd.
+        - 2 or "plus_infinity": Round towards plus infinity (round up).
+        - 3 or "minus_infinity": Round towards minus infinity (round down).
+        - 4 or "toward_zero": Truncate toward zero (no rounding up).
+        - 5 or "stochastic_prop": Stochastic rounding proportional to the fractional part.
+        - 6 or "stochastic_equal": Stochastic rounding with 50% probability.
+    random_state : int, default=0
+        Random seed set for stochastic rounding settings.
+    device : str or torch.device, optional, default="cpu"
+        Device to perform computations on (e.g., "cpu", "cuda").
+    subnormal (bool, optional): If True, supports denormalized numbers (subnormals) when
+        the exponent underflows, shifting the significand. If False, underflows result in zero.
+        Defaults to True.
+    verbose : int | bool, defaul=0
+        Whether or not to print out the unit-roundoff.
+    Properties
+    ----------
+    u : float,
+        Unit roundoff corresponding to the floating point format
+    Methods
+    ----------
+    Bitchop(x)
+        Method that convert ``x`` to the user-specific arithmetic format.
+    Returns
+    ----------
+    Bitchop | object,
+        ``Chop`` instance.
+    """
+    if os.environ['chop_backend'] == 'torch':
+        from .tch.bitchop import Bitchop
+        obj = Bitchop(exp_bits=exp_bits, sig_bits=sig_bits, subnormal=subnormal, device=device,
+                   random_state=random_state, rmode=rmode)
+    elif os.environ['chop_backend'] == 'jax':
+        from .jx.bitchop import Bitchop
+        obj = Bitchop(exp_bits=exp_bits, sig_bits=sig_bits, subnormal=subnormal, device=device,
+                   random_state=random_state, rmode=rmode)
+    else:
+        from .np.bitchop import Bitchop
+        obj = Bitchop(exp_bits=exp_bits, sig_bits=sig_bits, subnormal=subnormal, random_state=random_state, rmode=rmode)
+    obj.u = 2**sig_bits / 2
+    if verbose:
+        print("The floating point format is with unit-roundoff of {:e}".format(
+            obj.u)+" (≈2^"+str(int(np.log2(obj.u)))+").")
+    return obj

pychop/blas.py ADDED Viewed

@@ -0,0 +1,74 @@
+from pychop import LightChop
+import torch
+import pychop
+pychop.backend('torch')
+precision_configs = {
+    'q52': {'exp_bits': 5, 'sig_bits': 2, 'rmode': 1},
+    'q43': {'exp_bits': 4, 'sig_bits': 3, 'rmode': 1},
+    'bf16': {'exp_bits': 8, 'sig_bits': 7, 'rmode': 1},
+    'half': {'exp_bits': 5, 'sig_bits': 10, 'rmode': 1},
+    'tf32': {'exp_bits': 8, 'sig_bits': 10, 'rmode': 1},
+    'fp32': {'exp_bits': 8, 'sig_bits': 23, 'rmode': 1},
+    'fp64': {'exp_bits': 11, 'sig_bits': 52, 'rmode': 1}
+}
+precision_fallback = ['q52', 'q43', 'bf16', 'half', 'tf32', 'fp32', 'fp64'] # Precision fallback order
+def chop(x, precision_idx=0):
+    """Recursive chop function"""
+    if not torch.is_tensor(x):
+        x = torch.tensor(x, dtype=torch.float64, device=device)
+    if precision_idx >= len(precision_fallback):
+        return x
+    precision = precision_fallback[precision_idx]
+    if precision == 'fp64':
+        return x
+    ch = LightChop(**precision_configs[precision])
+    result = ch(x)
+    if not torch.any(torch.isnan(result)) and not torch.any(torch.isinf(result)):
+        return result.to(torch.float64).to(device)
+    logging.debug(f"Chop: Precision {precision} failed, escalating to {precision_fallback[precision_idx + 1]}")
+    return chop(x, precision_idx + 1)
+def rounding(x, precision):
+    return chop(x, precision_idx=precision_fallback.index(precision))
+def mixed_precision_op(op, x, precision, y=None):
+    """Mixed-precision operation"""
+    x = rounding(x, precision)
+    if y is None:
+        unrounded = op(x)
+    else:
+        y = rounding(y, precision)
+        unrounded = op(x, y)
+    if precision == 'fp64':
+        return unrounded.to(device)
+    result = chop(unrounded, precision_idx=precision_fallback.index(precision))
+    return result.to(device)
+def round_sparse_matrix(A, precision):
+    """Round sparse matrix to specified precision"""
+    if precision == 'fp64':
+        return A
+    A_coo = A.tocoo()
+    data = torch.tensor(A_coo.data, dtype=torch.float64, device=device)
+    ch = LightChop(**precision_configs[precision])
+    rounded_data = ch(data)
+    if torch.any(torch.isnan(rounded_data)) or torch.any(torch.isinf(rounded_data)):
+        logging.warning(f"Rounding sparse matrix to {precision} failed; using fp64")
+        return A
+    return csc_matrix((rounded_data.cpu().numpy(), (A_coo.row, A_coo.col)), shape=A.shape)
+if __name__ == "__main__":
+  import numpy as np
+  A = np.random.randn(100, 100)
+  B = np.random.randn(100, 100)
+  C = A + B
+  print("C:", C)
+  print("C (fp32):",mixed_precision_op(lambda x, y: x+y, A, 'fp32', B))

pychop/builtin/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .cpfloat import *
+from .cparray import *
+from .cparray_jax import *
+from .cptensor import *

pychop/builtin/cparray.py ADDED Viewed

@@ -0,0 +1,68 @@
+import numpy as np
+from pychop import Chop  # Or: from pychop import Chop
+class CPArray(np.ndarray):
+    """
+    A NumPy array subclass that maintains chopped precision after arithmetic ops.
+    - Inherits from np.ndarray for full compatibility.
+    - Stores a ``chopper`` callable and applies it to the data on construction.
+    - Ufunc and matmul results are chopped again and returned as CPArray
+      (0-d ufunc results are returned as Python scalars).
+    """
+    def __new__(cls, input_array, chopper=None):
+        # `chopper` is effectively required: None is rejected below.
+        if chopper is None:
+            raise ValueError("Must provide a chopper (Chop or Chop instance)")
+        # Chop the base array FIRST (pure ndarray) to avoid subclass recursion
+        base_input = np.asarray(input_array)  # Strip any subclass
+        chopped_base = chopper(base_input)    # Chop on pure -> pure chopped ndarray
+        # Now view the chopped base as CPArray (the view itself does not chop)
+        obj = chopped_base.view(cls)
+        obj.chopper = chopper
+        return obj
+    def __array_finalize__(self, obj):
+        # Called by NumPy for views/slices/templates: inherit the parent's
+        # chopper, or None when the parent has none.
+        if obj is None:
+            return
+        self.chopper = getattr(obj, 'chopper', None)
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        """
+        Override for ufuncs (+, -, *, /, etc.): Compute on pure arrays, chop the result, wrap as CPArray.
+        Raises ValueError when CPArray inputs carry different choppers.
+        """
+        # Validate same chopper for CPArray inputs
+        for inp in inputs:
+            if isinstance(inp, CPArray) and inp.chopper != self.chopper:
+                raise ValueError("All CPArray inputs must use the same chopper")
+        # Compute on pure ndarrays
+        full_inputs = [np.asarray(x) for x in inputs]  # Strip subclasses
+        result = getattr(ufunc, method)(*full_inputs, **kwargs)  # Pure computation
+        # Chop the pure result
+        # NOTE(review): assumes `result` is a single array; multi-output ufuncs
+        # and methods returning None are not handled here — confirm.
+        chopped_result = self.chopper(result)  # Chop on pure -> pure chopped
+        # 0-d results become Python scalars; everything else is wrapped again
+        if chopped_result.ndim == 0:
+            return chopped_result.item()  # Scalar fallback
+        else:
+            return CPArray(chopped_result, self.chopper)  # __new__ applies the chopper once more
+    # Matmul: Strip self to pure before computation
+    def __matmul__(self, other):
+        self_pure = self.view(np.ndarray)  # Strip subclass
+        other_pure = np.asarray(other)
+        result = np.matmul(self_pure, other_pure)
+        return CPArray(result, self.chopper)  # The product is chopped inside __new__
+    def __rmatmul__(self, other):
+        # Reflected matmul: same as __matmul__ with the operands swapped.
+        return CPArray(np.matmul(np.asarray(other), self.view(np.ndarray)), self.chopper)
+    # Utility: convert with np.asarray
+    def to_regular(self):
+        return np.asarray(self)
+    def __str__(self):
+        # Shows exp_bits/sig_bits when the chopper exposes exp_bits, else "custom".
+        prec_info = f"exp_bits={self.chopper.exp_bits}, sig_bits={self.chopper.sig_bits}" if hasattr(self.chopper, 'exp_bits') else "custom"
+        return f"CPArray({np.array2string(self)}, {prec_info})"
+    def __repr__(self):
+        return str(self)