JSTprove 1.0.0-py3-none-macosx_11_0_arm64.whl → 1.2.0-py3-none-macosx_11_0_arm64.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of JSTprove might be problematic.
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/METADATA +3 -3
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/RECORD +60 -25
- python/core/binaries/onnx_generic_circuit_1-2-0 +0 -0
- python/core/circuit_models/generic_onnx.py +43 -9
- python/core/circuits/base.py +231 -71
- python/core/model_processing/converters/onnx_converter.py +114 -59
- python/core/model_processing/onnx_custom_ops/batchnorm.py +64 -0
- python/core/model_processing/onnx_custom_ops/maxpool.py +1 -1
- python/core/model_processing/onnx_custom_ops/mul.py +66 -0
- python/core/model_processing/onnx_custom_ops/relu.py +1 -1
- python/core/model_processing/onnx_quantizer/layers/add.py +54 -0
- python/core/model_processing/onnx_quantizer/layers/base.py +188 -1
- python/core/model_processing/onnx_quantizer/layers/batchnorm.py +224 -0
- python/core/model_processing/onnx_quantizer/layers/constant.py +1 -1
- python/core/model_processing/onnx_quantizer/layers/conv.py +20 -68
- python/core/model_processing/onnx_quantizer/layers/gemm.py +20 -66
- python/core/model_processing/onnx_quantizer/layers/maxpool.py +53 -43
- python/core/model_processing/onnx_quantizer/layers/mul.py +53 -0
- python/core/model_processing/onnx_quantizer/layers/relu.py +20 -35
- python/core/model_processing/onnx_quantizer/layers/sub.py +54 -0
- python/core/model_processing/onnx_quantizer/onnx_op_quantizer.py +43 -1
- python/core/utils/general_layer_functions.py +17 -12
- python/core/utils/model_registry.py +6 -3
- python/scripts/gen_and_bench.py +2 -2
- python/tests/circuit_e2e_tests/other_e2e_test.py +202 -9
- python/tests/circuit_parent_classes/test_circuit.py +561 -38
- python/tests/circuit_parent_classes/test_onnx_converter.py +22 -13
- python/tests/onnx_quantizer_tests/__init__.py +1 -0
- python/tests/onnx_quantizer_tests/layers/__init__.py +13 -0
- python/tests/onnx_quantizer_tests/layers/add_config.py +102 -0
- python/tests/onnx_quantizer_tests/layers/base.py +279 -0
- python/tests/onnx_quantizer_tests/layers/batchnorm_config.py +190 -0
- python/tests/onnx_quantizer_tests/layers/constant_config.py +39 -0
- python/tests/onnx_quantizer_tests/layers/conv_config.py +154 -0
- python/tests/onnx_quantizer_tests/layers/factory.py +142 -0
- python/tests/onnx_quantizer_tests/layers/flatten_config.py +61 -0
- python/tests/onnx_quantizer_tests/layers/gemm_config.py +160 -0
- python/tests/onnx_quantizer_tests/layers/maxpool_config.py +82 -0
- python/tests/onnx_quantizer_tests/layers/mul_config.py +102 -0
- python/tests/onnx_quantizer_tests/layers/relu_config.py +61 -0
- python/tests/onnx_quantizer_tests/layers/reshape_config.py +61 -0
- python/tests/onnx_quantizer_tests/layers/sub_config.py +102 -0
- python/tests/onnx_quantizer_tests/layers_tests/__init__.py +0 -0
- python/tests/onnx_quantizer_tests/layers_tests/base_test.py +94 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_check_model.py +115 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_e2e.py +196 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_error_cases.py +59 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_integration.py +198 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_quantize.py +267 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_scalability.py +109 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_validation.py +45 -0
- python/tests/onnx_quantizer_tests/test_base_layer.py +228 -0
- python/tests/onnx_quantizer_tests/test_exceptions.py +99 -0
- python/tests/onnx_quantizer_tests/test_onnx_op_quantizer.py +246 -0
- python/tests/onnx_quantizer_tests/test_registered_quantizers.py +121 -0
- python/tests/onnx_quantizer_tests/testing_helper_functions.py +17 -0
- python/core/binaries/onnx_generic_circuit_1-0-0 +0 -0
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/WHEEL +0 -0
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/entry_points.txt +0 -0
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/top_level.txt +0 -0
python/core/model_processing/onnx_quantizer/layers/base.py

@@ -1,12 +1,14 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from typing import ClassVar
 
 import numpy as np
 import onnx
 from onnx import helper, numpy_helper
 
 from python.core.model_processing.onnx_custom_ops.onnx_helpers import (
+    extract_attributes,
     replace_input_references,
 )
 from python.core.model_processing.onnx_quantizer.exceptions import (
@@ -188,7 +190,7 @@ class BaseOpQuantizer:
         """
         self.validate_node_has_output(node)
 
-        original_output = node.output
+        original_output = node.output[0]
         quantized_output = original_output + "_raw"
         node.output[0] = quantized_output
 
@@ -294,6 +296,61 @@ class BaseOpQuantizer:
         # === Mutate the original node ===
         return nodes, new_inputs
 
+    def add_scaled_initializer_inputs(
+        self: BaseOpQuantizer,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto],
+        scale_base: int,
+        scale_exponent: int,
+        scale_plan: dict[int, int],
+    ) -> tuple[list[onnx.NodeProto], list[str]]:
+        """
+        Scale and cast specific initializer inputs
+        of a node according to a scaling plan.
+
+        Handles optional inputs gracefully (e.g. missing bias).
+        """
+        new_nodes: list[onnx.NodeProto] = []
+        new_inputs = list(node.input)
+
+        for input_idx, scale_mult in scale_plan.items():
+            # Skip if node doesn't have that many inputs (e.g. missing bias)
+            if input_idx >= len(node.input):
+                # Just ignore — optional input not provided
+                continue
+
+            input_name = node.input[input_idx]
+            if not input_name:
+                # Empty input name → optional input not present
+                continue
+
+            if input_name not in initializer_map:
+                # Optional inputs may be missing from initializers (e.g., dynamic bias)
+                continue
+
+            tensor = initializer_map[input_name]
+            if not tensor.name:
+                raise HandlerImplementationError(
+                    op_type=node.op_type,
+                    message=f"Initializer tensor for '{input_name}' on node "
+                    f"'{node.name}' is missing a name.",
+                )
+
+            # Scale according to plan (e.g., scale_exponent * 2 for bias)
+            quant_name, mul_node, cast_node = self.insert_scale_node(
+                tensor=tensor,
+                scale_base=scale_base,
+                scale_exponent=(scale_exponent * scale_mult),
+            )
+
+            # Update node input to point to scaled version
+            new_inputs[input_idx] = quant_name
+
+            # Record new scaling/cast nodes
+            new_nodes.extend([mul_node, cast_node])
+
+        return new_nodes, new_inputs
+
     def insert_scale_node(
         self: BaseOpQuantizer,
         tensor: onnx.TensorProto,
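For orientation: a scale plan maps a node input index to a multiplier on the scale exponent, so the default plan {1: 1, 2: 2} scales the weight (input 1) by base**exponent and the bias (input 2) by base**(2 * exponent), keeping the bias aligned with the product of a scaled activation and a scaled weight. A minimal sketch of that arithmetic, with base 2 and exponent 8 chosen purely for illustration:

# Sketch of the SCALE_PLAN convention (illustrative values, not from the diff).
scale_base, scale_exponent = 2, 8
scale_plan = {1: 1, 2: 2}  # weight = 1x scale, bias = 2x scale
factors = {idx: scale_base ** (scale_exponent * mult) for idx, mult in scale_plan.items()}
assert factors == {1: 256, 2: 65536}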
@@ -360,6 +417,136 @@ class BaseOpQuantizer:
         return output_name, mul_node, cast_to_int64
 
 
+class QuantizerBase:
+    OP_TYPE = None
+    DOMAIN = "ai.onnx.contrib"
+    DEFAULT_ATTRS: ClassVar = {}
+    USE_WB = False
+    USE_SCALING = False
+
+    def quantize(
+        self,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        """Generic quantization template for most Int64 ops."""
+        _ = graph
+        nodes = []
+
+        # (1) Quantize weights/bias if applicable
+        if self.USE_WB:
+            # Each subclass defines its scaling plan for which inputs get scaled and how
+            scale_plan = getattr(self, "SCALE_PLAN", {1: 1, 2: 2})  # default for W & B
+            nodes, new_inputs = self.add_scaled_initializer_inputs(
+                node=node,
+                initializer_map=initializer_map,
+                scale_base=scale_config.base,
+                scale_exponent=scale_config.exponent,
+                scale_plan=scale_plan,
+            )
+            node.input[:] = new_inputs
+
+        # (2) Collect & merge attributes
+        attrs = extract_attributes(node)
+        for k, v in self.DEFAULT_ATTRS.items():
+            attrs.setdefault(k, v)
+        if self.USE_SCALING:
+            attrs["rescale"] = int(scale_config.rescale)
+
+        # (3) Add scaling constant if needed
+        if self.USE_SCALING:
+            scale_value = self.get_scaling(scale_config.base, scale_config.exponent)
+            scale_name = f"{node.name}_int_scaler"
+            scale_tensor = numpy_helper.from_array(
+                np.array([scale_value], dtype=np.int64),
+                name=scale_name,
+            )
+            self.new_initializers.append(scale_tensor)
+            node.input.append(scale_name)
+
+        # (4) Create quantized node
+        quantized_node = onnx.helper.make_node(
+            self.OP_TYPE,
+            inputs=node.input,
+            outputs=node.output,
+            name=node.name,
+            domain=self.DOMAIN,
+            **attrs,
+        )
+
+        nodes.append(quantized_node)
+        return nodes
+
+    def pre_analysis_transform(
+        self: QuantizerBase,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        initializer_map: dict[str, onnx.TensorProto],
+        scale_base: int,
+        scale_exponent: int,
+    ) -> None:
+        """
+        pre_analysis_transform aims to transform the given layer along the
+        same lines as it would be transformed for the quantized model, but
+        for the weights and biases file instead, to be sent to the backend
+
+        Default pre-analysis behavior:
+
+        - If the subclass uses weights/bias (`USE_WB=True`), apply the SAME
+          scaling rules as quantization, but directly mutate the initializers.
+
+        - Subclasses can override this to implement more complex rewrites
+          (e.g., BatchNorm → Mul/Add).
+
+        Args:
+            node (onnx.NodeProto): Node to transform.
+            graph (onnx.GraphProto): Rest of the Onnx graph for initializers.
+            initializer_map (dict[str, onnx.TensorProto]): The initializer map.
+
+            scale_base (int): Scaling base.
+            scale_exponent (int): Scaling exponent.
+
+        NOTE
+        - The resulting model will not make accurate prediction and should be
+          used solely for analysis and keeping track of w_and_b
+        """
+        # If subclass does not want auto-scaling, do nothing
+        if not getattr(self, "USE_WB", False):
+            return
+
+        # Each quantizer defines which inputs to scale (Weight:1x, Bias:2x etc.)
+        scale_plan = getattr(self, "SCALE_PLAN", {})
+
+        # Perform the same scaling as quantization, but directly modify initializers
+        for input_idx, scale_mult in scale_plan.items():
+            if input_idx >= len(node.input):
+                continue
+
+            name = node.input[input_idx]
+            if name not in initializer_map:
+                continue  # optional input missing
+
+            tensor = initializer_map[name]
+            arr = numpy_helper.to_array(tensor).astype(np.float64)
+
+            scale = scale_base ** (scale_exponent * scale_mult)
+            new_arr = arr * scale
+
+            # Replace initializer directly
+            new_tensor = numpy_helper.from_array(new_arr, name=tensor.name)
+
+            # Modify graph initializer in place
+            for j in range(len(graph.initializer)):
+                if graph.initializer[j].name == tensor.name:
+                    del graph.initializer[j]
+                    break
+            graph.initializer.append(new_tensor)
+
+            initializer_map[tensor.name] = new_tensor
+
+
 class PassthroughQuantizer(BaseOpQuantizer):
     """
     Quantizer that leaves the node unchanged.
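With this template in place, a concrete op quantizer can often be reduced to class-level configuration, as the Conv and Gemm refactors below show. A minimal hypothetical subclass (Int64Example is an illustrative op name, not one shipped in this package):

from typing import ClassVar

# Hypothetical subclass of the QuantizerBase template (illustrative only).
class QuantizeExample(QuantizerBase):
    OP_TYPE = "Int64Example"  # illustrative custom op name
    USE_WB = True             # scale weight/bias initializers per SCALE_PLAN
    USE_SCALING = True        # append the int scaler input and the rescale attr
    DEFAULT_ATTRS: ClassVar = {}
    SCALE_PLAN: ClassVar = {1: 1, 2: 2}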
python/core/model_processing/onnx_quantizer/layers/batchnorm.py

@@ -0,0 +1,224 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar
+
+from python.core.circuits.errors import CircuitConfigurationError
+
+if TYPE_CHECKING:
+    import onnx
+
+import numpy as np
+from onnx import helper, numpy_helper
+
+from python.core.model_processing.onnx_custom_ops.onnx_helpers import extract_attributes
+from python.core.model_processing.onnx_quantizer.exceptions import InvalidParamError
+from python.core.model_processing.onnx_quantizer.layers.base import (
+    BaseOpQuantizer,
+    QuantizerBase,
+    ScaleConfig,
+)
+
+
+class QuantizeBatchnorm(QuantizerBase):
+    OP_TYPE = "Int64BatchNorm"
+    USE_WB = True
+    USE_SCALING = False
+    SCALE_PLAN: ClassVar = {}
+
+
+class BatchnormQuantizer(BaseOpQuantizer, QuantizeBatchnorm):
+    """
+    Quantizer for ONNX Batchnorm layers.
+
+    - Uses standard ONNX Batchnorm layer in standard domain, and
+      makes relevant additional changes to the graph.
+    """
+
+    def __init__(
+        self: BatchnormQuantizer,
+        new_initializers: list[onnx.TensorProto] | None = None,
+    ) -> None:
+        super().__init__()
+        # Only replace if caller provided something
+        if new_initializers is not None:
+            self.new_initializers = new_initializers
+
+    def _compute_mul_add(
+        self: BatchnormQuantizer,
+        initializer_map: dict[str, onnx.TensorProto],
+        node: onnx.NodeProto,
+        scale_base: int,
+        scale_exponent: int,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Compute the 'mul' and 'add' tensors for BatchNorm folding.
+        """
+        self._validate_inputs(node=node)
+        # ONNX BatchNorm inputs: [X, scale, bias, mean, var]
+        scale_factor = scale_base**scale_exponent
+        scale = numpy_helper.to_array(initializer_map[node.input[1]]).astype(np.float32)
+        bias = numpy_helper.to_array(initializer_map[node.input[2]]).astype(np.float32)
+        mean = numpy_helper.to_array(initializer_map[node.input[3]]).astype(np.float32)
+        var = numpy_helper.to_array(initializer_map[node.input[4]]).astype(np.float32)
+
+        # Find epsilon attribute
+        epsilon_attr = next((a for a in node.attribute if a.name == "epsilon"), None)
+        epsilon = float(epsilon_attr.f) if epsilon_attr else 1e-5
+
+        mul = scale / np.sqrt(var + epsilon)
+        add = bias - mean * mul
+        scaled_add = add * (scale_factor**2)
+        scaled_mul = scale_factor * mul
+        return scaled_mul, scaled_add
+
+    def pre_analysis_transform(
+        self: BatchnormQuantizer,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        initializer_map: dict[str, onnx.TensorProto],
+        scale_base: int,
+        scale_exponent: int,
+    ) -> None:
+        # Compute linearized BN tensors
+        mul, add = self._compute_mul_add(
+            initializer_map,
+            node,
+            scale_base=scale_base,
+            scale_exponent=scale_exponent,
+        )
+
+        # Name base
+        node_name = node.name if node.name else node.input[0]
+        mul_name = f"{node_name}_mul"
+        add_name = f"{node_name}_add"
+
+        # Create ONNX tensors
+        mul_tensor = numpy_helper.from_array(mul.astype(np.int64), name=mul_name)
+        add_tensor = numpy_helper.from_array(add.astype(np.int64), name=add_name)
+
+        # Insert them into the graph
+        graph.initializer.extend([mul_tensor, add_tensor])
+        initializer_map[mul_name] = mul_tensor
+        initializer_map[add_name] = add_tensor
+        self.new_initializers.extend([mul_tensor, add_tensor])
+
+        node.input[:] = [node.input[0], mul_name, add_name]
+
+        del node.attribute[:]
+
+    def quantize(
+        self,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        _ = graph
+
+        nodes: list[onnx.NodeProto] = []
+
+        # 1. Compute unscaled float mul/add coefficients
+        mul, add = self._compute_mul_add(
+            initializer_map,
+            node,
+            scale_base=1,
+            scale_exponent=1,
+        )
+
+        node_name = node.name if node.name else node.input[0]
+        mul_name = f"{node_name}_mul"
+        add_name = f"{node_name}_add"
+
+        # 2. Store unscaled mul and add initializers (as floats)
+        scale_value = self.get_scaling(scale_config.base, scale_config.exponent)
+        scale_name = f"{node.name}_int_scaler"
+        scale_tensor = numpy_helper.from_array(
+            np.array([scale_value], dtype=np.int64),
+            name=scale_name,
+        )
+        self.new_initializers.append(scale_tensor)
+
+        mul_tensor = numpy_helper.from_array(mul.astype(np.float32), name=mul_name)
+        add_tensor = numpy_helper.from_array(add.astype(np.float32), name=add_name)
+
+        initializer_map[mul_name] = mul_tensor
+        initializer_map[add_name] = add_tensor
+
+        # 3. Insert scale and cast for mul_tensor
+        scaled_mul_name, mul_scale_node, mul_cast_node = self.insert_scale_node(
+            tensor=mul_tensor,
+            scale_base=scale_config.base,
+            scale_exponent=scale_config.exponent,
+        )
+
+        # 4. Insert scale and cast for add_tensor
+        scaled_add_name, add_scale_node, add_cast_node = self.insert_scale_node(
+            tensor=add_tensor,
+            scale_base=scale_config.base,
+            scale_exponent=scale_config.exponent * 2,
+        )
+        # Note, order is important here
+        nodes.extend(
+            [
+                mul_scale_node,
+                mul_cast_node,
+                add_scale_node,
+                add_cast_node,
+            ],
+        )
+
+        # 5. Build final Int64BatchNorm node
+        attrs = extract_attributes(node)
+        for k, v in getattr(self, "DEFAULT_ATTRS", {}).items():
+            attrs.setdefault(k, v)
+        attrs["rescale"] = 1
+
+        quant_node = helper.make_node(
+            self.OP_TYPE,  # Should be "Int64BatchNorm"
+            inputs=[
+                node.input[0],  # original X
+                scaled_mul_name,  # scaled mul
+                scaled_add_name,  # scaled add
+                scale_name,  # scaling factor
+            ],
+            outputs=node.output,
+            name=node.name,
+            domain=self.DOMAIN,
+            **attrs,
+        )
+
+        nodes.append(quant_node)
+        return nodes
+
+    def check_supported(
+        self: BatchnormQuantizer,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        """
+        For our current implementation, all batchnorm inputs
+        (scale, variance, mean, etc.)
+        must be initializers to the circuit and not inputs from earlier in the graph.
+        """
+
+        if initializer_map is None:
+            msg = "initializer_map is required for BatchNorm support check"
+            raise CircuitConfigurationError(node.name, node.op_type, msg)
+
+        self._validate_inputs(node=node)
+
+        # First, check to make sure that each of the batchnorm inputs are initializers
+        initializer_inputs = node.input[1:]
+        if not all(i in initializer_map for i in initializer_inputs):
+            msg = "Unsupported BatchNorm with normalization inputs not in initializers"
+            raise InvalidParamError(node.name, node.op_type, msg)
+
+    def _validate_inputs(self, node: onnx.NodeProto) -> None:
+        """Validate BatchNorm has required inputs in initializer_map."""
+        num_inputs = 5
+        if len(node.input) < num_inputs:
+            raise InvalidParamError(
+                node.name,
+                node.op_type,
+                f"BatchNorm requires 5 inputs, got {len(node.input)}",
+            )
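The folding in _compute_mul_add is the standard BatchNorm linearization: y = scale * (x - mean) / sqrt(var + eps) + bias collapses to y = mul * x + add, with mul = scale / sqrt(var + eps) and add = bias - mean * mul. The add term is scaled by scale_factor**2 because the incoming activation and the mul coefficient each carry one factor of the scale, so their product carries two. A standalone numpy check of the identity (independent of the package):

import numpy as np

# Verify the BatchNorm folding identity used by _compute_mul_add.
rng = np.random.default_rng(0)
x, scale, bias, mean = (rng.standard_normal(4) for _ in range(4))
var = np.abs(rng.standard_normal(4))  # variance is non-negative
eps = 1e-5

bn = scale * (x - mean) / np.sqrt(var + eps) + bias
mul = scale / np.sqrt(var + eps)
add = bias - mean * mul
assert np.allclose(bn, mul * x + add)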
python/core/model_processing/onnx_quantizer/layers/constant.py

@@ -38,7 +38,7 @@ class ConstantQuantizer(BaseOpQuantizer):
 
     def __init__(
        self: ConstantQuantizer,
-        new_initializer:
+        new_initializer: list[onnx.TensorProto] | None = None,
    ) -> None:
        super().__init__()
        _ = new_initializer
python/core/model_processing/onnx_quantizer/layers/conv.py

@@ -1,18 +1,27 @@
 from __future__ import annotations
 
-import
-
-
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    import onnx
 
-from python.core.model_processing.onnx_custom_ops.onnx_helpers import extract_attributes
 from python.core.model_processing.onnx_quantizer.exceptions import InvalidParamError
 from python.core.model_processing.onnx_quantizer.layers.base import (
     BaseOpQuantizer,
+    QuantizerBase,
     ScaleConfig,
 )
 
 
-class
+class QuantizeConv(QuantizerBase):
+    OP_TYPE = "Int64Conv"
+    USE_WB = True
+    USE_SCALING = True
+    DEFAULT_ATTRS: ClassVar = {"group": 1, "auto_pad": "NOTSET"}
+    SCALE_PLAN: ClassVar = {1: 1, 2: 2}  # weight = 1x scale, bias = 2x scale
+
+
+class ConvQuantizer(BaseOpQuantizer, QuantizeConv):
     """
     Quantizer for ONNX Conv layers.
 
@@ -23,9 +32,12 @@ class ConvQuantizer(BaseOpQuantizer):
 
     def __init__(
         self: ConvQuantizer,
-        new_initializers:
+        new_initializers: list[onnx.TensorProto] | None = None,
     ) -> None:
-
+        super().__init__()
+        # Only replace if caller provided something
+        if new_initializers is not None:
+            self.new_initializers = new_initializers
 
     def quantize(
         self: ConvQuantizer,
@@ -34,67 +46,7 @@ class ConvQuantizer(BaseOpQuantizer):
         scale_config: ScaleConfig,
         initializer_map: dict[str, onnx.TensorProto],
     ) -> list[onnx.NodeProto]:
-        """
-        Quantize a Conv node by:
-        1. Quantizing its weights and bias.
-        2. Adding a scale constant.
-        3. Replacing it with an Int64Conv node.
-
-        Args:
-            node (onnx.NodeProto): The node to quantize.
-            rescale (bool): Whether rescaling is enabled
-                (Doesnt have an affect on this op type)
-            graph (onnx.GraphProto): The ONNX graph.
-            scale_exponent (int): Scale exponent.
-            scale_base (int): The base of scaling.
-            initializer_map (dict[str, onnx.TensorProto]):
-                Map of initializer names to tensor data.
-
-        Returns:
-            list[onnx.NodeProto]: A list of ONNX nodes
-            (quantized and any auxiliary nodes).
-        """
-        _ = graph
-
-        nodes = []
-        output_name = f"{node.name}_int"
-
-        nodes, node.input[:] = self.add_nodes_w_and_b(
-            node=node,
-            scale_exponent=scale_config.exponent,
-            scale_base=scale_config.base,
-            initializer_map=initializer_map,
-        )
-        attrs = extract_attributes(node)
-        attrs.setdefault("group", 1)
-        attrs.setdefault("auto_pad", "NOTSET")
-
-        attrs["rescale"] = int(scale_config.rescale)
-
-        scale_value = self.get_scaling(
-            scale_config.base,
-            scale_config.exponent,
-        )
-
-        # Create scale constant
-        scale_const_name = f"{output_name}_scaler"
-        scale_tensor = numpy_helper.from_array(
-            np.array([scale_value], dtype=np.int64),
-            name=scale_const_name,
-        )
-        self.new_initializers.append(scale_tensor)
-        node.input.append(scale_const_name)
-        int64_conv_node = onnx.helper.make_node(
-            "Int64Conv",
-            inputs=node.input,
-            outputs=node.output,  # preserve original output name
-            name=node.name,
-            domain="ai.onnx.contrib",
-            **attrs,
-        )
-
-        nodes.append(int64_conv_node)
-        return nodes
+        return QuantizeConv.quantize(self, node, graph, scale_config, initializer_map)
 
     def check_supported(
         self: ConvQuantizer,
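ConvQuantizer.quantize now delegates to the shared template with an explicit unbound call (QuantizeConv.quantize(self, ...)) rather than super(), so the template implementation is selected unambiguously under the dual inheritance from BaseOpQuantizer and QuantizeConv; the Gemm refactor below follows the same pattern. A minimal sketch of why the explicit call matters (class names illustrative):

class Template:
    def run(self) -> str:
        return "template"

class Legacy:
    def run(self) -> str:
        return "legacy"

class Combined(Legacy, Template):
    def run(self) -> str:
        # Plain super().run() would resolve to Legacy.run via the MRO;
        # the explicit unbound call picks the template implementation.
        return Template.run(self)

assert Combined().run() == "template"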
python/core/model_processing/onnx_quantizer/layers/gemm.py

@@ -1,18 +1,27 @@
 from __future__ import annotations
 
-import
-
-
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    import onnx
 
-from python.core.model_processing.onnx_custom_ops.onnx_helpers import extract_attributes
 from python.core.model_processing.onnx_quantizer.exceptions import InvalidParamError
 from python.core.model_processing.onnx_quantizer.layers.base import (
     BaseOpQuantizer,
+    QuantizerBase,
     ScaleConfig,
 )
 
 
-class
+class QuantizeGemm(QuantizerBase):
+    OP_TYPE = "Int64Gemm"
+    USE_WB = True
+    USE_SCALING = True
+    DEFAULT_ATTRS: ClassVar = {"transA": 0, "transB": 0}
+    SCALE_PLAN: ClassVar = {1: 1, 2: 2}
+
+
+class GemmQuantizer(BaseOpQuantizer, QuantizeGemm):
     """
     Quantizer for ONNX Gemm layers.
 
@@ -23,9 +32,12 @@ class GemmQuantizer(BaseOpQuantizer):
 
     def __init__(
         self: GemmQuantizer,
-        new_initializers:
+        new_initializers: list[onnx.TensorProto] | None = None,
     ) -> None:
-
+        super().__init__()
+        # Only replace if caller provided something
+        if new_initializers is not None:
+            self.new_initializers = new_initializers
 
     def quantize(
         self: GemmQuantizer,
@@ -34,65 +46,7 @@ class GemmQuantizer(BaseOpQuantizer):
         scale_config: ScaleConfig,
         initializer_map: dict[str, onnx.TensorProto],
     ) -> list[onnx.NodeProto]:
-        """
-        Quantize a Gemm node by:
-        1. Quantizing its weights and bias.
-        2. Adding a scale constant.
-        3. Replacing it with an Int64Gemm node.
-
-        Args:
-            node (onnx.NodeProto): The node to quantize.
-            rescale (bool): Whether rescaling is enabled
-            graph (onnx.GraphProto): The ONNX graph.
-            scale_exponent (int): Scale exponent.
-            scale_base (int): The base of scaling.
-            initializer_map (dict[str, onnx.TensorProto]):
-                Map of initializer names to tensor data.
-
-        Returns:
-            List[onnx.NodeProto]: A list of ONNX nodes
-            (quantized and any auxiliary nodes).
-        """
-        _ = graph
-        nodes = []
-        output_name = f"{node.name}_int"
-
-        nodes, new_inputs = self.add_nodes_w_and_b(
-            node=node,
-            scale_exponent=scale_config.exponent,
-            scale_base=scale_config.base,
-            initializer_map=initializer_map,
-        )
-        node.input[:] = new_inputs
-
-        attrs = extract_attributes(node)
-        attrs.setdefault("transA", 0)
-        attrs.setdefault("transB", 0)
-        attrs["rescale"] = int(scale_config.rescale)
-
-        scale_value = self.get_scaling(
-            scale_config.base,
-            scale_config.exponent,
-        )
-
-        # === Create scale constant ===
-        scale_const_name = f"{output_name}_scaler"
-        scale_tensor = numpy_helper.from_array(
-            np.array([scale_value], dtype=np.int64),
-            name=scale_const_name,
-        )
-        self.new_initializers.append(scale_tensor)
-        node.input.append(scale_const_name)
-        int64_gemm = onnx.helper.make_node(
-            "Int64Gemm",
-            inputs=node.input,
-            outputs=node.output,  # preserve original output name
-            name=output_name,
-            domain="ai.onnx.contrib",
-            **attrs,
-        )
-        nodes.append(int64_gemm)
-        return nodes
+        return QuantizeGemm.quantize(self, node, graph, scale_config, initializer_map)
 
     def check_supported(
         self: GemmQuantizer,