JSTprove 1.0.0-py3-none-macosx_11_0_arm64.whl → 1.2.0-py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/METADATA +3 -3
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/RECORD +60 -25
- python/core/binaries/onnx_generic_circuit_1-2-0 +0 -0
- python/core/circuit_models/generic_onnx.py +43 -9
- python/core/circuits/base.py +231 -71
- python/core/model_processing/converters/onnx_converter.py +114 -59
- python/core/model_processing/onnx_custom_ops/batchnorm.py +64 -0
- python/core/model_processing/onnx_custom_ops/maxpool.py +1 -1
- python/core/model_processing/onnx_custom_ops/mul.py +66 -0
- python/core/model_processing/onnx_custom_ops/relu.py +1 -1
- python/core/model_processing/onnx_quantizer/layers/add.py +54 -0
- python/core/model_processing/onnx_quantizer/layers/base.py +188 -1
- python/core/model_processing/onnx_quantizer/layers/batchnorm.py +224 -0
- python/core/model_processing/onnx_quantizer/layers/constant.py +1 -1
- python/core/model_processing/onnx_quantizer/layers/conv.py +20 -68
- python/core/model_processing/onnx_quantizer/layers/gemm.py +20 -66
- python/core/model_processing/onnx_quantizer/layers/maxpool.py +53 -43
- python/core/model_processing/onnx_quantizer/layers/mul.py +53 -0
- python/core/model_processing/onnx_quantizer/layers/relu.py +20 -35
- python/core/model_processing/onnx_quantizer/layers/sub.py +54 -0
- python/core/model_processing/onnx_quantizer/onnx_op_quantizer.py +43 -1
- python/core/utils/general_layer_functions.py +17 -12
- python/core/utils/model_registry.py +6 -3
- python/scripts/gen_and_bench.py +2 -2
- python/tests/circuit_e2e_tests/other_e2e_test.py +202 -9
- python/tests/circuit_parent_classes/test_circuit.py +561 -38
- python/tests/circuit_parent_classes/test_onnx_converter.py +22 -13
- python/tests/onnx_quantizer_tests/__init__.py +1 -0
- python/tests/onnx_quantizer_tests/layers/__init__.py +13 -0
- python/tests/onnx_quantizer_tests/layers/add_config.py +102 -0
- python/tests/onnx_quantizer_tests/layers/base.py +279 -0
- python/tests/onnx_quantizer_tests/layers/batchnorm_config.py +190 -0
- python/tests/onnx_quantizer_tests/layers/constant_config.py +39 -0
- python/tests/onnx_quantizer_tests/layers/conv_config.py +154 -0
- python/tests/onnx_quantizer_tests/layers/factory.py +142 -0
- python/tests/onnx_quantizer_tests/layers/flatten_config.py +61 -0
- python/tests/onnx_quantizer_tests/layers/gemm_config.py +160 -0
- python/tests/onnx_quantizer_tests/layers/maxpool_config.py +82 -0
- python/tests/onnx_quantizer_tests/layers/mul_config.py +102 -0
- python/tests/onnx_quantizer_tests/layers/relu_config.py +61 -0
- python/tests/onnx_quantizer_tests/layers/reshape_config.py +61 -0
- python/tests/onnx_quantizer_tests/layers/sub_config.py +102 -0
- python/tests/onnx_quantizer_tests/layers_tests/__init__.py +0 -0
- python/tests/onnx_quantizer_tests/layers_tests/base_test.py +94 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_check_model.py +115 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_e2e.py +196 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_error_cases.py +59 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_integration.py +198 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_quantize.py +267 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_scalability.py +109 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_validation.py +45 -0
- python/tests/onnx_quantizer_tests/test_base_layer.py +228 -0
- python/tests/onnx_quantizer_tests/test_exceptions.py +99 -0
- python/tests/onnx_quantizer_tests/test_onnx_op_quantizer.py +246 -0
- python/tests/onnx_quantizer_tests/test_registered_quantizers.py +121 -0
- python/tests/onnx_quantizer_tests/testing_helper_functions.py +17 -0
- python/core/binaries/onnx_generic_circuit_1-0-0 +0 -0
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/WHEEL +0 -0
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/entry_points.txt +0 -0
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {jstprove-1.0.0.dist-info → jstprove-1.2.0.dist-info}/top_level.txt +0 -0
python/core/model_processing/converters/onnx_converter.py
@@ -5,6 +5,10 @@ import logging
 from dataclasses import asdict, dataclass
 from importlib.metadata import version as get_version
 from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from onnxruntime import NodeArg
 
 import numpy as np
 import onnx
@@ -18,6 +22,7 @@ from onnxruntime_extensions import get_library_path
 
 import python.core.model_processing.onnx_custom_ops  # noqa: F401
 from python.core import PACKAGE_NAME
+from python.core.circuits.errors import CircuitConfigurationError
 from python.core.model_processing.converters.base import ModelConverter, ModelType
 from python.core.model_processing.errors import (
     InferenceError,
@@ -242,6 +247,7 @@ class ONNXConverter(ModelConverter):
 
     def analyze_layers(
         self: ONNXConverter,
+        model: onnx.ModelProto,
        output_name_to_shape: dict[str, list[int]] | None = None,
    ) -> tuple[list[ONNXLayer], list[ONNXLayer]]:
        """Analyze the onnx model graph into
@@ -263,29 +269,29 @@
             id_count = 0
             # Apply shape inference on the model
             if not output_name_to_shape:
-                inferred_model = shape_inference.infer_shapes(
+                inferred_model = shape_inference.infer_shapes(model)
                 self._onnx_check_model_safely(inferred_model)
 
                 output_name_to_shape = extract_shape_dict(inferred_model)
             domain_to_version = {
-                opset.domain: opset.version for opset in
+                opset.domain: opset.version for opset in model.opset_import
             }
 
             id_count = 0
             architecture = self.get_model_architecture(
-
+                model,
                 output_name_to_shape,
                 domain_to_version,
             )
             w_and_b = self.get_model_w_and_b(
-
+                model,
                 output_name_to_shape,
                 id_count,
                 domain_to_version,
             )
         except InvalidModelError:
             raise
-        except (ValueError, TypeError, RuntimeError, OSError
+        except (ValueError, TypeError, RuntimeError, OSError) as e:
             raise LayerAnalysisError(model_type=self.model_type, reason=str(e)) from e
         except Exception as e:
             raise LayerAnalysisError(model_type=self.model_type, reason=str(e)) from e
@@ -508,9 +514,9 @@
                 opts,
                 providers=["CPUExecutionProvider"],
             )
-        except (OSError,
+        except (OSError, RuntimeError, Exception) as e:
             raise InferenceError(
-                model_path,
+                model_path=model_path,
                 model_type=self.model_type,
                 reason=str(e),
             ) from e
@@ -552,6 +558,7 @@
         output_shapes = {
             out_name: output_name_to_shape.get(out_name, []) for out_name in outputs
         }
+
         return ONNXLayer(
             id=layer_id,
             name=name,
@@ -600,6 +607,7 @@
             np_data = onnx.numpy_helper.to_array(node, constant_dtype)
         except (ValueError, TypeError, onnx.ONNXException, Exception) as e:
             raise SerializationError(
+                model_type=self.model_type,
                 tensor_name=node.name,
                 reason=f"Failed to convert tensor: {e!s}",
             ) from e
@@ -877,8 +885,10 @@
             OSError,
             Exception,
         ) as e:
-            msg =
-
+            msg = (
+                "Quantization failed for model"
+                f" '{getattr(self, 'model_file_name', 'unknown')}': {e!s}"
+            )
             raise ModelConversionError(
                 msg,
                 model_type=self.model_type,
@@ -1033,38 +1043,36 @@
         ``rescale_config``.
         """
         inferred_model = shape_inference.infer_shapes(self.model)
-
-
-            scale_base=getattr(self, "scale_base", 2),
-            scale_exponent=(getattr(self, "scale_exponent", 18)),
-        )
+        scale_base = getattr(self, "scale_base", 2)
+        scale_exponent = getattr(self, "scale_exponent", 18)
 
         # Check the model and print Y's shape information
         self._onnx_check_model_safely(inferred_model)
         output_name_to_shape = extract_shape_dict(inferred_model)
-
-
+        scaled_and_transformed_model = self.op_quantizer.apply_pre_analysis_transforms(
+            inferred_model,
+            scale_exponent=scale_exponent,
+            scale_base=scale_base,
+        )
+        # Get layers in correct format
+        (architecture, w_and_b) = self.analyze_layers(
+            scaled_and_transformed_model,
+            output_name_to_shape,
+        )
+
+        def _convert_tensor_to_int_list(w: ONNXLayer) -> list:
             try:
-
-
+                arr = np.asarray(w.tensor).astype(np.int64)
+                return arr.tolist()
+            except Exception as e:
                 raise SerializationError(
                     tensor_name=getattr(w, "name", None),
+                    model_type=self.model_type,
                     reason=f"cannot convert to ndarray: {e}",
                 ) from e
 
-
-
-                if "bias" in w.name:
-                    w_and_b_scaled = w_and_b_array * scaling * scaling
-                else:
-                    w_and_b_scaled = w_and_b_array * scaling
-                w_and_b_out = w_and_b_scaled.astype(np.int64).tolist()
-                w.tensor = w_and_b_out
-            except (ValueError, TypeError, OverflowError, Exception) as e:
-                raise SerializationError(
-                    tensor_name=getattr(w, "name", None),
-                    reason=str(e),
-                ) from e
+        for w in w_and_b:
+            w.tensor = _convert_tensor_to_int_list(w)
 
         inputs = []
         outputs = []
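
The removed block here scaled weights by the scaling factor directly (and biases by its square, since a bias is added to the product of two scaled operands); the new flow expects apply_pre_analysis_transforms to hand back integer-valued tensors, so _convert_tensor_to_int_list only casts and serializes. A minimal sketch of the fixed-point convention, assuming only the defaults visible in this hunk (scale_base=2, scale_exponent=18); the variable names are illustrative:

    scale_base, scale_exponent = 2, 18
    scaling = scale_base ** scale_exponent  # 2**18 == 262144

    weight = 0.03125                     # float weight from the model
    quantized = round(weight * scaling)  # 8192, well within int64 range
    recovered = quantized / scaling      # 0.03125, exact for dyadic values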
@@ -1118,45 +1126,92 @@
             rescale_config=getattr(self, "rescale_config", {}),
         )
 
+    def _process_single_input_for_get_outputs(
+        self: ONNXConverter,
+        value: np.ndarray | torch.Tensor,
+        input_def: NodeArg,
+    ) -> np.ndarray:
+        """Process a single input tensor according to dtype and scale settings."""
+        value = torch.as_tensor(value)
+
+        if value.dtype in (
+            torch.int8,
+            torch.int16,
+            torch.int32,
+            torch.int64,
+            torch.uint8,
+        ):
+            value = value.double()
+            value = value / BaseOpQuantizer.get_scaling(
+                scale_base=self.scale_base,
+                scale_exponent=self.scale_exponent,
+            )
+
+        if input_def.type == "tensor(double)":
+            return np.asarray(value).astype(np.float64)
+        return np.asarray(value)
+
     def get_outputs(
         self: ONNXConverter,
-        inputs: np.ndarray | torch.Tensor,
+        inputs: np.ndarray | torch.Tensor | dict[str, np.ndarray | torch.Tensor],
     ) -> list[np.ndarray]:
         """Run the currently loaded (quantized) model via ONNX Runtime.
 
         Args:
-            inputs
+            inputs: Single tensor/array or a dict of named inputs.
 
         Returns:
-
+            list[np.ndarray]: List of output arrays from ONNX Runtime inference.
         """
+
+        def _raise_type_error(inputs: np.ndarray | torch.Tensor) -> None:
+            msg = (
+                "Expected np.ndarray, torch.Tensor, or dict "
+                f"for inputs, got {type(inputs)}"
+            )
+            raise TypeError(msg)
+
+        def _raise_value_error(msg: str) -> None:
+            raise ValueError(msg)
+
+        def _raise_no_scale_configs() -> None:
+            raise CircuitConfigurationError(
+                missing_attributes=["scale_base", "scale_exponent"],
+            )
+
+        scale_base = getattr(self, "scale_base", None)
+        scale_exponent = getattr(self, "scale_exponent", None)
+
         try:
-
-
-
-
-
-
-
-
-
-
-
-            ):
-                inputs
-
-
-
+            input_defs = self.ort_sess.get_inputs()
+            output_defs = self.ort_sess.get_outputs()
+            output_names = [out.name for out in output_defs]
+
+            if scale_base is None or scale_exponent is None:
+                _raise_no_scale_configs()
+
+            # Normalize inputs into a dict
+            if isinstance(inputs, (np.ndarray, torch.Tensor)):
+                input_name = input_defs[0].name
+                inputs = {input_name: inputs}
+            elif not isinstance(inputs, dict):
+                _raise_type_error(inputs)
+
+            # Process inputs
+            processed_inputs = {}
+            for input_def in input_defs:
+                name = input_def.name
+                if name not in inputs:
+                    _raise_value_error(
+                        f"Missing required input '{name}' in provided inputs",
+                    )
+                processed_inputs[name] = self._process_single_input_for_get_outputs(
+                    inputs[name],
+                    input_def,
                 )
-
-
-
-                {input_name: np.asarray(inputs).astype(np.float64)},
-            )
-            return self.ort_sess.run(
-                [output_name],
-                {input_name: np.asarray(inputs)},
-            )
+
+            return self.ort_sess.run(output_names, processed_inputs)
+
         except (RuntimeError, ValueError, TypeError, Exception) as e:
             raise InferenceError(
                 model_path=getattr(self, "quantized_model_path", None),
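
The reworked get_outputs accepts either a single tensor (mapped onto the session's first input) or a dict keyed by input name, rescales integer inputs by the configured scaling, and runs all session outputs at once. A hedged usage sketch; the converter construction is elided, and converter, input_a, and input_b are illustrative names:

    import numpy as np

    x = np.zeros((1, 3, 32, 32), dtype=np.int64)

    # Single input: internally wrapped as {first_input_name: x}.
    outputs = converter.get_outputs(x)

    # Named inputs: every session input must be present, otherwise the
    # new code raises ValueError; unset scale_base/scale_exponent now
    # raise CircuitConfigurationError before inference is attempted.
    outputs = converter.get_outputs({"input_a": x, "input_b": x})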
python/core/model_processing/onnx_custom_ops/batchnorm.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import numpy as np
+from onnxruntime_extensions import PyCustomOpDef, onnx_op
+
+from .custom_helpers import rescaling
+
+
+@onnx_op(
+    op_type="Int64BatchNorm",
+    domain="ai.onnx.contrib",
+    inputs=[
+        PyCustomOpDef.dt_int64,  # X (int64)
+        PyCustomOpDef.dt_int64,  # mul (int64 scaled multiplier)
+        PyCustomOpDef.dt_int64,  # add (int64 scaled adder)
+        PyCustomOpDef.dt_int64,  # scaling_factor
+    ],
+    outputs=[PyCustomOpDef.dt_int64],
+    attrs={"rescale": PyCustomOpDef.dt_int64},
+)
+def int64_batchnorm(
+    x: np.ndarray,
+    mul: np.ndarray,
+    add: np.ndarray,
+    scaling_factor: np.ndarray | None = None,
+    rescale: int | None = None,
+) -> np.ndarray:
+    """
+    Int64 BatchNorm (folded into an affine transform).
+
+    Computes:
+        Y = X * mul + add
+    where mul/add are already scaled to int64.
+
+    Parameters
+    ----------
+    x : Input int64 tensor
+    mul : Per-channel int64 scale multipliers
+    add : Per-channel int64 bias terms
+    scaling_factor : Factor to rescale by
+    rescale : Optional flag to apply post-scaling
+
+    Returns
+    -------
+    numpy.ndarray (int64)
+    """
+    try:
+        # Broadcast shapes must match the batchnorm layout (NCHW);
+        # typically mul/add have shape [C]
+        dims_x = len(x.shape)
+        dim_ones = (1,) * (dims_x - 2)
+        mul = mul.reshape(-1, *dim_ones)
+        add = add.reshape(-1, *dim_ones)
+
+        y = x * mul + add
+
+        if rescale is not None:
+            y = rescaling(scaling_factor, rescale, y)
+
+        return y.astype(np.int64)
+
+    except Exception as e:
+        msg = f"Int64BatchNorm failed: {e}"
+        raise RuntimeError(msg) from e
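
For context, batchnorm folds into an affine transform: y = gamma * (x - mean) / sqrt(var + eps) + beta collapses to y = x * mul + add. A minimal numpy sketch of how the int64 mul/add operands this op consumes could be produced; the actual folding lives in the quantizer, and eps and the parameter values below are illustrative:

    import numpy as np

    gamma = np.array([1.0, 0.5])
    beta = np.array([0.1, -0.2])
    mean = np.array([0.0, 1.0])
    var = np.array([1.0, 4.0])
    eps = 1e-5
    scaling = 2 ** 18  # default scale_base ** scale_exponent

    mul_f = gamma / np.sqrt(var + eps)  # per-channel multiplier
    add_f = beta - mean * mul_f         # per-channel bias
    mul = np.round(mul_f * scaling).astype(np.int64)
    # add is summed with x * mul, which carries the scale twice on this
    # convention, hence the squared factor (mirroring the old bias handling).
    add = np.round(add_f * scaling * scaling).astype(np.int64)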
python/core/model_processing/onnx_custom_ops/mul.py
@@ -0,0 +1,66 @@
+import numpy as np
+from onnxruntime_extensions import PyCustomOpDef, onnx_op
+
+from .custom_helpers import rescaling
+
+
+@onnx_op(
+    op_type="Int64Mul",
+    domain="ai.onnx.contrib",
+    inputs=[
+        PyCustomOpDef.dt_int64,
+        PyCustomOpDef.dt_int64,
+        PyCustomOpDef.dt_int64,  # Scalar
+    ],
+    outputs=[PyCustomOpDef.dt_int64],
+    attrs={
+        "rescale": PyCustomOpDef.dt_int64,
+    },
+)
+def int64_mul(
+    a: np.ndarray,
+    b: np.ndarray,
+    scaling_factor: np.ndarray | None = None,
+    rescale: int | None = None,
+) -> np.ndarray:
+    """
+    Performs a Mul (Hadamard product) operation on int64 input tensors.
+
+    This function is registered as a custom ONNX operator via onnxruntime_extensions
+    and is used in the JSTprove quantized inference pipeline.
+    It applies Mul, rescaling the outputs back to the original scale.
+
+    Parameters
+    ----------
+    a : np.ndarray
+        First input tensor with dtype int64.
+    b : np.ndarray
+        Second input tensor with dtype int64.
+    scaling_factor : np.ndarray, optional
+        Scalar tensor used to rescale the output when rescale=1.
+    rescale : int, optional
+        Whether to apply rescaling (0=no, 1=yes).
+
+    Returns
+    -------
+    numpy.ndarray
+        Product tensor with dtype int64.
+
+    Notes
+    -----
+    - This op is part of the `ai.onnx.contrib` custom domain.
+    - ONNX Runtime Extensions is required to register this op.
+
+    References
+    ----------
+    For more information on the Mul operation, please refer to the
+    ONNX standard Mul operator documentation:
+    https://onnx.ai/onnx/operators/onnx__Mul.html
+    """
+    try:
+        result = a * b
+        result = rescaling(scaling_factor, rescale, result)
+        return result.astype(np.int64)
+    except Exception as e:
+        msg = f"Int64Mul failed: {e}"
+        raise RuntimeError(msg) from e
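
Why the rescale attribute exists: two fixed-point operands each carry one factor of the scale, so their product carries it twice, and one factor must be divided back out. A worked sketch under the same 2**18 convention; the exact rounding of the rescaling helper is not shown in this diff, so plain integer division stands in for it:

    import numpy as np

    scaling = 2 ** 18
    a_f, b_f = 1.5, -2.25               # real values
    a = np.int64(round(a_f * scaling))  # fixed-point encodings
    b = np.int64(round(b_f * scaling))

    raw = a * b                         # now scaled by scaling**2
    rescaled = raw // scaling           # back to a single factor of scaling
    assert rescaled / scaling == a_f * b_f  # exact for these dyadic values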
python/core/model_processing/onnx_quantizer/layers/add.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    import onnx
+
+from python.core.model_processing.onnx_quantizer.layers.base import (
+    BaseOpQuantizer,
+    QuantizerBase,
+    ScaleConfig,
+)
+
+
+class QuantizeAdd(QuantizerBase):
+    OP_TYPE = "Add"
+    DOMAIN = ""
+    USE_WB = True
+    USE_SCALING = False
+    SCALE_PLAN: ClassVar = {0: 1, 1: 1}
+
+
+class AddQuantizer(BaseOpQuantizer, QuantizeAdd):
+    """
+    Quantizer for ONNX Add layers.
+
+    - Uses the standard ONNX Add node in the standard domain, and
+      makes the relevant additional changes to the graph.
+    """
+
+    def __init__(
+        self: AddQuantizer,
+        new_initializers: list[onnx.TensorProto] | None = None,
+    ) -> None:
+        super().__init__()
+        # Only replace if the caller provided something
+        if new_initializers is not None:
+            self.new_initializers = new_initializers
+
+    def quantize(
+        self: AddQuantizer,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        return QuantizeAdd.quantize(self, node, graph, scale_config, initializer_map)
+
+    def check_supported(
+        self: AddQuantizer,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        pass