onnx2fx-0.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
onnx2fx/ops/linalg.py ADDED
@@ -0,0 +1,33 @@
+ # SPDX-License-Identifier: Apache-2.0
+ """Linear algebra operators."""
+
+ from typing import TYPE_CHECKING
+
+ import onnx
+ import torch
+
+ from ..op_registry import register
+ from ..utils.attributes import get_attribute
+
+ if TYPE_CHECKING:
+     from ..graph_builder import GraphBuilder
+
+
+ # =============================================================================
+ # Linear algebra operators
+ # =============================================================================
+
+
+ @register("Einsum")
+ def einsum(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Einstein summation."""
+     equation = get_attribute(node, "equation")
+     inputs = [builder.get_value(name) for name in node.input]
+     return builder.call_function(torch.einsum, args=(equation, *inputs))
+
+
+ @register("Det")
+ def det(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Matrix determinant."""
+     x = builder.get_value(node.input[0])
+     return builder.call_function(torch.linalg.det, args=(x,))
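The Einsum handler passes the ONNX equation string straight through to torch.einsum, and Det maps to torch.linalg.det. A standalone sketch of those two PyTorch calls (not code from the wheel):

import torch

# "ij,jk->ik" is plain matrix multiplication.
a, b = torch.randn(2, 3), torch.randn(3, 4)
assert torch.allclose(torch.einsum("ij,jk->ik", a, b), a @ b)

# torch.linalg.det reduces the last two dimensions, so it batches naturally.
m = torch.randn(5, 3, 3)
print(torch.linalg.det(m).shape)  # torch.Size([5])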
onnx2fx/ops/loss.py ADDED
@@ -0,0 +1,56 @@
+ # SPDX-License-Identifier: Apache-2.0
+ """Loss function operators."""
+
+ from typing import TYPE_CHECKING
+
+ import onnx
+ import torch
+
+ from ..op_registry import register
+ from ..utils.attributes import get_attribute
+ from ..utils.op_helpers import get_optional_input
+
+ if TYPE_CHECKING:
+     from ..graph_builder import GraphBuilder
+
+
+ @register("SoftmaxCrossEntropyLoss")
+ def softmax_cross_entropy_loss(
+     builder: "GraphBuilder", node: onnx.NodeProto
+ ) -> torch.fx.Node:
+     """Softmax cross entropy loss."""
+     scores = builder.get_value(node.input[0])
+     labels = builder.get_value(node.input[1])
+     weights = get_optional_input(builder, node, 2)
+
+     ignore_index = get_attribute(node, "ignore_index", -100)
+     reduction = get_attribute(node, "reduction", "mean")
+
+     kwargs = {"ignore_index": ignore_index, "reduction": reduction}
+     if weights is not None:
+         kwargs["weight"] = weights
+
+     return builder.call_function(
+         torch.nn.functional.cross_entropy, args=(scores, labels), kwargs=kwargs
+     )
+
+
+ @register("NegativeLogLikelihoodLoss")
+ def negative_log_likelihood_loss(
+     builder: "GraphBuilder", node: onnx.NodeProto
+ ) -> torch.fx.Node:
+     """Negative log likelihood loss."""
+     input_node = builder.get_value(node.input[0])
+     target = builder.get_value(node.input[1])
+     weight = get_optional_input(builder, node, 2)
+
+     ignore_index = get_attribute(node, "ignore_index", -100)
+     reduction = get_attribute(node, "reduction", "mean")
+
+     kwargs = {"ignore_index": ignore_index, "reduction": reduction}
+     if weight is not None:
+         kwargs["weight"] = weight
+
+     return builder.call_function(
+         torch.nn.functional.nll_loss, args=(input_node, target), kwargs=kwargs
+     )
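For context (a standalone illustration, not part of the package): ONNX SoftmaxCrossEntropyLoss takes unnormalized scores while NegativeLogLikelihoodLoss expects log-probabilities, which is why the two handlers above map to F.cross_entropy and F.nll_loss. The two PyTorch functionals are related as follows:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)          # (N, C) unnormalized scores
labels = torch.randint(0, 10, (4,))  # (N,) class indices

# cross_entropy is log_softmax followed by nll_loss.
ce = F.cross_entropy(logits, labels, reduction="mean")
nll = F.nll_loss(F.log_softmax(logits, dim=1), labels, reduction="mean")
assert torch.allclose(ce, nll)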
onnx2fx/ops/nn.py ADDED
@@ -0,0 +1,96 @@
+ # SPDX-License-Identifier: Apache-2.0
+ """Neural network layer operators.
+
+ This module contains core neural network operators like MatMul, Gemm, and Dropout.
+ Other neural network operators are organized in specialized modules:
+ - convolution.py: Conv, ConvTranspose, DeformConv
+ - pooling.py: MaxPool, AveragePool, GlobalAveragePool, etc.
+ - normalization.py: BatchNormalization, LayerNormalization, etc.
+ - recurrent.py: LSTM, GRU, RNN
+ """
+
+ from typing import TYPE_CHECKING
+
+ import onnx
+ import torch
+
+ from ..op_registry import register
+ from ..utils.attributes import get_attribute
+ from ..utils.op_helpers import get_optional_input
+
+ if TYPE_CHECKING:
+     from ..graph_builder import GraphBuilder
+
+
+ # =============================================================================
+ # Matrix multiplication operators
+ # =============================================================================
+
+
+ @register("MatMul")
+ def matmul(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Matrix multiplication."""
+     a = builder.get_value(node.input[0])
+     b = builder.get_value(node.input[1])
+     return builder.call_function(torch.matmul, args=(a, b))
+
+
+ @register("Gemm")
+ def gemm(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """General Matrix Multiplication: Y = alpha * A' * B' + beta * C."""
+     a = builder.get_value(node.input[0])
+     b = builder.get_value(node.input[1])
+
+     alpha = get_attribute(node, "alpha", 1.0)
+     beta = get_attribute(node, "beta", 1.0)
+     trans_a = get_attribute(node, "transA", 0)
+     trans_b = get_attribute(node, "transB", 0)
+
+     def _gemm(a, b, c, alpha, beta, trans_a, trans_b):
+         if trans_a:
+             a = a.T
+         if trans_b:
+             b = b.T
+         result = alpha * torch.matmul(a, b)
+         if c is not None:
+             result = result + beta * c
+         return result
+
+     c = get_optional_input(builder, node, 2)
+
+     return builder.call_function(_gemm, args=(a, b, c, alpha, beta, trans_a, trans_b))
+
+
+ # =============================================================================
+ # Dropout and regularization
+ # =============================================================================
+
+
+ @register("Dropout")
+ def dropout(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Dropout (inference mode - identity).
+
+     ONNX Dropout can have 2 outputs:
+     - output: The result after dropout (same as input in inference mode)
+     - mask (optional): Boolean mask indicating which elements were kept (all True in inference mode)
+     """
+     x = builder.get_value(node.input[0])
+
+     # Check if mask output is requested (second output)
+     return_mask = len(node.output) > 1 and node.output[1] != ""
+
+     # In inference mode, dropout is identity
+     # ratio = get_attribute(node, "ratio", 0.5)
+     # training_mode from input or default to False
+
+     def _dropout_with_mask(x):
+         # In inference mode, output is identity and mask is all True
+         output = x
+         mask = torch.ones_like(x, dtype=torch.bool)
+         return output, mask
+
+     if return_mask:
+         return builder.call_function(_dropout_with_mask, args=(x,))
+     else:
+         # For inference without mask, just return input
+         return builder.call_function(lambda t: t, args=(x,))
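A standalone restatement of the Gemm arithmetic above (mirroring, not importing, the _gemm closure), handy for checking the transA/transB and alpha/beta handling against the ONNX definition Y = alpha * A' * B' + beta * C:

import torch

def gemm_ref(a, b, c=None, alpha=1.0, beta=1.0, trans_a=0, trans_b=0):
    # Same logic as the _gemm closure: optional transposes, scaled matmul, optional bias.
    if trans_a:
        a = a.T
    if trans_b:
        b = b.T
    y = alpha * (a @ b)
    return y if c is None else y + beta * c

a, b, c = torch.randn(3, 2), torch.randn(3, 4), torch.randn(2, 4)
y = gemm_ref(a, b, c, alpha=0.5, beta=2.0, trans_a=1)
assert y.shape == (2, 4)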
onnx2fx/ops/normalization.py ADDED
@@ -0,0 +1,289 @@
+ # SPDX-License-Identifier: Apache-2.0
+ """Normalization operators."""
+
+ from typing import TYPE_CHECKING
+
+ import onnx
+ import torch
+ import torch.nn.functional as F
+
+ from ..op_registry import register
+ from ..utils.attributes import get_attribute
+ from ..utils.dtype import stash_type_to_torch_dtype
+ from ..utils.op_helpers import get_optional_input
+
+ if TYPE_CHECKING:
+     from ..graph_builder import GraphBuilder
+
+
+ # =============================================================================
+ # Normalization operators
+ # =============================================================================
+
+
+ @register("LpNormalization")
+ def lp_normalization(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Lp Normalization.
+
+     Normalizes input element-wise by dividing by the Lp norm along the specified axis.
+
+     Attributes:
+         axis: The axis on which to apply normalization (default: -1)
+         p: The order of the normalization, only 1 or 2 are supported (default: 2)
+     """
+     x = builder.get_value(node.input[0])
+
+     axis = get_attribute(node, "axis", -1)
+     p = get_attribute(node, "p", 2)
+
+     def _lp_normalize(x, axis, p):
+         if p == 1:
+             # L1 normalization: x / sum(|x|)
+             norm = torch.sum(torch.abs(x), dim=axis, keepdim=True)
+             # Avoid division by zero
+             norm = torch.clamp(norm, min=1e-12)
+             return x / norm
+         elif p == 2:
+             # L2 normalization: x / sqrt(sum(x^2))
+             # Note: We don't use F.normalize because it returns 0 for zero vectors,
+             # but ONNX expects NaN (0/0 behavior)
+             norm = torch.sqrt(torch.sum(x * x, dim=axis, keepdim=True))
+             return x / norm
+         else:
+             raise ValueError(f"LpNormalization only supports p=1 or p=2, got p={p}")
+
+     return builder.call_function(_lp_normalize, args=(x, axis, p))
+
+
+ @register("BatchNormalization")
+ def batch_normalization(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Batch normalization."""
+     x = builder.get_value(node.input[0])
+     scale = builder.get_value(node.input[1])
+     bias = builder.get_value(node.input[2])
+     mean = builder.get_value(node.input[3])
+     var = builder.get_value(node.input[4])
+
+     epsilon = get_attribute(node, "epsilon", 1e-5)
+     # Note: ONNX momentum attribute is not used in inference mode
+     training_mode = get_attribute(node, "training_mode", 0)
+
+     def _batch_norm(x, scale, bias, mean, var, epsilon, training_mode):
+         return F.batch_norm(
+             x,
+             mean,
+             var,
+             weight=scale,
+             bias=bias,
+             training=bool(training_mode),
+             eps=epsilon,
+         )
+
+     return builder.call_function(
+         _batch_norm, args=(x, scale, bias, mean, var, epsilon, training_mode)
+     )
+
+
+ @register("LayerNormalization")
+ def layer_normalization(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Layer normalization.
+
+     ONNX LayerNormalization returns up to 3 outputs:
+     - Y: normalized output (required)
+     - Mean: mean values (optional)
+     - InvStdDev: inverse standard deviation (optional)
+     """
+     x = builder.get_value(node.input[0])
+     scale = builder.get_value(node.input[1])
+
+     bias = get_optional_input(builder, node, 2)
+
+     axis = get_attribute(node, "axis", -1)
+     epsilon = get_attribute(node, "epsilon", 1e-5)
+     stash_type = get_attribute(node, "stash_type", 1)  # 1 = float32
+
+     # Check how many outputs are requested
+     num_outputs = len([o for o in node.output if o])
+
+     def _layer_norm_single(x, scale, bias, axis, epsilon):
+         # Compute normalized shape from axis
+         if axis < 0:
+             axis = x.dim() + axis
+         normalized_shape = x.shape[axis:]
+         return F.layer_norm(x, normalized_shape, weight=scale, bias=bias, eps=epsilon)
+
+     def _layer_norm_with_stats(x, scale, bias, axis, epsilon, stash_type):
+         # Compute normalized shape from axis
+         if axis < 0:
+             axis = x.dim() + axis
+
+         # Determine stash dtype for mean/invstddev computation
+         stash_dtype = stash_type_to_torch_dtype(stash_type)
+
+         # Cast input to stash dtype for computing statistics
+         original_dtype = x.dtype
+         x_stash = x.to(stash_dtype)
+
+         # Compute mean and variance over the normalized dimensions
+         dims = list(range(axis, x.dim()))
+         mean = x_stash.mean(dim=dims, keepdim=True)
+         var = x_stash.var(dim=dims, unbiased=False, keepdim=True)
+         inv_std_dev = 1.0 / torch.sqrt(var + epsilon)
+
+         # Normalize
+         x_norm = (x_stash - mean) * inv_std_dev
+
+         # Apply scale and bias
+         if scale is not None:
+             x_norm = x_norm * scale.to(stash_dtype)
+         if bias is not None:
+             x_norm = x_norm + bias.to(stash_dtype)
+
+         # Cast back to original dtype
+         y = x_norm.to(original_dtype)
+
+         return (y, mean, inv_std_dev)
+
+     if num_outputs == 1:
+         return builder.call_function(
+             _layer_norm_single, args=(x, scale, bias, axis, epsilon)
+         )
+     else:
+         return builder.call_function(
+             _layer_norm_with_stats, args=(x, scale, bias, axis, epsilon, stash_type)
+         )
+
+
+ @register("RMSNormalization", since_version=23)
+ def rms_normalization(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """RMS Normalization (Root Mean Square Layer Normalization).
+
+     This is LayerNormalization without mean subtraction, also known as RMSNorm.
+     Formula: Y = X / sqrt(mean(X^2) + epsilon) * scale
+
+     Inputs:
+         X: Input tensor
+         scale: Scale tensor (broadcastable to normalized shape)
+
+     Attributes:
+         axis: First normalization dimension (default: -1)
+         epsilon: Small constant for numerical stability (default: 1e-5)
+         stash_type: Floating-point precision for computation (default: 1 = float32)
+     """
+     x = builder.get_value(node.input[0])
+     scale = builder.get_value(node.input[1])
+
+     axis = get_attribute(node, "axis", -1)
+     epsilon = get_attribute(node, "epsilon", 1e-5)
+     stash_type = get_attribute(node, "stash_type", 1)
+
+     def _rms_norm(x, scale, axis, epsilon, stash_type):
+         # Determine stash dtype for computation
+         stash_dtype = stash_type_to_torch_dtype(stash_type)
+
+         # Normalize axis
+         if axis < 0:
+             axis_pos = x.dim() + axis
+         else:
+             axis_pos = axis
+
+         # Save original dtype for casting back
+         original_dtype = x.dtype
+
+         # Cast to stash dtype for computation
+         x_stash = x.to(stash_dtype)
+
+         # Compute dimensions to reduce over (from axis to end)
+         dims = list(range(axis_pos, x.dim()))
+
+         # Compute RMS: sqrt(mean(x^2) + epsilon)
+         x_squared = x_stash.pow(2)
+         mean_squared = x_squared.mean(dim=dims, keepdim=True)
+         rms = torch.sqrt(mean_squared + epsilon)
+
+         # Normalize
+         x_normalized = x_stash / rms
+
+         # Apply scale
+         scale_stash = scale.to(stash_dtype)
+         y = x_normalized * scale_stash
+
+         # Cast back to original dtype
+         return y.to(original_dtype)
+
+     return builder.call_function(_rms_norm, args=(x, scale, axis, epsilon, stash_type))
+
+
+ @register("InstanceNormalization")
+ def instance_normalization(
+     builder: "GraphBuilder", node: onnx.NodeProto
+ ) -> torch.fx.Node:
+     """Instance normalization."""
+     x = builder.get_value(node.input[0])
+     scale = builder.get_value(node.input[1])
+     bias = builder.get_value(node.input[2])
+
+     epsilon = get_attribute(node, "epsilon", 1e-5)
+
+     def _instance_norm(x, scale, bias, epsilon):
+         return F.instance_norm(x, weight=scale, bias=bias, eps=epsilon)
+
+     return builder.call_function(_instance_norm, args=(x, scale, bias, epsilon))
+
+
+ @register("GroupNormalization")
+ def group_normalization(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Group normalization."""
+     x = builder.get_value(node.input[0])
+     scale = builder.get_value(node.input[1])
+     bias = builder.get_value(node.input[2])
+
+     epsilon = get_attribute(node, "epsilon", 1e-5)
+     num_groups = get_attribute(node, "num_groups")
+
+     def _group_norm(x, scale, bias, num_groups, epsilon):
+         return F.group_norm(x, num_groups, weight=scale, bias=bias, eps=epsilon)
+
+     return builder.call_function(
+         _group_norm, args=(x, scale, bias, num_groups, epsilon)
+     )
+
+
+ @register("LRN")
+ def lrn(builder: "GraphBuilder", node: onnx.NodeProto) -> torch.fx.Node:
+     """Local Response Normalization."""
+     x = builder.get_value(node.input[0])
+
+     alpha = get_attribute(node, "alpha", 0.0001)
+     beta = get_attribute(node, "beta", 0.75)
+     bias = get_attribute(node, "bias", 1.0)
+     size = get_attribute(node, "size")
+
+     return builder.call_function(
+         F.local_response_norm,
+         args=(x, size),
+         kwargs={"alpha": alpha, "beta": beta, "k": bias},
+     )
+
+
+ @register("MeanVarianceNormalization")
+ def mean_variance_normalization(
+     builder: "GraphBuilder", node: onnx.NodeProto
+ ) -> torch.fx.Node:
+     """Mean Variance Normalization.
+
+     Performs normalization using formula: (X - E[X]) / sqrt(E[(X - E[X])^2])
+     Default axes are [0, 2, 3] for NCHW format (normalize across N, H, W).
+     """
+     x = builder.get_value(node.input[0])
+     axes = get_attribute(node, "axes", [0, 2, 3])
+
+     def _mvn(x, axes):
+         axes = tuple(axes)
+         eps = 1e-9
+         mean = x.mean(dim=axes, keepdim=True)
+         variance = ((x - mean) ** 2).mean(dim=axes, keepdim=True)
+         std = torch.sqrt(variance + eps)
+         return (x - mean) / std
+
+     return builder.call_function(_mvn, args=(x, axes))
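A standalone check (not part of the package) of the RMSNorm formula implemented above, assuming last-axis normalization; the hasattr guard is there because torch.nn.functional.rms_norm only exists in newer PyTorch releases:

import torch
import torch.nn.functional as F

x = torch.randn(2, 4, 8)
scale = torch.randn(8)
eps = 1e-5

# Y = X / sqrt(mean(X^2) + epsilon) * scale, reduced over the normalized axis.
rms = torch.sqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)
y_manual = x / rms * scale

if hasattr(F, "rms_norm"):
    y_builtin = F.rms_norm(x, (8,), weight=scale, eps=eps)
    assert torch.allclose(y_manual, y_builtin, atol=1e-6)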