onnx-ir 0.1.7-py3-none-any.whl → 0.1.9-py3-none-any.whl

This diff shows the content of the publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two published versions.

onnx_ir/__init__.py CHANGED
@@ -78,6 +78,7 @@ __all__ = [
      # Convenience constructors
      "tensor",
      "node",
+     "val",
      # Pass infrastructure
      "passes",
      # IO
@@ -90,7 +91,7 @@ __all__ = [
  import types
 
  from onnx_ir import convenience, external_data, passes, serde, tape, traversal
- from onnx_ir._convenience._constructors import node, tensor
+ from onnx_ir._convenience._constructors import node, tensor, val
  from onnx_ir._core import (
      Attr,
      AttrFloat32,
@@ -167,4 +168,4 @@ def __set_module() -> None:
 
 
  __set_module()
- __version__ = "0.1.7"
+ __version__ = "0.1.9"
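With this release the `val` convenience constructor is re-exported from the package root next to `tensor` and `node`. A minimal sketch of picking it up there (assuming onnx-ir 0.1.9 is installed):

```python
import onnx_ir as ir

print(ir.__version__)  # "0.1.9" for this release

# val is now available at the package root, alongside ir.tensor and ir.node
x = ir.val("x", ir.DataType.FLOAT, [1, 3])
assert isinstance(x, ir.Value)
```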
onnx_ir/_convenience/__init__.py CHANGED
@@ -226,7 +226,7 @@ def convert_attributes(
      ...     "type_protos": [ir.TensorType(ir.DataType.FLOAT), ir.TensorType(ir.DataType.FLOAT)],
      ... }
      >>> convert_attributes(attrs)
-     [Attr('int', INT, 1), Attr('float', FLOAT, 1.0), Attr('str', STRING, 'hello'), Attr('ints', INTS, [1, 2, 3]), Attr('floats', FLOATS, [1.0, 2.0, 3.0]), Attr('strings', STRINGS, ['hello', 'world']), Attr('tensor', TENSOR, Tensor<DOUBLE,[3]>(array([1., 2., 3.]), name=None)), Attr('tensor_proto', TENSOR, TensorProtoTensor<FLOAT,[3]>(array([1., 2., 3.], dtype=float32), name='proto')), Attr('graph', GRAPH, Graph(
+     [Attr('int', INT, 1), Attr('float', FLOAT, 1.0), Attr('str', STRING, 'hello'), Attr('ints', INTS, (1, 2, 3)), Attr('floats', FLOATS, (1.0, 2.0, 3.0)), Attr('strings', STRINGS, ('hello', 'world')), Attr('tensor', TENSOR, Tensor<DOUBLE,[3]>(array([1., 2., 3.]), name=None)), Attr('tensor_proto', TENSOR, TensorProtoTensor<FLOAT,[3]>(array([1., 2., 3.], dtype=float32), name='proto')), Attr('graph', GRAPH, Graph(
          name='graph0',
          inputs=(
      <BLANKLINE>
@@ -235,7 +235,7 @@ def convert_attributes(
      <BLANKLINE>
          ),
          len()=0
-     )), Attr('graphs', GRAPHS, [Graph(
+     )), Attr('graphs', GRAPHS, (Graph(
          name='graph1',
          inputs=(
      <BLANKLINE>
@@ -253,7 +253,7 @@ def convert_attributes(
      <BLANKLINE>
          ),
          len()=0
-     )]), Attr('type_proto', TYPE_PROTO, Tensor(FLOAT)), Attr('type_protos', TYPE_PROTOS, [Tensor(FLOAT), Tensor(FLOAT)])]
+     ))), Attr('type_proto', TYPE_PROTO, Tensor(FLOAT)), Attr('type_protos', TYPE_PROTOS, (Tensor(FLOAT), Tensor(FLOAT)))]
 
      .. important::
          An empty sequence should be created with an explicit type by initializing
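The doctest change reflects that sequence-valued attributes (INTS, FLOATS, STRINGS, GRAPHS, TYPE_PROTOS) are now stored as tuples, because values are normalized in the `Attr` constructor (see the `_core.py` hunks below). A short sketch of what a caller sees, assuming `convert_attributes` is reachable through the public `onnx_ir.convenience` module; the attribute names are only illustrative:

```python
import onnx_ir as ir

attrs = ir.convenience.convert_attributes({"kernel_shape": [3, 3], "alpha": 0.5})
kernel_shape, alpha = attrs

print(kernel_shape)            # e.g. Attr('kernel_shape', INTS, (3, 3))
print(kernel_shape.as_ints())  # (3, 3) -- an immutable tuple, returned without copying
print(alpha.as_float())        # 0.5
```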
@@ -293,7 +293,7 @@ def replace_all_uses_with(
      We want to replace the node A with a new node D::
 
          >>> import onnx_ir as ir
-         >>> input = ir.Input("input")
+         >>> input = ir.val("input")
          >>> node_a = ir.Node("", "A", [input])
          >>> node_b = ir.Node("", "B", node_a.outputs)
          >>> node_c = ir.Node("", "C", node_a.outputs)
onnx_ir/_convenience/_constructors.py CHANGED
@@ -25,7 +25,7 @@ if typing.TYPE_CHECKING:
 
  def tensor(
      value: npt.ArrayLike | onnx.TensorProto | ir.DLPackCompatible | ir.ArrayCompatible,
-     dtype: _enums.DataType | None = None,
+     dtype: ir.DataType | None = None,
      name: str | None = None,
      doc_string: str | None = None,
  ) -> _protocols.TensorProtocol:
@@ -159,7 +159,7 @@ def node(
      doc_string: str | None = None,
      metadata_props: dict[str, str] | None = None,
  ) -> ir.Node:
-     """Create an :class:`~onnx_ir.Node`.
+     """Create a :class:`~onnx_ir.Node`.
 
      This is a convenience constructor for creating a Node that supports Python
      objects as attributes.
@@ -167,8 +167,8 @@ def node(
      Example::
 
          >>> import onnx_ir as ir
-         >>> input_a = ir.Input("A", shape=ir.Shape([1, 2]), type=ir.TensorType(ir.DataType.INT32))
-         >>> input_b = ir.Input("B", shape=ir.Shape([1, 2]), type=ir.TensorType(ir.DataType.INT32))
+         >>> input_a = ir.val("A", shape=[1, 2], type=ir.TensorType(ir.DataType.INT32))
+         >>> input_b = ir.val("B", shape=[1, 2], type=ir.TensorType(ir.DataType.INT32))
          >>> node = ir.node(
          ...     "SomeOp",
          ...     inputs=[input_a, input_b],
@@ -215,3 +215,74 @@ def node(
          doc_string=doc_string,
          metadata_props=metadata_props,
      )
+ 
+ 
+ def val(
+     name: str | None,
+     dtype: ir.DataType | None = None,
+     shape: ir.Shape | Sequence[int | str | None] | None = None,
+     *,
+     type: ir.TypeProtocol | None = None,
+     const_value: ir.TensorProtocol | None = None,
+ ) -> ir.Value:
+     """Create a :class:`~onnx_ir.Value` with the given name and type.
+ 
+     This is a convenience constructor for creating a Value that allows you to specify
+     dtype and shape in a more relaxed manner. Whereas to create a Value directly, you
+     need to create a :class:`~onnx_ir.TypeProtocol` and :class:`~onnx_ir.Shape` object
+     first, this function allows you to specify dtype as a :class:`~onnx_ir.DataType`
+     and shape as a sequence of integers or symbolic dimensions.
+ 
+     Example::
+ 
+         >>> import onnx_ir as ir
+         >>> t = ir.val("x", ir.DataType.FLOAT, ["N", 42, 3])
+         >>> t.name
+         'x'
+         >>> t.type
+         Tensor(FLOAT)
+         >>> t.shape
+         Shape([SymbolicDim(N), 42, 3])
+ 
+     .. versionadded:: 0.1.9
+ 
+     Args:
+         name: The name of the value.
+         dtype: The data type of the TensorType of the value. This is used only when type is None.
+         shape: The shape of the value.
+         type: The type of the value. Only one of dtype and type can be specified.
+         const_value: The constant tensor that initializes the value. Supply this argument
+             when you want to create an initializer. The type and shape can be obtained from the tensor.
+ 
+     Returns:
+         A Value object.
+     """
+     if const_value is not None:
+         const_tensor_type = _core.TensorType(const_value.dtype)
+         if type is not None and type != const_tensor_type:
+             raise ValueError(
+                 f"The type does not match the const_value. type={type} but const_value has type {const_tensor_type}. "
+                 "You do not have to specify the type when const_value is provided."
+             )
+         if dtype is not None and dtype != const_value.dtype:
+             raise ValueError(
+                 f"The dtype does not match the const_value. dtype={dtype} but const_value has dtype {const_value.dtype}. "
+                 "You do not have to specify the dtype when const_value is provided."
+             )
+         if shape is not None and _core.Shape(shape) != const_value.shape:
+             raise ValueError(
+                 f"The shape does not match the const_value. shape={shape} but const_value has shape {const_value.shape}. "
+                 "You do not have to specify the shape when const_value is provided."
+             )
+         return _core.Value(
+             name=name,
+             type=const_tensor_type,
+             shape=_core.Shape(const_value.shape),  # type: ignore
+             const_value=const_value,
+         )
+ 
+     if type is None and dtype is not None:
+         type = _core.TensorType(dtype)
+     if shape is not None and not isinstance(shape, _core.Shape):
+         shape = _core.Shape(shape)
+     return _core.Value(name=name, type=type, shape=shape)
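Beyond the doctest, `val` covers the two common cases: graph inputs with symbolic dimensions, and initializers built from a constant tensor. A short sketch (the tensor contents are made up for illustration):

```python
import numpy as np
import onnx_ir as ir

# A graph input with a symbolic batch dimension
x = ir.val("x", ir.DataType.FLOAT, ["batch", 4])
print(x.shape)  # a Shape with SymbolicDim("batch") and the concrete dim 4

# An initializer: type and shape are taken from const_value, so neither needs to be passed
w = ir.val("w", const_value=ir.tensor(np.ones((4, 2), dtype=np.float32), name="w"))
print(w.type)   # a TensorType with dtype FLOAT
print(w.shape)  # Shape with dims (4, 2)
assert w.const_value is not None
```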
onnx_ir/_core.py CHANGED
@@ -45,7 +45,7 @@ from typing import (
 
  import ml_dtypes
  import numpy as np
- from typing_extensions import TypeIs
+ from typing_extensions import TypeIs, deprecated
 
  import onnx_ir
  from onnx_ir import (
@@ -836,6 +836,11 @@ class StringTensor(TensorBase, _protocols.TensorProtocol):  # pylint: disable=to
          """The shape of the tensor. Immutable."""
          return self._shape
 
+     @property
+     def nbytes(self) -> int:
+         """The number of bytes in the tensor."""
+         return sum(len(string) for string in self.string_data())
+ 
      @property
      def raw(self) -> Sequence[bytes] | npt.NDArray[np.bytes_]:
          """Backing data of the tensor. Immutable."""
@@ -2270,6 +2275,7 @@ class Value(_protocols.ValueProtocol, _display.PrettyPrintable):
          return self._is_initializer
 
 
+ @deprecated("Input is deprecated since 0.1.9. Use ir.val(...) instead.")
  def Input(
      name: str | None = None,
      shape: Shape | None = None,
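`Input` remains for backward compatibility but is now marked deprecated via `typing_extensions.deprecated`; the message points at `ir.val`. Migration is a one-line change, sketched here:

```python
import onnx_ir as ir

# Before (deprecated since 0.1.9):
# x = ir.Input("x", shape=ir.Shape([1, 3]), type=ir.TensorType(ir.DataType.FLOAT))

# After:
x = ir.val("x", dtype=ir.DataType.FLOAT, shape=[1, 3])
```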
@@ -3392,6 +3398,31 @@ class Attr(
          *,
          doc_string: str | None = None,
      ) -> None:
+         # Quick checks to ensure that INT and FLOAT attributes are stored as int and float,
+         # not np.int32, np.float32, bool, etc.
+         # This also allows errors to be raised at the time of construction instead of later
+         # during serialization.
+         # TODO(justinchuby): Use case matching when we drop support for Python 3.9
+         if value is None:
+             # Value can be None for reference attributes or when it is used as a
+             # placeholder for schemas
+             pass
+         elif type == _enums.AttributeType.INT:
+             value = int(value)
+         elif type == _enums.AttributeType.FLOAT:
+             value = float(value)
+         elif type == _enums.AttributeType.INTS:
+             value = tuple(int(v) for v in value)
+         elif type == _enums.AttributeType.FLOATS:
+             value = tuple(float(v) for v in value)
+         elif type in {
+             _enums.AttributeType.STRINGS,
+             _enums.AttributeType.TENSORS,
+             _enums.AttributeType.GRAPHS,
+             _enums.AttributeType.TYPE_PROTOS,
+         }:
+             value = tuple(value)
+ 
          self._name = name
          self._type = type
          self._value = value
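Normalizing in the constructor means numpy scalars and arrays passed as attribute values are coerced to plain Python `int`/`float` and immutable tuples up front, so problems surface at construction time rather than during serialization. A brief sketch:

```python
import numpy as np
import onnx_ir as ir

axis = ir.Attr("axis", ir.AttributeType.INT, np.int64(1))
axes = ir.Attr("axes", ir.AttributeType.INTS, np.array([0, 2], dtype=np.int64))

print(type(axis.value))  # <class 'int'>
print(axes.as_ints())    # (0, 2) -- stored as a tuple, returned without copying
```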
@@ -3453,7 +3484,7 @@ class Attr(
              return f"@{self.ref_attr_name}"
          if self.type == _enums.AttributeType.GRAPH:
              return textwrap.indent("\n" + str(self.value), " " * 4)
-         return str(self.value)
+         return repr(self.value)
 
      def __repr__(self) -> str:
          if self.is_ref():
@@ -3467,8 +3498,8 @@ class Attr(
              raise TypeError(
                  f"Attribute '{self.name}' is not of type FLOAT. Actual type: {self.type}"
              )
-         # Do not use isinstance check because it may prevent np.float32 etc. from being used
-         return float(self.value)
+         # value is guaranteed to be a float in the constructor
+         return self.value
 
      def as_int(self) -> int:
          """Get the attribute value as an int."""
@@ -3476,8 +3507,8 @@ class Attr(
              raise TypeError(
                  f"Attribute '{self.name}' is not of type INT. Actual type: {self.type}"
              )
-         # Do not use isinstance check because it may prevent np.int32 etc. from being used
-         return int(self.value)
+         # value is guaranteed to be an int in the constructor
+         return self.value
 
      def as_string(self) -> str:
          """Get the attribute value as a string."""
@@ -3485,9 +3516,10 @@ class Attr(
              raise TypeError(
                  f"Attribute '{self.name}' is not of type STRING. Actual type: {self.type}"
              )
-         if not isinstance(self.value, str):
+         value = self.value
+         if not isinstance(value, str):
              raise TypeError(f"Value of attribute '{self!r}' is not a string.")
-         return self.value
+         return value
 
      def as_tensor(self) -> _protocols.TensorProtocol:
          """Get the attribute value as a tensor."""
@@ -3495,9 +3527,10 @@ class Attr(
              raise TypeError(
                  f"Attribute '{self.name}' is not of type TENSOR. Actual type: {self.type}"
              )
-         if not isinstance(self.value, _protocols.TensorProtocol):
+         value = self.value
+         if not isinstance(value, _protocols.TensorProtocol):
              raise TypeError(f"Value of attribute '{self!r}' is not a tensor.")
-         return self.value
+         return value
 
      def as_graph(self) -> Graph:
          """Get the attribute value as a graph."""
@@ -3505,75 +3538,64 @@ class Attr(
              raise TypeError(
                  f"Attribute '{self.name}' is not of type GRAPH. Actual type: {self.type}"
              )
-         if not isinstance(self.value, Graph):
+         value = self.value
+         if not isinstance(value, Graph):
              raise TypeError(f"Value of attribute '{self!r}' is not a graph.")
-         return self.value
+         return value
 
-     def as_floats(self) -> Sequence[float]:
+     def as_floats(self) -> tuple[float, ...]:
          """Get the attribute value as a sequence of floats."""
          if self.type != _enums.AttributeType.FLOATS:
              raise TypeError(
                  f"Attribute '{self.name}' is not of type FLOATS. Actual type: {self.type}"
              )
-         if not isinstance(self.value, Sequence):
-             raise TypeError(f"Value of attribute '{self!r}' is not a Sequence.")
-         # Do not use isinstance check on elements because it may prevent np.int32 etc. from being used
-         # Create a copy of the list to prevent mutation
-         return [float(v) for v in self.value]
+         # value is guaranteed to be a sequence of float in the constructor
+         return self.value
 
-     def as_ints(self) -> Sequence[int]:
+     def as_ints(self) -> tuple[int, ...]:
          """Get the attribute value as a sequence of ints."""
          if self.type != _enums.AttributeType.INTS:
              raise TypeError(
                  f"Attribute '{self.name}' is not of type INTS. Actual type: {self.type}"
              )
-         if not isinstance(self.value, Sequence):
-             raise TypeError(f"Value of attribute '{self!r}' is not a Sequence.")
-         # Do not use isinstance check on elements because it may prevent np.int32 etc. from being used
-         # Create a copy of the list to prevent mutation
-         return list(self.value)
+         # value is guaranteed to be a sequence of int in the constructor
+         return self.value
 
-     def as_strings(self) -> Sequence[str]:
+     def as_strings(self) -> tuple[str, ...]:
          """Get the attribute value as a sequence of strings."""
          if self.type != _enums.AttributeType.STRINGS:
              raise TypeError(
                  f"Attribute '{self.name}' is not of type STRINGS. Actual type: {self.type}"
              )
-         if not isinstance(self.value, Sequence):
-             raise TypeError(f"Value of attribute '{self!r}' is not a Sequence.")
          if onnx_ir.DEBUG:
              if not all(isinstance(x, str) for x in self.value):
                  raise TypeError(f"Value of attribute '{self!r}' is not a Sequence of strings.")
-         # Create a copy of the list to prevent mutation
-         return list(self.value)
+         # value is guaranteed to be a sequence in the constructor
+         return self.value
 
-     def as_tensors(self) -> Sequence[_protocols.TensorProtocol]:
+     def as_tensors(self) -> tuple[_protocols.TensorProtocol, ...]:
          """Get the attribute value as a sequence of tensors."""
          if self.type != _enums.AttributeType.TENSORS:
              raise TypeError(
                  f"Attribute '{self.name}' is not of type TENSORS. Actual type: {self.type}"
              )
-         if not isinstance(self.value, Sequence):
-             raise TypeError(f"Value of attribute '{self!r}' is not a Sequence.")
          if onnx_ir.DEBUG:
              if not all(isinstance(x, _protocols.TensorProtocol) for x in self.value):
                  raise TypeError(f"Value of attribute '{self!r}' is not a Sequence of tensors.")
-         # Create a copy of the list to prevent mutation
-         return list(self.value)
+         # value is guaranteed to be a sequence in the constructor
+         return tuple(self.value)
 
-     def as_graphs(self) -> Sequence[Graph]:
+     def as_graphs(self) -> tuple[Graph, ...]:
          """Get the attribute value as a sequence of graphs."""
          if self.type != _enums.AttributeType.GRAPHS:
              raise TypeError(
                  f"Attribute '{self.name}' is not of type GRAPHS. Actual type: {self.type}"
              )
-         if not isinstance(self.value, Sequence):
-             raise TypeError(f"Value of attribute '{self!r}' is not a Sequence.")
          if onnx_ir.DEBUG:
              if not all(isinstance(x, Graph) for x in self.value):
                  raise TypeError(f"Value of attribute '{self!r}' is not a Sequence of graphs.")
-         # Create a copy of the list to prevent mutation
-         return list(self.value)
+         # value is guaranteed to be a sequence in the constructor
+         return tuple(self.value)
 
 
  # NOTE: The following functions are just for convenience
@@ -3600,7 +3622,7 @@ def RefAttr(
      return Attr(name, type, None, ref_attr_name=ref_attr_name, doc_string=doc_string)
 
 
- def AttrFloat32(name: str, value: float, doc_string: str | None = None) -> Attr:
+ def AttrFloat32(name: str, value: float | np.floating, doc_string: str | None = None) -> Attr:
      """Create a float attribute."""
      # NOTE: The function name is capitalized to maintain API backward compatibility.
      return Attr(
@@ -3611,7 +3633,7 @@ def AttrFloat32(name: str, value: float, doc_string: str | None = None) -> Attr:
      )
 
 
- def AttrInt64(name: str, value: int, doc_string: str | None = None) -> Attr:
+ def AttrInt64(name: str, value: int | np.integer, doc_string: str | None = None) -> Attr:
      """Create an int attribute."""
      # NOTE: The function name is capitalized to maintain API backward compatibility.
      return Attr(
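The widened signatures only document what the constructor already tolerates: numpy scalars are accepted and converted to plain Python numbers by the `Attr` normalization shown above. A quick sketch:

```python
import numpy as np
import onnx_ir as ir

a = ir.AttrInt64("axis", np.int64(-1))
b = ir.AttrFloat32("epsilon", np.float32(1e-5))

assert isinstance(a.value, int) and a.value == -1
assert isinstance(b.value, float)
```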
onnx_ir/_enums.py CHANGED
@@ -5,6 +5,7 @@
  from __future__ import annotations
 
  import enum
+ from typing import Any
 
  import ml_dtypes
  import numpy as np
@@ -77,7 +78,7 @@ class DataType(enum.IntEnum):
          if dtype in _NP_TYPE_TO_DATA_TYPE:
              return cls(_NP_TYPE_TO_DATA_TYPE[dtype])
 
-         if np.issubdtype(dtype, np.str_):
+         if np.issubdtype(dtype, np.str_) or np.issubdtype(dtype, np.bytes_):
              return DataType.STRING
 
          # Special cases for handling custom dtypes defined in ONNX (as of onnx 1.18)
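Numpy byte-string dtypes (`np.bytes_`, i.e. `S...` dtypes) now map to `DataType.STRING` just like unicode string dtypes. A sketch, assuming the lookup above is reached through the public `DataType.from_numpy` classmethod:

```python
import numpy as np
import onnx_ir as ir

assert ir.DataType.from_numpy(np.dtype(np.str_)) == ir.DataType.STRING
assert ir.DataType.from_numpy(np.dtype(np.bytes_)) == ir.DataType.STRING  # new in this range
```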
@@ -131,6 +132,146 @@ class DataType(enum.IntEnum):
              raise TypeError(f"Bitwidth not available for ONNX data type: {self}")
          return _BITWIDTH_MAP[self]
 
+     @property
+     def exponent_bitwidth(self) -> int:
+         """Returns the bit width of the exponent for floating-point types.
+ 
+         .. versionadded:: 0.1.8
+ 
+         Raises:
+             TypeError: If the data type is not supported.
+         """
+         if self.is_floating_point():
+             return ml_dtypes.finfo(self.numpy()).nexp
+ 
+         raise TypeError(f"Exponent not available for ONNX data type: {self}")
+ 
+     @property
+     def mantissa_bitwidth(self) -> int:
+         """Returns the bit width of the mantissa for floating-point types.
+ 
+         .. versionadded:: 0.1.8
+ 
+         Raises:
+             TypeError: If the data type is not supported.
+         """
+         if self.is_floating_point():
+             return ml_dtypes.finfo(self.numpy()).nmant
+ 
+         raise TypeError(f"Mantissa not available for ONNX data type: {self}")
+ 
+     @property
+     def eps(self) -> int | np.floating[Any]:
+         """Returns the difference between 1.0 and the next smallest representable float larger than 1.0 for the ONNX data type.
+ 
+         Returns 1 for integers.
+ 
+         .. versionadded:: 0.1.8
+ 
+         Raises:
+             TypeError: If the data type is not a numeric data type.
+         """
+         if self.is_integer():
+             return 1
+ 
+         if self.is_floating_point():
+             return ml_dtypes.finfo(self.numpy()).eps
+ 
+         raise TypeError(f"Eps not available for ONNX data type: {self}")
+ 
+     @property
+     def tiny(self) -> int | np.floating[Any]:
+         """Returns the smallest positive non-zero value for the ONNX data type.
+ 
+         Returns 1 for integers.
+ 
+         .. versionadded:: 0.1.8
+ 
+         Raises:
+             TypeError: If the data type is not a numeric data type.
+         """
+         if self.is_integer():
+             return 1
+ 
+         if self.is_floating_point():
+             return ml_dtypes.finfo(self.numpy()).tiny
+ 
+         raise TypeError(f"Tiny not available for ONNX data type: {self}")
+ 
+     @property
+     def min(self) -> int | np.floating[Any]:
+         """Returns the minimum representable value for the ONNX data type.
+ 
+         .. versionadded:: 0.1.8
+ 
+         Raises:
+             TypeError: If the data type is not a numeric data type.
+         """
+         if self.is_integer():
+             return ml_dtypes.iinfo(self.numpy()).min
+ 
+         if self.is_floating_point():
+             return ml_dtypes.finfo(self.numpy()).min
+ 
+         raise TypeError(f"Minimum not available for ONNX data type: {self}")
+ 
+     @property
+     def max(self) -> int | np.floating[Any]:
+         """Returns the maximum representable value for the ONNX data type.
+ 
+         .. versionadded:: 0.1.8
+ 
+         Raises:
+             TypeError: If the data type is not a numeric data type.
+         """
+         if self.is_integer():
+             return ml_dtypes.iinfo(self.numpy()).max
+ 
+         if self.is_floating_point():
+             return ml_dtypes.finfo(self.numpy()).max
+ 
+         raise TypeError(f"Maximum not available for ONNX data type: {self}")
+ 
+     @property
+     def precision(self) -> int:
+         """Returns the precision for the ONNX dtype if supported.
+ 
+         For floats returns the approximate number of decimal digits to which
+         this kind of float is precise. Returns 0 for integers.
+ 
+         .. versionadded:: 0.1.8
+ 
+         Raises:
+             TypeError: If the data type is not a numeric data type.
+         """
+         if self.is_integer():
+             return 0
+ 
+         if self.is_floating_point():
+             return ml_dtypes.finfo(self.numpy()).precision
+ 
+         raise TypeError(f"Precision not available for ONNX data type: {self}")
+ 
+     @property
+     def resolution(self) -> int | np.floating[Any]:
+         """Returns the resolution for the ONNX dtype if supported.
+ 
+         Returns the approximate decimal resolution of this type, i.e.,
+         10**-precision. Returns 1 for integers.
+ 
+         .. versionadded:: 0.1.8
+ 
+         Raises:
+             TypeError: If the data type is not a numeric data type.
+         """
+         if self.is_integer():
+             return 1
+ 
+         if self.is_floating_point():
+             return ml_dtypes.finfo(self.numpy()).resolution
+ 
+         raise TypeError(f"Resolution not available for ONNX data type: {self}")
+ 
      def numpy(self) -> np.dtype:
          """Returns the numpy dtype for the ONNX data type.
 
@@ -215,6 +356,13 @@ class DataType(enum.IntEnum):
          DataType.FLOAT8E8M0,
      }
 
+     def is_string(self) -> bool:
+         """Returns True if the data type is a string type.
+ 
+         .. versionadded:: 0.1.8
+         """
+         return self == DataType.STRING
+ 
      def __repr__(self) -> str:
          return self.name
 
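These properties surface `ml_dtypes.finfo`/`iinfo` metadata directly on the enum, which is convenient when writing quantization or casting passes. A short sketch; the printed values are whatever `ml_dtypes` reports for the dtype:

```python
import onnx_ir as ir

fp16 = ir.DataType.FLOAT16
print(fp16.exponent_bitwidth, fp16.mantissa_bitwidth)  # 5 10
print(fp16.max, fp16.eps)                              # 65504.0 0.000977

print(ir.DataType.INT8.min, ir.DataType.INT8.max)      # -128 127
print(ir.DataType.STRING.is_string())                  # True
```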
@@ -274,15 +422,9 @@ _NP_TYPE_TO_DATA_TYPE = {
      np.dtype(ml_dtypes.float8_e8m0fnu): DataType.FLOAT8E8M0,
      np.dtype(ml_dtypes.int4): DataType.INT4,
      np.dtype(ml_dtypes.uint4): DataType.UINT4,
+     np.dtype(ml_dtypes.float4_e2m1fn): DataType.FLOAT4E2M1,
  }
 
- # TODO(after min req for ml_dtypes>=0.5): Move this inside _NP_TYPE_TO_DATA_TYPE
- _NP_TYPE_TO_DATA_TYPE.update(
-     {np.dtype(ml_dtypes.float4_e2m1fn): DataType.FLOAT4E2M1}
-     if hasattr(ml_dtypes, "float4_e2m1fn")
-     else {}
- )
- 
  # ONNX DataType to Numpy dtype.
  _DATA_TYPE_TO_NP_TYPE = {v: k for k, v in _NP_TYPE_TO_DATA_TYPE.items()}
 
onnx_ir/passes/common/initializer_deduplication.py CHANGED
@@ -10,6 +10,8 @@ __all__ = ["DeduplicateInitializersPass", "DeduplicateHashedInitializersPass"]
  import hashlib
  import logging
 
+ import numpy as np
+ 
  import onnx_ir as ir
 
  logger = logging.getLogger(__name__)
@@ -42,17 +44,27 @@ def _should_skip_initializer(initializer: ir.Value, size_limit: int) -> bool:
              size_limit,
          )
          return True
- 
-     if const_val.dtype == ir.DataType.STRING:
-         # Skip string initializers as they don't have a bytes representation
-         logger.warning(
-             "Skipped deduplication of string initializer '%s' (unsupported yet)",
-             initializer.name,
-         )
-         return True
      return False
 
 
+ def _tobytes(val):
+     """StringTensor does not support tobytes. Use 'string_data' instead.
+ 
+     However, 'string_data' yields a list of bytes which cannot be hashed, i.e.,
+     cannot be used to index into a dict. To generate keys for identifying
+     tensors in initializer deduplication the following converts the list of
+     bytes to an array of fixed-length strings which can be flattened into a
+     bytes-string. This, together with the tensor shape, is sufficient for
+     identifying tensors for deduplication, but it differs from the
+     representation used for serializing tensors (that is string_data) by adding
+     padding bytes so that each string occupies the same number of consecutive
+     bytes in the flattened .tobytes representation.
+     """
+     if val.dtype.is_string():
+         return np.array(val.string_data()).tobytes()
+     return val.tobytes()
+ 
+ 
  class DeduplicateInitializersPass(ir.passes.InPlacePass):
      """Remove duplicated initializer tensors from the main graph and all subgraphs.
 
@@ -84,7 +96,7 @@ class DeduplicateInitializersPass(ir.passes.InPlacePass):
              const_val = initializer.const_value
              assert const_val is not None
 
-             key = (const_val.dtype, tuple(const_val.shape), const_val.tobytes())
+             key = (const_val.dtype, tuple(const_val.shape), _tobytes(const_val))
              if key in initializers:
                  modified = True
                  initializer_to_keep = initializers[key]  # type: ignore[index]
@@ -143,7 +155,7 @@ class DeduplicateHashedInitializersPass(ir.passes.InPlacePass):
              key = (const_val.dtype, tensor_dims, tensor_digest)
 
              if key in initializers:
-                 if initializers[key].const_value.tobytes() != const_val.tobytes():
+                 if _tobytes(initializers[key].const_value) != _tobytes(const_val):
                      logger.warning(
                          "Initializer deduplication failed: "
                          "hashes match but values differ with values %s and %s",
onnx_ir/serde.py CHANGED
@@ -711,7 +711,7 @@ def _deserialize_graph(
 
      # Create values for initializers and inputs
      initializer_tensors = [deserialize_tensor(tensor) for tensor in proto.initializer]
-     inputs = [_core.Input(info.name) for info in proto.input]
+     inputs = [_core.Value(name=info.name) for info in proto.input]
      for info, value in zip(proto.input, inputs):
          deserialize_value_info_proto(info, value)
 
@@ -869,7 +869,7 @@ def deserialize_function(proto: onnx.FunctionProto) -> _core.Function:
      Returns:
          An IR Function object representing the ONNX function.
      """
-     inputs = [_core.Input(name) for name in proto.input]
+     inputs = [_core.Value(name=name) for name in proto.input]
      values: dict[str, _core.Value] = {v.name: v for v in inputs}  # type: ignore[misc]
      value_info = {info.name: info for info in getattr(proto, "value_info", [])}
 
@@ -1143,7 +1143,19 @@ def _deserialize_attribute(
      if type_ == _enums.AttributeType.FLOAT:
          return _core.AttrFloat32(name, proto.f, doc_string=doc_string)
      if type_ == _enums.AttributeType.STRING:
-         return _core.AttrString(name, proto.s.decode("utf-8"), doc_string=doc_string)
+         try:
+             return _core.AttrString(name, proto.s.decode("utf-8"), doc_string=doc_string)
+         except UnicodeDecodeError:
+             # Even though onnx.ai/onnx/repo-docs/IR.html#attributes requires the attribute
+             # for strings to be utf-8 encoded bytes, custom ops may still store arbitrary data there
+             logger.warning(
+                 "Attribute %r contains invalid UTF-8 bytes. ONNX spec requires string attributes "
+                 "to be UTF-8 encoded so the model is invalid. We will skip decoding the attribute and "
+                 "use the bytes as attribute value",
+                 name,
+             )
+             return _core.Attr(name, type_, proto.s, doc_string=doc_string)
+ 
      if type_ == _enums.AttributeType.INTS:
          return _core.AttrInt64s(name, proto.ints, doc_string=doc_string)
      if type_ == _enums.AttributeType.FLOATS:
@@ -1792,7 +1804,18 @@ def _fill_in_value_for_attribute(
          attribute_proto.type = onnx.AttributeProto.FLOAT
      elif type_ == _enums.AttributeType.STRING:
          # value: str
-         attribute_proto.s = value.encode("utf-8")
+         if type(value) is bytes:
+             # Even though onnx.ai/onnx/repo-docs/IR.html#attributes requires the attribute
+             # for strings to be utf-8 encoded bytes, custom ops may still store arbitrary data there
+             logger.warning(
+                 "Value in attribute %r should be a string but is instead bytes. ONNX "
+                 "spec requires string attributes to be UTF-8 encoded so the model is invalid. "
+                 "We will skip encoding the attribute and use the bytes as attribute value",
+                 attribute_proto.name,
+             )
+             attribute_proto.s = value
+         else:
+             attribute_proto.s = value.encode("utf-8")
          attribute_proto.type = onnx.AttributeProto.STRING
      elif type_ == _enums.AttributeType.INTS:
          # value: Sequence[int]
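Together the two serde hunks let a model whose string attribute carries non-UTF-8 bytes survive a deserialize/serialize round trip instead of raising: the raw bytes are kept on the `Attr` and written back to `AttributeProto.s` verbatim, with a warning in both directions. In IR form such an attribute simply holds `bytes` as its value; a small sketch:

```python
import onnx_ir as ir

# A STRING attribute whose payload is not valid UTF-8 (e.g. produced by a custom op)
raw = b"\xff\xfe\x00binary"
attr = ir.Attr("blob", ir.AttributeType.STRING, raw)

print(attr.value)  # b'\xff\xfe\x00binary' -- kept as bytes; as_string() still requires str
```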
onnx_ir-0.1.7.dist-info/METADATA → onnx_ir-0.1.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: onnx-ir
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Efficient in-memory representation for ONNX
  Author-email: ONNX Contributors <onnx-technical-discuss@lists.lfaidata.foundation>
  License-Expression: Apache-2.0
@@ -19,10 +19,10 @@ License-File: LICENSE
  Requires-Dist: numpy
  Requires-Dist: onnx>=1.16
  Requires-Dist: typing_extensions>=4.10
- Requires-Dist: ml_dtypes
+ Requires-Dist: ml_dtypes>=0.5.0
  Dynamic: license-file
 
- # ONNX IR
+ # <img src="docs/_static/logo-light.png" alt="ONNX IR" width="250"/>
 
  [![PyPI - Version](https://img.shields.io/pypi/v/onnx-ir.svg)](https://pypi.org/project/onnx-ir)
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/onnx-ir.svg)](https://pypi.org/project/onnx-ir)
@@ -60,6 +60,10 @@ pip install git+https://github.com/onnx/ir-py.git
  - Pythonic and familiar APIs: Classes define Pythonic apis and still map to ONNX protobuf concepts in an intuitive way.
  - No protobuf dependency: The IR does not require protobuf once the model is converted to the IR representation, decoupling from the serialization format.
 
+ ## Concept Diagram
+ 
+ ![Concept Diagram](docs/resource/onnx-ir-entities.svg)
+ 
  ## Code Organization 🗺️
 
  - [`_protocols.py`](src/onnx_ir/_protocols.py): Interfaces defined for all entities in the IR.
onnx_ir-0.1.7.dist-info/RECORD → onnx_ir-0.1.9.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
- onnx_ir/__init__.py,sha256=GkXeM2FSKjT0TUO8ezCJdT1yHZKdtQ6keZKx2a3BluI,3424
- onnx_ir/_core.py,sha256=XQRd43VQj72qBGLa_4x9NEjjfhM0rxJ7qT6sLKA_rGA,139032
+ onnx_ir/__init__.py,sha256=GONmwgFPw_4lRywnqZUQz_oOG8p-JP-PwaUAiYKls8Q,3440
+ onnx_ir/_core.py,sha256=ALDyEiVvZP6bsAmnBSYKPgCeKBHqcYVv5_wAHwRhf20,139578
  onnx_ir/_display.py,sha256=230bMN_hVy47Ug3HkA4o5Tf5Hr21AnBEoq5w0fxjyTs,1300
- onnx_ir/_enums.py,sha256=SxC-GGgPrmdz6UsMhx7xT9-6VmkZ6j1oVzDqNUHr3Rc,9659
+ onnx_ir/_enums.py,sha256=E7WQ7yQzulBeimamc9q_k4fEUoyH_2PWtaOMpwck_W0,13915
  onnx_ir/_graph_comparison.py,sha256=8_D1gu547eCDotEUqxfIJhUGU_Ufhfji7sfsSraOj3g,727
  onnx_ir/_graph_containers.py,sha256=PRKrshRZ5rzWCgRs1TefzJq9n8wyo7OqeKy3XxMhyys,14265
  onnx_ir/_io.py,sha256=GWwA4XOZ-ZX1cgibgaYD0K0O5d9LX21ZwcBN02Wrh04,5205
@@ -16,13 +16,13 @@ onnx_ir/_version_utils.py,sha256=bZThuE7meVHFOY1DLsmss9WshVIp9iig7udGfDbVaK4,133
  onnx_ir/convenience.py,sha256=0B1epuXZCSmY4FbW2vaYfR-t5ubxBZ1UruiytHs-zFw,917
  onnx_ir/external_data.py,sha256=rXHtRU-9tjAt10Iervhr5lsI6Dtv-EhR7J4brxppImA,18079
  onnx_ir/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
- onnx_ir/serde.py,sha256=Ld00k4L_TJ50T8FA0myV0C1hLr7EqwujZk6bBr_nGLQ,78174
+ onnx_ir/serde.py,sha256=2us-I2h3_BXxM9aYyXFOdq5v_cLvLIwyHGpIZbhL2W0,79459
  onnx_ir/tape.py,sha256=4FyfAHmVhQoMsfHMYnBwP2azi6UF6b6pj--ercObqZs,350
  onnx_ir/tensor_adapters.py,sha256=YffUeZDZi8thxm-4nF2cL6cNSJSVmLm4A3IbEzwY8QQ,7233
  onnx_ir/testing.py,sha256=WTrjf2joWizDWaYMJlV1KjZMQw7YmZ8NvuBTVn1uY6s,8803
  onnx_ir/traversal.py,sha256=Wy4XphwuapAvm94-5iaz6G8LjIoMFpY7qfPfXzYViEE,4488
- onnx_ir/_convenience/__init__.py,sha256=bXUxjZ_91idQJ33zWtByQ0J4VsWCUdvAy9iIflpLtW8,19754
- onnx_ir/_convenience/_constructors.py,sha256=5GhlYy_xCE2ng7l_4cNx06WQsNDyvS-0U1HgOpPKJEk,8347
+ onnx_ir/_convenience/__init__.py,sha256=SO7kc8RXVKEUODGh0q2Y7WgmbUsOjYSixmKFx_A0DAQ,19752
+ onnx_ir/_convenience/_constructors.py,sha256=ETYrhJ5eg4ozf4K9C-5mT1vw1lxrdLCcWE4CJEGbl-k,11304
  onnx_ir/_thirdparty/asciichartpy.py,sha256=afQ0fsqko2uYRPAR4TZBrQxvCb4eN8lxZ2yDFbVQq_s,10533
  onnx_ir/passes/__init__.py,sha256=M_Tcl_-qGSNPluFIvOoeDyh0qAwNayaYyXDS5UJUJPQ,764
  onnx_ir/passes/_pass_infra.py,sha256=xIOw_zZIuOqD4Z_wZ4OvsqXfh2IZMoMlDp1xQ_MPQlc,9567
@@ -32,15 +32,15 @@ onnx_ir/passes/common/clear_metadata_and_docstring.py,sha256=YwouLfsNFSaTuGd7uMO
  onnx_ir/passes/common/common_subexpression_elimination.py,sha256=wZ1zEPdCshYB_ifP9fCAVfzQkesE6uhCfzCuL2qO5fA,7948
  onnx_ir/passes/common/constant_manipulation.py,sha256=dFzzqbpRecJJrYf6edvR_sdr4F0gV-1wEtDXsQ7fStM,9101
  onnx_ir/passes/common/identity_elimination.py,sha256=wN8g8uPGn6IIQ6Jf1lo6nGTXvpWyiSQtT_CfmtvZpwA,3664
- onnx_ir/passes/common/initializer_deduplication.py,sha256=k6IZdXrjANbVhTQCQAPIePUjqF83NG3YGwEYThYJJ7o,6655
+ onnx_ir/passes/common/initializer_deduplication.py,sha256=gKrXTMFAtCkMmiIm8zWzwPnwSbRdZxunJeAt_jFU-vY,7253
  onnx_ir/passes/common/inliner.py,sha256=wBoO6yXt6F1AObQjYZHMQ0wn3YH681N4HQQVyaMAYd4,13702
  onnx_ir/passes/common/naming.py,sha256=NNKc9IPrmzm3J0zGQILfooayVzfdXDYHY9DHex1hFgs,10927
  onnx_ir/passes/common/onnx_checker.py,sha256=_sPmJ2ff9pDB1g9q7082BL6fyubomRaj6svE0cCyDew,1691
  onnx_ir/passes/common/shape_inference.py,sha256=LVdvxjeKtcIEbPcb6mKisxoPJOOawzsm3tzk5j9xqeM,3992
  onnx_ir/passes/common/topological_sort.py,sha256=Vcu1YhBdfRX4LROr0NScjB1Pwz2DjBFD0Z_GxqaxPF8,999
  onnx_ir/passes/common/unused_removal.py,sha256=cBNqaqGnUVyCWxsD7hBzYk4qSglVPo3SmHAvkUo5-Oc,7613
- onnx_ir-0.1.7.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- onnx_ir-0.1.7.dist-info/METADATA,sha256=M4-BdpNXpv18P_tALf6KdUdXeCO2JrVxbxtzs4HCmJI,3462
- onnx_ir-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- onnx_ir-0.1.7.dist-info/top_level.txt,sha256=W5tROO93YjO0XRxIdjMy4wocp-5st5GiI2ukvW7UhDo,8
- onnx_ir-0.1.7.dist-info/RECORD,,
+ onnx_ir-0.1.9.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ onnx_ir-0.1.9.dist-info/METADATA,sha256=UnIaOip9p965JE-B8Kb3cuUrDTuoGoQurFmLAgsWdAA,3604
+ onnx_ir-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ onnx_ir-0.1.9.dist-info/top_level.txt,sha256=W5tROO93YjO0XRxIdjMy4wocp-5st5GiI2ukvW7UhDo,8
+ onnx_ir-0.1.9.dist-info/RECORD,,