PyPI - onnx-ir - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

onnx-ir 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of onnx-ir might be problematic. Click here for more details.

Files changed (27)

onnx_ir/__init__.py +5 -2
onnx_ir/_convenience/__init__.py +125 -4
onnx_ir/_convenience/_constructors.py +6 -2
onnx_ir/_core.py +291 -76
onnx_ir/_enums.py +35 -25
onnx_ir/_graph_containers.py +114 -9
onnx_ir/_io.py +40 -4
onnx_ir/_type_casting.py +2 -1
onnx_ir/_version_utils.py +5 -48
onnx_ir/convenience.py +3 -1
onnx_ir/external_data.py +43 -3
onnx_ir/passes/_pass_infra.py +1 -1
onnx_ir/passes/common/__init__.py +4 -0
onnx_ir/passes/common/_c_api_utils.py +1 -1
onnx_ir/passes/common/common_subexpression_elimination.py +177 -0
onnx_ir/passes/common/constant_manipulation.py +10 -25
onnx_ir/passes/common/inliner.py +4 -3
onnx_ir/passes/common/onnx_checker.py +1 -1
onnx_ir/passes/common/shape_inference.py +1 -1
onnx_ir/passes/common/unused_removal.py +1 -1
onnx_ir/serde.py +171 -6
{onnx_ir-0.1.0.dist-info → onnx_ir-0.1.2.dist-info}/METADATA +22 -4
onnx_ir-0.1.2.dist-info/RECORD +42 -0
onnx_ir-0.1.0.dist-info/RECORD +0 -41
{onnx_ir-0.1.0.dist-info → onnx_ir-0.1.2.dist-info}/WHEEL +0 -0
{onnx_ir-0.1.0.dist-info → onnx_ir-0.1.2.dist-info}/licenses/LICENSE +0 -0
{onnx_ir-0.1.0.dist-info → onnx_ir-0.1.2.dist-info}/top_level.txt +0 -0

onnx_ir/_enums.py CHANGED Viewed

@@ -114,7 +114,18 @@ class DataType(enum.IntEnum):
     @property
     def itemsize(self) -> float:
         """Returns the size of the data type in bytes."""
-        return _ITEMSIZE_MAP[self]
+        return self.bitwidth / 8
+    @property
+    def bitwidth(self) -> int:
+        """Returns the bit width of the data type.
+        Raises:
+            TypeError: If the data type is not supported.
+        """
+        if self not in _BITWIDTH_MAP:
+            raise TypeError(f"Bitwidth not available for ONNX data type: {self}")
+        return _BITWIDTH_MAP[self]
     def numpy(self) -> np.dtype:
         """Returns the numpy dtype for the ONNX data type.
@@ -163,30 +174,29 @@ class DataType(enum.IntEnum):
         return self.__repr__()
-_ITEMSIZE_MAP = {
-    DataType.FLOAT: 4,
-    DataType.UINT8: 1,
-    DataType.INT8: 1,
-    DataType.UINT16: 2,
-    DataType.INT16: 2,
-    DataType.INT32: 4,
-    DataType.INT64: 8,
-    DataType.STRING: 1,
-    DataType.BOOL: 1,
-    DataType.FLOAT16: 2,
-    DataType.DOUBLE: 8,
-    DataType.UINT32: 4,
-    DataType.UINT64: 8,
-    DataType.COMPLEX64: 8,
-    DataType.COMPLEX128: 16,
-    DataType.BFLOAT16: 2,
-    DataType.FLOAT8E4M3FN: 1,
-    DataType.FLOAT8E4M3FNUZ: 1,
-    DataType.FLOAT8E5M2: 1,
-    DataType.FLOAT8E5M2FNUZ: 1,
-    DataType.UINT4: 0.5,
-    DataType.INT4: 0.5,
-    DataType.FLOAT4E2M1: 0.5,
+_BITWIDTH_MAP = {
+    DataType.FLOAT: 32,
+    DataType.UINT8: 8,
+    DataType.INT8: 8,
+    DataType.UINT16: 16,
+    DataType.INT16: 16,
+    DataType.INT32: 32,
+    DataType.INT64: 64,
+    DataType.BOOL: 8,
+    DataType.FLOAT16: 16,
+    DataType.DOUBLE: 64,
+    DataType.UINT32: 32,
+    DataType.UINT64: 64,
+    DataType.COMPLEX64: 64,  # 2 * 32
+    DataType.COMPLEX128: 128,  # 2 * 64
+    DataType.BFLOAT16: 16,
+    DataType.FLOAT8E4M3FN: 8,
+    DataType.FLOAT8E4M3FNUZ: 8,
+    DataType.FLOAT8E5M2: 8,
+    DataType.FLOAT8E5M2FNUZ: 8,
+    DataType.UINT4: 4,
+    DataType.INT4: 4,
+    DataType.FLOAT4E2M1: 4,
 }

onnx_ir/_graph_containers.py CHANGED Viewed

@@ -12,13 +12,16 @@ __all__ = [
 ]
 import collections
-from collections.abc import Iterable
-from typing import TYPE_CHECKING, SupportsIndex
+import logging
+from collections.abc import Iterable, Sequence
+from typing import SupportsIndex, TypeVar
 import onnx_ir
+from onnx_ir import _core, _protocols
-if TYPE_CHECKING:
-    from onnx_ir import _core
+T = TypeVar("T")
+logger = logging.getLogger(__name__)
 class _GraphIO(collections.UserList["_core.Value"]):
@@ -152,6 +155,10 @@ class GraphInputs(_GraphIO):
             raise ValueError(
                 f"Value '{value}' is already owned by a different graph. Please remove the value from the previous graph first"
             )
+        if value.producer() is not None:
+            raise ValueError(
+                f"Value '{value}' is produced by a node and cannot be an input to the graph. Please create new Values for graph inputs"
+            )
         self._ref_counter[value] += 1
         value._is_graph_input = True
         value._graph = self._graph
@@ -209,7 +216,7 @@ class GraphOutputs(_GraphIO):
 class GraphInitializers(collections.UserDict[str, "_core.Value"]):
-    """The initializers of a Graph."""
+    """The initializers of a Graph as ``dict[str, Value]`` with additional mutation methods."""
     def __init__(self, graph: _core.Graph, dict=None, /, **kwargs):
         # Perform checks first in _set_graph before modifying the data structure with super().__init__()
@@ -244,12 +251,23 @@ class GraphInitializers(collections.UserDict[str, "_core.Value"]):
     def __setitem__(self, key: str, value: _core.Value) -> None:
         """Set an initializer for the graph."""
-        if key != value.name:
+        if not isinstance(value, _core.Value):
+            raise TypeError(f"value must be a Value object, not {type(value)}")
+        if not isinstance(key, str):
+            raise TypeError(f"Value name must be a string, not {type(key)}")
+        if key == "":
+            raise ValueError("Value name cannot be an empty string")
+        if not value.name:
+            logger.info("Value %s does not have a name, setting it to '%s'", value, key)
+            value.name = key
+        elif key != value.name:
             raise ValueError(
-                f"Key '{key}' does not match the name of the value '{value.name}'"
+                f"Key '{key}' does not match the name of the value '{value.name}'. Please use the value.name as the key."
+            )
+        if value.producer() is not None:
+            raise ValueError(
+                f"Value '{value}' is produced by a node and cannot be a graph initializer"
             )
-        if not isinstance(key, str):
-            raise TypeError(f"Key must be a string, not {type(key)}")
         if key in self.data:
             # If the key already exists, unset the old value
             old_value = self.data[key]
@@ -266,3 +284,90 @@ class GraphInitializers(collections.UserDict[str, "_core.Value"]):
         # the dictionary is not modified
         self._maybe_unset_graph(value)
         super().__delitem__(key)
+    def add(self, value: _core.Value) -> None:
+        """Add an initializer to the graph."""
+        self[value.name] = value  # type: ignore[index]
+class Attributes(collections.UserDict[str, "_core.Attr"]):
+    """The attributes of a Node as ``dict[str, Attr]`` with additional access methods."""
+    def __init__(self, attrs: Iterable[_core.Attr]):
+        super().__init__({attr.name: attr for attr in attrs})
+    def __setitem__(self, key: str, value: _core.Attr) -> None:
+        """Set an attribute for the node."""
+        if type(key) is not str:
+            raise TypeError(f"Key must be a string, not {type(key)}")
+        if not isinstance(value, _core.Attr):
+            raise TypeError(f"Value must be an Attr, not {type(value)}")
+        super().__setitem__(key, value)
+    def add(self, value: _core.Attr) -> None:
+        """Add an attribute to the node."""
+        self[value.name] = value
+    def get_int(self, key: str, default: T = None) -> int | T:  # type: ignore[assignment]
+        """Get the integer value of the attribute."""
+        if key in self:
+            return self[key].as_int()
+        return default
+    def get_float(self, key: str, default: T = None) -> float | T:  # type: ignore[assignment]
+        """Get the float value of the attribute."""
+        if key in self:
+            return self[key].as_float()
+        return default
+    def get_string(self, key: str, default: T = None) -> str | T:  # type: ignore[assignment]
+        """Get the string value of the attribute."""
+        if key in self:
+            return self[key].as_string()
+        return default
+    def get_tensor(self, key: str, default: T = None) -> _protocols.TensorProtocol | T:  # type: ignore[assignment]
+        """Get the tensor value of the attribute."""
+        if key in self:
+            return self[key].as_tensor()
+        return default
+    def get_graph(self, key: str, default: T = None) -> _core.Graph | T:  # type: ignore[assignment]
+        """Get the graph value of the attribute."""
+        if key in self:
+            return self[key].as_graph()
+        return default
+    def get_ints(self, key: str, default: T = None) -> Sequence[int] | T:  # type: ignore[assignment]
+        """Get the Sequence of integers from the attribute."""
+        if key in self:
+            return self[key].as_ints()
+        return default
+    def get_floats(self, key: str, default: T = None) -> Sequence[float] | T:  # type: ignore[assignment]
+        """Get the Sequence of floats from the attribute."""
+        if key in self:
+            return self[key].as_floats()
+        return default
+    def get_strings(self, key: str, default: T = None) -> Sequence[str] | T:  # type: ignore[assignment]
+        """Get the Sequence of strings from the attribute."""
+        if key in self:
+            return self[key].as_strings()
+        return default
+    def get_tensors(
+        self,
+        key: str,
+        default: T = None,  # type: ignore[assignment]
+    ) -> Sequence[_protocols.TensorProtocol] | T:
+        """Get the Sequence of tensors from the attribute."""
+        if key in self:
+            return self[key].as_tensors()
+        return default
+    def get_graphs(self, key: str, default: T = None) -> Sequence[_core.Graph] | T:  # type: ignore[assignment]
+        """Get the Sequence of graphs from the attribute."""
+        if key in self:
+            return self[key].as_graphs()
+        return default

onnx_ir/_io.py CHANGED Viewed

@@ -7,10 +7,11 @@ from __future__ import annotations
 __all__ = ["load", "save"]
 import os
+from typing import Callable
-import onnx
+import onnx  # noqa: TID251
-from onnx_ir import _core, serde
+from onnx_ir import _core, _protocols, serde
 from onnx_ir import external_data as _external_data
 from onnx_ir._polyfill import zip
@@ -43,6 +44,8 @@ def save(
     format: str | None = None,
     external_data: str | os.PathLike | None = None,
     size_threshold_bytes: int = 256,
+    callback: Callable[[_protocols.TensorProtocol, _external_data.CallbackInfo], None]
+    | None = None,
 ) -> None:
     """Save an ONNX model to a file.
@@ -52,6 +55,30 @@ def save(
     to load the newly saved model, or provide a different external data path that
     is not currently referenced by any tensors in the model.
+    .. tip::
+        A simple progress bar can be implemented by passing a callback function as the following::
+            import onnx_ir as ir
+            import tqdm
+            with tqdm.tqdm() as pbar:
+                total_set = False
+                def callback(tensor: ir.TensorProtocol, metadata: ir.external_data.CallbackInfo) -> None:
+                    nonlocal total_set
+                    if not total_set:
+                        pbar.total = metadata.total
+                        total_set = True
+                    pbar.update()
+                    pbar.set_description(f"Saving {tensor.name} ({tensor.dtype}, {tensor.shape}) at offset {metadata.offset}")
+                ir.save(
+                    ...,
+                    callback=callback,
+                )
     Args:
         model: The model to save.
         path: The path to save the model to. E.g. "model.onnx".
@@ -65,6 +92,8 @@ def save(
             it will be serialized in the ONNX Proto message.
         size_threshold_bytes: Save to external data if the tensor size in bytes is larger than this threshold.
             Effective only when ``external_data`` is set.
+        callback: A callback function that is called for each tensor that is saved to external data
+            for debugging or logging purposes.
     Raises:
         ValueError: If the external data path is an absolute path.
@@ -77,12 +106,19 @@ def save(
         base_dir = os.path.dirname(path)
         # Store the original initializer values so they can be restored if modify_model=False
-        initializer_values = tuple(model.graph.initializers.values())
+        initializer_values: list[_core.Value] = []
+        for graph in model.graphs():
+            # Collect from all subgraphs as well
+            initializer_values.extend(graph.initializers.values())
         tensors = [v.const_value for v in initializer_values]
         try:
             model = _external_data.unload_from_model(
-                model, base_dir, external_data, size_threshold_bytes=size_threshold_bytes
+                model,
+                base_dir,
+                external_data,
+                size_threshold_bytes=size_threshold_bytes,
+                callback=callback,
             )
             proto = serde.serialize_model(model)
             onnx.save(proto, path, format=format)

onnx_ir/_type_casting.py CHANGED Viewed

@@ -15,7 +15,7 @@ if typing.TYPE_CHECKING:
     import numpy.typing as npt
-def pack_int4(array: np.ndarray) -> npt.NDArray[np.uint8]:
+def pack_4bitx2(array: np.ndarray) -> npt.NDArray[np.uint8]:
     """Convert a numpy array to flatten, packed int4/uint4. Elements must be in the correct range."""
     # Create a 1D copy
     array_flat = array.ravel().view(np.uint8).copy()
@@ -40,6 +40,7 @@ def _unpack_uint4_as_uint8(
     Returns:
         A numpy array of int8/uint8 reshaped to dims.
     """
+    assert data.dtype == np.uint8, "Input data must be of type uint8"
     result = np.empty([data.size * 2], dtype=data.dtype)
     array_low = data & np.uint8(0x0F)
     array_high = data & np.uint8(0xF0)

onnx_ir/_version_utils.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 """Version utils for testing."""
+# pylint: disable=import-outside-toplevel
 from __future__ import annotations
 import packaging.version
@@ -9,7 +10,7 @@ import packaging.version
 def onnx_older_than(version: str) -> bool:
     """Returns True if the ONNX version is older than the given version."""
-    import onnx  # pylint: disable=import-outside-toplevel
+    import onnx  # noqa: TID251
     return (
         packaging.version.parse(onnx.__version__).release
@@ -19,7 +20,7 @@ def onnx_older_than(version: str) -> bool:
 def torch_older_than(version: str) -> bool:
     """Returns True if the torch version is older than the given version."""
-    import torch  # pylint: disable=import-outside-toplevel
+    import torch
     return (
         packaging.version.parse(torch.__version__).release
@@ -27,42 +28,9 @@ def torch_older_than(version: str) -> bool:
     )
-def transformers_older_than(version: str) -> bool | None:
-    """Returns True if the transformers version is older than the given version."""
-    try:
-        import transformers  # pylint: disable=import-outside-toplevel
-    except ImportError:
-        return None
-    return (
-        packaging.version.parse(transformers.__version__).release
-        < packaging.version.parse(version).release
-    )
-def is_onnxruntime_training() -> bool:
-    """Returns True if the onnxruntime is onnxruntime-training."""
-    try:
-        from onnxruntime import training  # pylint: disable=import-outside-toplevel
-        assert training
-    except ImportError:
-        # onnxruntime not training
-        return False
-    try:
-        from onnxruntime.capi.onnxruntime_pybind11_state import (  # pylint: disable=import-outside-toplevel
-            OrtValueVector,
-        )
-    except ImportError:
-        return False
-    return hasattr(OrtValueVector, "push_back_batch")
 def onnxruntime_older_than(version: str) -> bool:
     """Returns True if the onnxruntime version is older than the given version."""
-    import onnxruntime  # pylint: disable=import-outside-toplevel
+    import onnxruntime
     return (
         packaging.version.parse(onnxruntime.__version__).release
@@ -72,20 +40,9 @@ def onnxruntime_older_than(version: str) -> bool:
 def numpy_older_than(version: str) -> bool:
     """Returns True if the numpy version is older than the given version."""
-    import numpy  # pylint: disable=import-outside-toplevel
+    import numpy
     return (
         packaging.version.parse(numpy.__version__).release
         < packaging.version.parse(version).release
     )
-def has_transformers():
-    """Tells if transformers is installed."""
-    try:
-        import transformers  # pylint: disable=import-outside-toplevel
-        assert transformers
-        return True  # noqa
-    except ImportError:
-        return False

onnx_ir/convenience.py CHANGED Viewed

@@ -7,15 +7,17 @@ from __future__ import annotations
 __all__ = [
     "convert_attribute",
     "convert_attributes",
+    "create_value_mapping",
+    "get_const_tensor",
     "replace_all_uses_with",
     "replace_nodes_and_values",
-    "create_value_mapping",
 ]
 from onnx_ir._convenience import (
     convert_attribute,
     convert_attributes,
     create_value_mapping,
+    get_const_tensor,
     replace_all_uses_with,
     replace_nodes_and_values,
 )

onnx_ir/external_data.py CHANGED Viewed

@@ -4,12 +4,15 @@
 from __future__ import annotations
+from typing import Callable
 __all__ = [
     "set_base_dir",
     "unload_from_model",
     "load_to_model",
     "convert_tensors_to_external",
     "convert_tensors_from_external",
+    "CallbackInfo",
 ]
 import dataclasses
@@ -48,6 +51,21 @@ class _ExternalDataInfo:
     length: int
+@dataclasses.dataclass
+class CallbackInfo:
+    """A class that shares information about a tensor that is to be saved as external data for callback functions.
+    Attributes:
+        total: The total number of tensors to save.
+        index: The index of the tensor being saved.
+        offset: The offset of the tensor in the external data file.
+    """
+    total: int
+    index: int
+    offset: int
 def _all_tensors(
     graph: _core.Graph | _core.GraphView, include_attributes: bool = False
 ) -> Iterator[_protocols.TensorProtocol]:
@@ -157,6 +175,7 @@ def _write_external_data(
     tensors: Sequence[_protocols.TensorProtocol],
     external_data_infos: Sequence[_ExternalDataInfo],
     file_path: str | os.PathLike,
+    callback: Callable[[_protocols.TensorProtocol, CallbackInfo], None] | None = None,
 ) -> None:
     """Write tensor data to an external file according to information stored in ExternalDataInfo objects.
@@ -164,12 +183,26 @@ def _write_external_data(
         tensors: Tensors to be written as external data.
         external_data_infos: External data information stored for each tensor to be written as external data.
         file_path: Location to which external data is to be stored.
+        callback: A callback function that is called for each tensor that is saved to external data
+            for debugging or logging purposes.
     """
-    assert len(tensors) == len(external_data_infos), (
+    tensors_count = len(tensors)
+    assert tensors_count == len(external_data_infos), (
         "Number of tensors and external data infos should match"
     )
     with open(file_path, "wb") as data_file:
-        for tensor, tensor_info in zip(tensors, external_data_infos, strict=True):
+        for i, (tensor, tensor_info) in enumerate(
+            zip(tensors, external_data_infos, strict=True)
+        ):
+            if callback is not None:
+                callback(
+                    tensor,
+                    CallbackInfo(
+                        total=tensors_count,
+                        index=i,
+                        offset=tensor_info.offset,
+                    ),
+                )
             current_offset = tensor_info.offset
             assert tensor is not None
             raw_data = tensor.tobytes()
@@ -228,6 +261,7 @@ def convert_tensors_to_external(
     tensors: Sequence[_protocols.TensorProtocol],
     base_dir: str | os.PathLike,
     relative_path: str | os.PathLike,
+    callback: Callable[[_protocols.TensorProtocol, CallbackInfo], None] | None = None,
 ) -> list[_core.ExternalTensor]:
     """Convert a sequence of any TensorProtocol tensors to external tensors.
@@ -238,6 +272,8 @@ def convert_tensors_to_external(
         tensors: Tensors to be converted to external tensors. They can be external tensors themselves.
         base_dir: Path of base directory.
         relative_path: Path to which external data is to be stored, relative to the ONNX file.
+        callback: A callback function that is called for each tensor that is saved to external data
+            for debugging or logging purposes.
     Returns:
         A list of external tensors derived from a list of input tensors. The order
@@ -285,7 +321,7 @@ def convert_tensors_to_external(
         external_info = _compute_external_data_info(tensor, current_offset)
         external_data_infos.append(external_info)
         current_offset = external_info.offset + external_info.length
-    _write_external_data(sorted_tensors, external_data_infos, path)
+    _write_external_data(sorted_tensors, external_data_infos, path, callback=callback)
     # Create external tensor objects
     external_tensors: list[_core.ExternalTensor] = [
@@ -336,6 +372,7 @@ def unload_from_model(
     relative_path: str | os.PathLike,
     *,
     size_threshold_bytes: int = 0,
+    callback: Callable[[_protocols.TensorProtocol, CallbackInfo], None] | None = None,
 ) -> _core.Model:
     """Convert all initializers equal or above size_threshold_bytes to external tensors in-place and save data to a single data file.
@@ -356,6 +393,8 @@ def unload_from_model(
         relative_path: Path to which external data is to be stored, relative to the ONNX file.
             E.g. "model.data"
         size_threshold_bytes: Save to external data if the tensor size in bytes is larger than this threshold.
+        callback: A callback function that is called for each tensor that is saved to external data
+            for debugging or logging purposes.
     Returns:
         An ir.Model with all initializer data equal or above ``size_threshold_bytes``
@@ -384,6 +423,7 @@ def unload_from_model(
         [v.const_value for v in initializers_to_become_external],  # type: ignore[misc]
         base_dir=base_dir,
         relative_path=relative_path,
+        callback=callback,
     )
     # Replace the initializer values with external tensors and save the model

onnx_ir/passes/_pass_infra.py CHANGED Viewed

@@ -127,7 +127,7 @@ class PassBase(abc.ABC):
         # Check postconditions
         try:
-            self.ensures(model)
+            self.ensures(result.model)
         except PostconditionError:
             raise
         except Exception as e:

onnx_ir/passes/common/__init__.py CHANGED Viewed

@@ -5,6 +5,7 @@ __all__ = [
     "AddInitializersToInputsPass",
     "CheckerPass",
     "ClearMetadataAndDocStringPass",
+    "CommonSubexpressionEliminationPass",
     "InlinePass",
     "LiftConstantsToInitializersPass",
     "LiftSubgraphInitializersToMainGraphPass",
@@ -19,6 +20,9 @@ __all__ = [
 from onnx_ir.passes.common.clear_metadata_and_docstring import (
     ClearMetadataAndDocStringPass,
 )
+from onnx_ir.passes.common.common_subexpression_elimination import (
+    CommonSubexpressionEliminationPass,
+)
 from onnx_ir.passes.common.constant_manipulation import (
     AddInitializersToInputsPass,
     LiftConstantsToInitializersPass,

onnx_ir/passes/common/_c_api_utils.py CHANGED Viewed

@@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Callable, TypeVar
 import onnx_ir as ir
 if TYPE_CHECKING:
-    import onnx
+    import onnx  # noqa: TID251
 logger = logging.getLogger(__name__)

onnx-ir 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

Potentially problematic release.

onnx-ir 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl