onnxruntime-directml 1.19.2-cp310-cp310-win_amd64.whl → 1.20.0-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. onnxruntime/__init__.py +3 -1
  2. onnxruntime/capi/DirectML.dll +0 -0
  3. onnxruntime/capi/convert_npz_to_onnx_adapter.py +48 -0
  4. onnxruntime/capi/onnxruntime.dll +0 -0
  5. onnxruntime/capi/onnxruntime_collect_build_info.py +0 -56
  6. onnxruntime/capi/onnxruntime_inference_collection.py +78 -6
  7. onnxruntime/capi/onnxruntime_providers_shared.dll +0 -0
  8. onnxruntime/capi/onnxruntime_pybind11_state.pyd +0 -0
  9. onnxruntime/capi/onnxruntime_validation.py +7 -1
  10. onnxruntime/quantization/base_quantizer.py +8 -12
  11. onnxruntime/quantization/calibrate.py +34 -2
  12. onnxruntime/quantization/matmul_4bits_quantizer.py +662 -39
  13. onnxruntime/quantization/operators/gather.py +1 -1
  14. onnxruntime/quantization/operators/matmul.py +8 -5
  15. onnxruntime/quantization/qdq_quantizer.py +1 -2
  16. onnxruntime/quantization/quant_utils.py +30 -5
  17. onnxruntime/quantization/quantize.py +14 -3
  18. onnxruntime/quantization/registry.py +1 -0
  19. onnxruntime/quantization/tensor_quant_overrides.py +2 -2
  20. onnxruntime/tools/mobile_helpers/coreml_supported_mlprogram_ops.md +7 -5
  21. onnxruntime/tools/mobile_helpers/coreml_supported_neuralnetwork_ops.md +1 -1
  22. onnxruntime/transformers/benchmark.py +1 -1
  23. onnxruntime/transformers/constants.py +2 -2
  24. onnxruntime/transformers/convert_generation.py +1 -1
  25. onnxruntime/transformers/convert_to_packing_mode.py +10 -10
  26. onnxruntime/transformers/fusion_attention_sam2.py +534 -0
  27. onnxruntime/transformers/fusion_gelu.py +12 -3
  28. onnxruntime/transformers/fusion_layernorm.py +158 -2
  29. onnxruntime/transformers/fusion_rotary_attention.py +1 -1
  30. onnxruntime/transformers/fusion_skiplayernorm.py +26 -17
  31. onnxruntime/transformers/io_binding_helper.py +7 -4
  32. onnxruntime/transformers/machine_info.py +0 -2
  33. onnxruntime/transformers/models/gpt2/convert_to_onnx.py +5 -1
  34. onnxruntime/transformers/models/llama/benchmark_all.py +1 -5
  35. onnxruntime/transformers/models/llama/convert_to_onnx.py +25 -58
  36. onnxruntime/transformers/models/sam2/__init__.py +12 -0
  37. onnxruntime/transformers/models/sam2/benchmark_sam2.py +625 -0
  38. onnxruntime/transformers/models/sam2/convert_to_onnx.py +260 -0
  39. onnxruntime/transformers/models/sam2/image_decoder.py +273 -0
  40. onnxruntime/transformers/models/sam2/image_encoder.py +186 -0
  41. onnxruntime/transformers/models/sam2/mask_decoder.py +208 -0
  42. onnxruntime/transformers/models/sam2/nvtx_helper.py +33 -0
  43. onnxruntime/transformers/models/sam2/prompt_encoder.py +189 -0
  44. onnxruntime/transformers/models/sam2/sam2_demo.py +322 -0
  45. onnxruntime/transformers/models/sam2/sam2_image_onnx_predictor.py +280 -0
  46. onnxruntime/transformers/models/sam2/sam2_utils.py +147 -0
  47. onnxruntime/transformers/models/stable_diffusion/engine_builder.py +1 -1
  48. onnxruntime/transformers/models/whisper/benchmark_all.py +1 -5
  49. onnxruntime/transformers/models/whisper/whisper_chain.py +4 -1
  50. onnxruntime/transformers/onnx_model.py +62 -17
  51. onnxruntime/transformers/onnx_model_bert.py +3 -3
  52. onnxruntime/transformers/onnx_model_clip.py +1 -0
  53. onnxruntime/transformers/onnx_model_sam2.py +138 -0
  54. onnxruntime/transformers/optimizer.py +5 -3
  55. {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/METADATA +3 -8
  56. {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/RECORD +59 -47
  57. onnxruntime/tools/mobile_helpers/check_model_can_use_ort_mobile_pkg.py +0 -301
  58. onnxruntime/tools/mobile_helpers/mobile_package.required_operators.config +0 -46
  59. {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/WHEEL +0 -0
  60. {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/entry_points.txt +0 -0
  61. {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/top_level.txt +0 -0
onnxruntime/__init__.py CHANGED
@@ -7,7 +7,7 @@ ONNX Runtime is a performance-focused scoring engine for Open Neural Network Exc
 For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://aka.ms/onnxruntime/>`_
 or the `Github project <https://github.com/microsoft/onnxruntime/>`_.
 """
-__version__ = "1.19.2"
+__version__ = "1.20.0"
 __author__ = "Microsoft"
 
 # we need to do device version validation (for example to check Cuda version for an onnxruntime-training package).
@@ -23,6 +23,7 @@ try:
     from onnxruntime.capi._pybind_state import ExecutionMode  # noqa: F401
     from onnxruntime.capi._pybind_state import ExecutionOrder  # noqa: F401
     from onnxruntime.capi._pybind_state import GraphOptimizationLevel  # noqa: F401
+    from onnxruntime.capi._pybind_state import LoraAdapter  # noqa: F401
     from onnxruntime.capi._pybind_state import ModelMetadata  # noqa: F401
     from onnxruntime.capi._pybind_state import NodeArg  # noqa: F401
     from onnxruntime.capi._pybind_state import OrtAllocatorType  # noqa: F401
@@ -56,6 +57,7 @@ from onnxruntime.capi import onnxruntime_validation
 if import_capi_exception:
     raise import_capi_exception
 
+from onnxruntime.capi.onnxruntime_inference_collection import AdapterFormat  # noqa: F401
 from onnxruntime.capi.onnxruntime_inference_collection import InferenceSession  # noqa: F401
 from onnxruntime.capi.onnxruntime_inference_collection import IOBinding  # noqa: F401
 from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice  # noqa: F401
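The two added imports surface the new LoRA adapter support at package level. A minimal check of the re-exported names (nothing assumed here beyond what the imports above imply):

import onnxruntime as ort

# Both symbols are re-exported from the top-level package in 1.20.0.
print(ort.AdapterFormat)  # helper for building .onnx_adapter files from Python structures
print(ort.LoraAdapter)    # adapter type exposed from the pybind bindings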
onnxruntime/capi/DirectML.dll CHANGED
Binary file
onnxruntime/capi/convert_npz_to_onnx_adapter.py ADDED
@@ -0,0 +1,48 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+# This script helps converting .npz files to .onnx_adapter files
+
+import argparse
+import os
+import sys
+
+import numpy as np
+
+import onnxruntime as ort
+
+
+def get_args() -> argparse:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--npz_file_path", type=str, required=True)
+    parser.add_argument("--output_file_path", type=str, required=True)
+    parser.add_argument("--adapter_version", type=int, required=True)
+    parser.add_argument("--model_version", type=int, required=True)
+    return parser.parse_args()
+
+
+def export_lora_parameters(
+    npz_file_path: os.PathLike, adapter_version: int, model_version: int, output_file_path: os.PathLike
+):
+    """The function converts lora parameters in npz to onnx_adapter format"""
+    adapter_format = ort.AdapterFormat()
+    adapter_format.set_adapter_version(adapter_version)
+    adapter_format.set_model_version(model_version)
+    name_to_ort_value = {}
+    with np.load(npz_file_path) as data:
+        for name, np_arr in data.items():
+            ort_value = ort.OrtValue.ortvalue_from_numpy(np_arr)
+            name_to_ort_value[name] = ort_value
+
+    adapter_format.set_parameters(name_to_ort_value)
+    adapter_format.export_adapter(output_file_path)
+
+
+def main() -> int:
+    args = get_args()
+    export_lora_parameters(args.npz_file_path, args.adapter_version, args.model_version, args.output_file_path)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
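As a usage illustration (the file names and tensor shapes below are invented, not part of the package), the new helper accepts any .npz archive of LoRA tensors and is driven through its argparse interface:

import numpy as np

# Fabricate a tiny LoRA checkpoint the way an exporter might save it.
np.savez(
    "my_lora.npz",
    **{
        "layers.0.attn.lora_A": np.zeros((16, 768), dtype=np.float32),
        "layers.0.attn.lora_B": np.zeros((768, 16), dtype=np.float32),
    },
)

# The script guards main() with __name__ == "__main__", so it should be runnable as a module, e.g.:
#   python -m onnxruntime.capi.convert_npz_to_onnx_adapter \
#       --npz_file_path my_lora.npz \
#       --output_file_path my_lora.onnx_adapter \
#       --adapter_version 1 --model_version 1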
onnxruntime/capi/onnxruntime.dll CHANGED
Binary file
onnxruntime/capi/onnxruntime_collect_build_info.py CHANGED
@@ -45,59 +45,3 @@ def find_cudart_versions(build_env=False, build_cuda_version=None):
 
     # convert to list and remove None
     return [ver for ver in cudart_found_versions if ver]
-
-
-def find_cudnn_supported_cuda_versions(build_env=False):
-    # comments in get_cudart_version apply here
-    if not sys.platform.startswith("linux"):
-        warnings.warn("find_cudnn_versions only works on Linux")
-
-    cudnn_possible_versions = {None}
-    if not build_env:
-        # if not in a build environment, there may be more than one installed cudnn.
-        # https://developer.nvidia.com/rdp/cudnn-archive to include all that may support Cuda 10+.
-        cudnn_possible_versions.update(
-            {
-                "8.2",
-                "8.1.1",
-                "8.1.0",
-                "8.0.5",
-                "8.0.4",
-                "8.0.3",
-                "8.0.2",
-                "8.0.1",
-                "7.6.5",
-                "7.6.4",
-                "7.6.3",
-                "7.6.2",
-                "7.6.1",
-                "7.6.0",
-                "7.5.1",
-                "7.5.0",
-                "7.4.2",
-                "7.4.1",
-                "7.3.1",
-                "7.3.0",
-            }
-        )
-
-    def get_cudnn_supported_cuda_version(find_cudnn_version=None):
-        cudnn_lib_filename = "libcudnn.so"
-        if find_cudnn_version:
-            cudnn_lib_filename = cudnn_lib_filename + "." + find_cudnn_version
-
-        # in cudnn.h cudnn version are calculated as:
-        # #define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)
-        try:
-            cudnn = ctypes.CDLL(cudnn_lib_filename)
-            # cudnn_ver = cudnn.cudnnGetVersion()
-            cuda_ver = cudnn.cudnnGetCudartVersion()
-            return cuda_ver
-        except Exception:
-            return None
-
-    # use set to avoid duplications
-    cuda_found_versions = {get_cudnn_supported_cuda_version(cudnn_version) for cudnn_version in cudnn_possible_versions}
-
-    # convert to list and remove None
-    return [ver for ver in cuda_found_versions if ver]
onnxruntime/capi/onnxruntime_inference_collection.py CHANGED
@@ -32,6 +32,52 @@ def get_ort_device_type(device_type: str, device_index) -> C.OrtDevice:
         raise Exception("Unsupported device type: " + device_type)
 
 
+class AdapterFormat:
+    """
+    This class is used to create adapter files from python structures
+    """
+
+    def __init__(self, adapter=None) -> None:
+        if adapter is None:
+            self._adapter = C.AdapterFormat()
+        else:
+            self._adapter = adapter
+
+    @staticmethod
+    def read_adapter(file_path: os.PathLike) -> AdapterFormat:
+        return AdapterFormat(C.AdapterFormat.read_adapter(file_path))
+
+    def export_adapter(self, file_path: os.PathLike):
+        """
+        This function writes a file at the specified location
+        in onnxrunitme adapter format containing Lora parameters.
+
+        :param file_path: absolute path for the adapter
+        """
+        self._adapter.export_adapter(file_path)
+
+    def get_format_version(self):
+        return self._adapter.format_version
+
+    def set_adapter_version(self, adapter_version: int):
+        self._adapter.adapter_version = adapter_version
+
+    def get_adapter_version(self):
+        return self._adapter.adapter_version
+
+    def set_model_version(self, model_version: int):
+        self._adapter.model_version = model_version
+
+    def get_model_version(self):
+        return self._adapter.model_version
+
+    def set_parameters(self, params: dict[str, OrtValue]):
+        self._adapter.parameters = {k: v._ortvalue for k, v in params.items()}
+
+    def get_parameters(self) -> dict[str, OrtValue]:
+        return {k: OrtValue(v) for k, v in self._adapter.parameters.items()}
+
+
 def check_and_normalize_provider_args(
     providers: Sequence[str | tuple[str, dict[Any, Any]]] | None,
     provider_options: Sequence[dict[Any, Any]] | None,
@@ -556,7 +602,7 @@ class IOBinding:
         :param name: input name
         :param device_type: e.g. cpu, cuda, cann
         :param device_id: device id, e.g. 0
-        :param element_type: input element type
+        :param element_type: input element type. It can be either numpy type (like numpy.float32) or an integer for onnx type (like onnx.TensorProto.BFLOAT16)
         :param shape: input shape
         :param buffer_ptr: memory pointer to input data
         """
@@ -595,7 +641,7 @@ class IOBinding:
         :param name: output name
         :param device_type: e.g. cpu, cuda, cann, cpu by default
         :param device_id: device id, e.g. 0
-        :param element_type: output element type
+        :param element_type: output element type. It can be either numpy type (like numpy.float32) or an integer for onnx type (like onnx.TensorProto.BFLOAT16)
         :param shape: output shape
         :param buffer_ptr: memory pointer to output data
         """
@@ -712,17 +758,43 @@ class OrtValue:
         )
 
     @staticmethod
-    def ortvalue_from_shape_and_type(shape=None, element_type=None, device_type="cpu", device_id=0):
+    def ortvalue_from_numpy_with_onnx_type(data, onnx_element_type: int):
+        """
+        This method creates an instance of OrtValue on top of the numpy array.
+        No data copy is made and the lifespan of the resulting OrtValue should never
+        exceed the lifespan of bytes object. The API attempts to reinterpret
+        the data type which is expected to be the same size. This is useful
+        when we want to use an ONNX data type that is not supported by numpy.
+
+        :param data: numpy.ndarray.
+        :param onnx_elemenet_type: a valid onnx TensorProto::DataType enum value
+        """
+        return OrtValue(C.OrtValue.ortvalue_from_numpy_with_onnx_type(data, onnx_element_type), data)
+
+    @staticmethod
+    def ortvalue_from_shape_and_type(shape, element_type, device_type: str = "cpu", device_id: int = 0):
         """
         Factory method to construct an OrtValue (which holds a Tensor) from given shape and element_type
 
         :param shape: List of integers indicating the shape of the OrtValue
-        :param element_type: The data type of the elements in the OrtValue (numpy type)
+        :param element_type: The data type of the elements. It can be either numpy type (like numpy.float32) or an integer for onnx type (like onnx.TensorProto.BFLOAT16).
         :param device_type: e.g. cpu, cuda, cann, cpu by default
         :param device_id: device id, e.g. 0
         """
-        if shape is None or element_type is None:
-            raise ValueError("`element_type` and `shape` are to be provided if pre-allocated memory is provided")
+        # Integer for onnx element type (see https://onnx.ai/onnx/api/mapping.html).
+        # This is helpful for some data type (like TensorProto.BFLOAT16) that is not available in numpy.
+        if isinstance(element_type, int):
+            return OrtValue(
+                C.OrtValue.ortvalue_from_shape_and_onnx_type(
+                    shape,
+                    element_type,
+                    C.OrtDevice(
+                        get_ort_device_type(device_type, device_id),
+                        C.OrtDevice.default_memory(),
+                        device_id,
+                    ),
+                )
+            )
 
         return OrtValue(
             C.OrtValue.ortvalue_from_shape_and_type(
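Taken together, this file now offers a small round-trip surface for adapters plus a way to allocate tensors with ONNX-only element types. A sketch using only the methods shown in the hunks above (the file name and parameter name are invented for illustration):

import numpy as np
import onnx
import onnxruntime as ort

# Build an adapter from plain numpy arrays and write it out.
adapter = ort.AdapterFormat()
adapter.set_adapter_version(1)
adapter.set_model_version(1)
adapter.set_parameters(
    {"layers.0.lora_A": ort.OrtValue.ortvalue_from_numpy(np.zeros((4, 4), dtype=np.float32))}
)
adapter.export_adapter("demo.onnx_adapter")

# Read it back and recover the parameters as OrtValue objects.
params = ort.AdapterFormat.read_adapter("demo.onnx_adapter").get_parameters()

# Allocate a BFLOAT16 tensor by passing the integer ONNX element type
# (numpy has no bfloat16, which is the case the new code path targets).
bf16 = ort.OrtValue.ortvalue_from_shape_and_type([2, 3], onnx.TensorProto.BFLOAT16, "cpu", 0)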
onnxruntime/capi/onnxruntime_validation.py CHANGED
@@ -55,9 +55,15 @@ def check_distro_info():
             warnings.warn(
                 f"Unsupported macOS version ({__my_distro_ver__}). ONNX Runtime supports macOS 11.0 or later."
             )
+    elif __my_system__ == "aix":
+        import subprocess
+
+        returned_output = subprocess.check_output("oslevel")
+        __my_distro_ver__str = returned_output.decode("utf-8")
+        __my_distro_ver = __my_distro_ver__str[:3]
     else:
         warnings.warn(
-            f"Unsupported platform ({__my_system__}). ONNX Runtime supports Linux, macOS and Windows platforms, only."
+            f"Unsupported platform ({__my_system__}). ONNX Runtime supports Linux, macOS, AIX and Windows platforms, only."
         )
 
 
onnxruntime/quantization/base_quantizer.py CHANGED
@@ -230,7 +230,9 @@ class BaseQuantizer:
         # TODO: This formula should be explained including why the scale is not estimated for the bias as well.
         bias_scale = input_scale * weight_scale * beta
 
-        quantized_data = (np.asarray(bias_data) / bias_scale).round().astype(np.int32)
+        quantized_data = (np.asarray(bias_data) / bias_scale).round()
+        quantized_data = np.clip(quantized_data, np.iinfo(np.int32).min, np.iinfo(np.int32).max)
+        quantized_data = quantized_data.astype(np.int32)
 
         # update bias initializer
         bias_np_data = np.asarray(quantized_data, dtype=np.int32).reshape(bias_initializer.dims)
@@ -418,6 +420,9 @@ class BaseQuantizer:
         zero_point_list = []
         scale_list = []
         quantized_per_channel_data_list = []
+        weights_shape = list(weights.shape)
+        reshape_dims = list(weights_shape)  # deep copy
+        reshape_dims[channel_axis] = 1  # only one per channel for reshape
         for i in range(channel_count):
             per_channel_data = weights.take(i, channel_axis)
             channel_override_index = i if i < num_channel_overrides else 0
@@ -460,17 +465,10 @@
 
             zero_point_list.append(zero_point)
             scale_list.append(scale)
-            quantized_per_channel_data_list.append(quantized_per_channel_data)
+            quantized_per_channel_data_list.append(np.asarray(quantized_per_channel_data).reshape(reshape_dims))
 
         # combine per_channel_data into one
-        weights_shape = list(weights.shape)
-        reshape_dims = list(weights_shape)  # deep copy
-        reshape_dims[channel_axis] = 1  # only one per channel for reshape
-        quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
-        for i in range(1, len(quantized_per_channel_data_list)):
-            channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
-            quantized_weights = np.concatenate((quantized_weights, channel_weights), channel_axis)
-
+        quantized_weights = np.concatenate(quantized_per_channel_data_list, channel_axis)
         q_weight_name = weight_name + TENSOR_NAME_QUANT_SUFFIX
         zp_name = weight_name + "_zero_point"
         scale_name = weight_name + "_scale"
@@ -519,8 +517,6 @@
         for node in self.model.nodes():
            # adjust tensor_ranges for input of Clip and Relu node
            if node.op_type in ["Clip", "Relu"]:
-                if self.is_activation_symmetric:
-                    continue
                if not self.should_quantize_node(node):
                    continue
                if len(self.model.input_name_to_nodes()[node.input[0]]) != 1:
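The per-channel change is a refactor plus a saturation fix: each quantized channel is reshaped as it is produced and joined with a single np.concatenate, and bias quantization now clips to the int32 range before casting. A small numpy check of both behaviours (toy shapes, not taken from the quantizer):

import numpy as np

# Reshape-then-concatenate along the channel axis reproduces the original layout.
weights = np.arange(12).reshape(4, 3)
channel_axis = 0
reshape_dims = list(weights.shape)
reshape_dims[channel_axis] = 1
slices = [weights.take(i, channel_axis).reshape(reshape_dims) for i in range(weights.shape[channel_axis])]
assert np.array_equal(np.concatenate(slices, channel_axis), weights)

# Clipping before the cast saturates out-of-range bias values instead of wrapping.
big = np.array([3.0e9, -3.0e9]).round()
clipped = np.clip(big, np.iinfo(np.int32).min, np.iinfo(np.int32).max).astype(np.int32)
# clipped -> [ 2147483647, -2147483648]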
onnxruntime/quantization/calibrate.py CHANGED
@@ -69,6 +69,7 @@ class TensorData:
     _floats = frozenset(["avg", "std", "lowest", "highest", "hist_edges"])
 
     def __init__(self, **kwargs):
+        self._attrs = list(kwargs.keys())
         for k, v in kwargs.items():
             if k not in TensorData._allowed:
                 raise ValueError(f"Unexpected value {k!r} not in {TensorData._allowed}.")
@@ -91,6 +92,12 @@
             raise AttributeError(f"Attributes 'avg' and/or 'std' missing in {dir(self)}.")
         return (self.avg, self.std)
 
+    def to_dict(self):
+        # This is needed to serialize the data into JSON.
+        data = {k: getattr(self, k) for k in self._attrs}
+        data["CLS"] = self.__class__.__name__
+        return data
+
 
 class TensorsData:
     def __init__(self, calibration_method, data: Dict[str, Union[TensorData, Tuple]]):
@@ -125,12 +132,24 @@
             raise RuntimeError(f"Only an existing tensor can be modified, {key!r} is not.")
         self.data[key] = value
 
+    def keys(self):
+        return self.data.keys()
+
     def values(self):
         return self.data.values()
 
     def items(self):
         return self.data.items()
 
+    def to_dict(self):
+        # This is needed to serialize the data into JSON.
+        data = {
+            "CLS": self.__class__.__name__,
+            "data": self.data,
+            "calibration_method": self.calibration_method,
+        }
+        return data
+
 
 class CalibrationMethod(Enum):
     MinMax = 0
@@ -565,16 +584,29 @@ class HistogramCalibrater(CalibraterBase):
         """
         Entropy Calibrator collects operators' tensors as well as generates tensor histogram for each operator.
         """
+        input_names_set = {node_arg.name for node_arg in self.infer_session.get_inputs()}
+        output_names = [node_arg.name for node_arg in self.infer_session.get_outputs()]
+
         while True:
             inputs = data_reader.get_next()
             if not inputs:
                 break
-            self.intermediate_outputs.append(self.infer_session.run(None, inputs))
+            outputs = self.infer_session.run(None, inputs)
+
+            # Copy np.ndarray only for graph outputs that are also graph inputs to workaround bug:
+            # https://github.com/microsoft/onnxruntime/issues/21922
+            fixed_outputs = []
+            for output_index, output in enumerate(outputs):
+                if output_names[output_index] in input_names_set:
+                    fixed_outputs.append(copy.copy(output))
+                else:
+                    fixed_outputs.append(output)
+
+            self.intermediate_outputs.append(fixed_outputs)
 
         if len(self.intermediate_outputs) == 0:
             raise ValueError("No data is collected.")
 
-        output_names = [self.infer_session.get_outputs()[i].name for i in range(len(self.intermediate_outputs[0]))]
         output_dicts_list = [
             dict(zip(output_names, intermediate_output)) for intermediate_output in self.intermediate_outputs
         ]
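The last hunk works around a buffer-reuse issue: only outputs whose name is also a graph input are copied before being stored, so the session cannot overwrite them on the next run (issue #21922 linked above). A self-contained sketch of that selection logic with stand-in names and arrays (nothing here comes from an actual session):

import copy

import numpy as np

# Stand-ins for session metadata and outputs; "state_in" plays the role of an
# output that feeds back into the graph as an input.
input_names_set = {"state_in"}
output_names = ["logits", "state_in"]
outputs = [np.zeros((1, 4)), np.ones((1, 8))]

fixed_outputs = [
    copy.copy(out) if output_names[i] in input_names_set else out
    for i, out in enumerate(outputs)
]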