onnxruntime-directml 1.19.2-cp310-cp310-win_amd64.whl → 1.20.0-cp310-cp310-win_amd64.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- onnxruntime/__init__.py +3 -1
- onnxruntime/capi/DirectML.dll +0 -0
- onnxruntime/capi/convert_npz_to_onnx_adapter.py +48 -0
- onnxruntime/capi/onnxruntime.dll +0 -0
- onnxruntime/capi/onnxruntime_collect_build_info.py +0 -56
- onnxruntime/capi/onnxruntime_inference_collection.py +78 -6
- onnxruntime/capi/onnxruntime_providers_shared.dll +0 -0
- onnxruntime/capi/onnxruntime_pybind11_state.pyd +0 -0
- onnxruntime/capi/onnxruntime_validation.py +7 -1
- onnxruntime/quantization/base_quantizer.py +8 -12
- onnxruntime/quantization/calibrate.py +34 -2
- onnxruntime/quantization/matmul_4bits_quantizer.py +662 -39
- onnxruntime/quantization/operators/gather.py +1 -1
- onnxruntime/quantization/operators/matmul.py +8 -5
- onnxruntime/quantization/qdq_quantizer.py +1 -2
- onnxruntime/quantization/quant_utils.py +30 -5
- onnxruntime/quantization/quantize.py +14 -3
- onnxruntime/quantization/registry.py +1 -0
- onnxruntime/quantization/tensor_quant_overrides.py +2 -2
- onnxruntime/tools/mobile_helpers/coreml_supported_mlprogram_ops.md +7 -5
- onnxruntime/tools/mobile_helpers/coreml_supported_neuralnetwork_ops.md +1 -1
- onnxruntime/transformers/benchmark.py +1 -1
- onnxruntime/transformers/constants.py +2 -2
- onnxruntime/transformers/convert_generation.py +1 -1
- onnxruntime/transformers/convert_to_packing_mode.py +10 -10
- onnxruntime/transformers/fusion_attention_sam2.py +534 -0
- onnxruntime/transformers/fusion_gelu.py +12 -3
- onnxruntime/transformers/fusion_layernorm.py +158 -2
- onnxruntime/transformers/fusion_rotary_attention.py +1 -1
- onnxruntime/transformers/fusion_skiplayernorm.py +26 -17
- onnxruntime/transformers/io_binding_helper.py +7 -4
- onnxruntime/transformers/machine_info.py +0 -2
- onnxruntime/transformers/models/gpt2/convert_to_onnx.py +5 -1
- onnxruntime/transformers/models/llama/benchmark_all.py +1 -5
- onnxruntime/transformers/models/llama/convert_to_onnx.py +25 -58
- onnxruntime/transformers/models/sam2/__init__.py +12 -0
- onnxruntime/transformers/models/sam2/benchmark_sam2.py +625 -0
- onnxruntime/transformers/models/sam2/convert_to_onnx.py +260 -0
- onnxruntime/transformers/models/sam2/image_decoder.py +273 -0
- onnxruntime/transformers/models/sam2/image_encoder.py +186 -0
- onnxruntime/transformers/models/sam2/mask_decoder.py +208 -0
- onnxruntime/transformers/models/sam2/nvtx_helper.py +33 -0
- onnxruntime/transformers/models/sam2/prompt_encoder.py +189 -0
- onnxruntime/transformers/models/sam2/sam2_demo.py +322 -0
- onnxruntime/transformers/models/sam2/sam2_image_onnx_predictor.py +280 -0
- onnxruntime/transformers/models/sam2/sam2_utils.py +147 -0
- onnxruntime/transformers/models/stable_diffusion/engine_builder.py +1 -1
- onnxruntime/transformers/models/whisper/benchmark_all.py +1 -5
- onnxruntime/transformers/models/whisper/whisper_chain.py +4 -1
- onnxruntime/transformers/onnx_model.py +62 -17
- onnxruntime/transformers/onnx_model_bert.py +3 -3
- onnxruntime/transformers/onnx_model_clip.py +1 -0
- onnxruntime/transformers/onnx_model_sam2.py +138 -0
- onnxruntime/transformers/optimizer.py +5 -3
- {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/METADATA +3 -8
- {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/RECORD +59 -47
- onnxruntime/tools/mobile_helpers/check_model_can_use_ort_mobile_pkg.py +0 -301
- onnxruntime/tools/mobile_helpers/mobile_package.required_operators.config +0 -46
- {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/WHEEL +0 -0
- {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/entry_points.txt +0 -0
- {onnxruntime_directml-1.19.2.dist-info → onnxruntime_directml-1.20.0.dist-info}/top_level.txt +0 -0
onnxruntime/__init__.py
CHANGED
@@ -7,7 +7,7 @@ ONNX Runtime is a performance-focused scoring engine for Open Neural Network Exchange (ONNX) models.
 For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://aka.ms/onnxruntime/>`_
 or the `Github project <https://github.com/microsoft/onnxruntime/>`_.
 """
-__version__ = "1.19.2"
+__version__ = "1.20.0"
 __author__ = "Microsoft"
 
 # we need to do device version validation (for example to check Cuda version for an onnxruntime-training package).
@@ -23,6 +23,7 @@ try:
     from onnxruntime.capi._pybind_state import ExecutionMode  # noqa: F401
     from onnxruntime.capi._pybind_state import ExecutionOrder  # noqa: F401
     from onnxruntime.capi._pybind_state import GraphOptimizationLevel  # noqa: F401
+    from onnxruntime.capi._pybind_state import LoraAdapter  # noqa: F401
     from onnxruntime.capi._pybind_state import ModelMetadata  # noqa: F401
     from onnxruntime.capi._pybind_state import NodeArg  # noqa: F401
     from onnxruntime.capi._pybind_state import OrtAllocatorType  # noqa: F401
@@ -56,6 +57,7 @@ from onnxruntime.capi import onnxruntime_validation
 if import_capi_exception:
     raise import_capi_exception
 
+from onnxruntime.capi.onnxruntime_inference_collection import AdapterFormat  # noqa: F401
 from onnxruntime.capi.onnxruntime_inference_collection import InferenceSession  # noqa: F401
 from onnxruntime.capi.onnxruntime_inference_collection import IOBinding  # noqa: F401
 from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice  # noqa: F401
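Note: the two added imports make the new LoRA adapter plumbing reachable from the top-level package. A minimal round-trip sketch using the AdapterFormat methods added in onnxruntime_inference_collection.py later in this diff (the file name is hypothetical, not from the diff):

    import numpy as np
    import onnxruntime as ort

    # Build an adapter in memory with the 1.20 API surface shown in this diff.
    fmt = ort.AdapterFormat()
    fmt.set_adapter_version(1)
    fmt.set_model_version(1)
    fmt.set_parameters({"lora_A": ort.OrtValue.ortvalue_from_numpy(np.zeros((4, 4), dtype=np.float32))})
    fmt.export_adapter("demo.onnx_adapter")  # hypothetical path

    # Read it back and inspect the stored versions.
    back = ort.AdapterFormat.read_adapter("demo.onnx_adapter")
    print(back.get_adapter_version(), back.get_model_version())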
onnxruntime/capi/DirectML.dll
CHANGED
Binary file

onnxruntime/capi/convert_npz_to_onnx_adapter.py
ADDED
@@ -0,0 +1,48 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+# This script helps converting .npz files to .onnx_adapter files
+
+import argparse
+import os
+import sys
+
+import numpy as np
+
+import onnxruntime as ort
+
+
+def get_args() -> argparse:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--npz_file_path", type=str, required=True)
+    parser.add_argument("--output_file_path", type=str, required=True)
+    parser.add_argument("--adapter_version", type=int, required=True)
+    parser.add_argument("--model_version", type=int, required=True)
+    return parser.parse_args()
+
+
+def export_lora_parameters(
+    npz_file_path: os.PathLike, adapter_version: int, model_version: int, output_file_path: os.PathLike
+):
+    """The function converts lora parameters in npz to onnx_adapter format"""
+    adapter_format = ort.AdapterFormat()
+    adapter_format.set_adapter_version(adapter_version)
+    adapter_format.set_model_version(model_version)
+    name_to_ort_value = {}
+    with np.load(npz_file_path) as data:
+        for name, np_arr in data.items():
+            ort_value = ort.OrtValue.ortvalue_from_numpy(np_arr)
+            name_to_ort_value[name] = ort_value
+
+    adapter_format.set_parameters(name_to_ort_value)
+    adapter_format.export_adapter(output_file_path)
+
+
+def main() -> int:
+    args = get_args()
+    export_lora_parameters(args.npz_file_path, args.adapter_version, args.model_version, args.output_file_path)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
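Note: a usage sketch for the new converter, assuming it is invoked as a module, would be `python -m onnxruntime.capi.convert_npz_to_onnx_adapter --npz_file_path lora.npz --output_file_path lora.onnx_adapter --adapter_version 1 --model_version 1` (the flag names come from get_args above; the paths are hypothetical). The same conversion can also be driven programmatically:

    # Programmatic equivalent of the CLI above (paths are hypothetical).
    from onnxruntime.capi.convert_npz_to_onnx_adapter import export_lora_parameters

    export_lora_parameters("lora.npz", 1, 1, "lora.onnx_adapter")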
onnxruntime/capi/onnxruntime.dll
CHANGED
Binary file

onnxruntime/capi/onnxruntime_collect_build_info.py
CHANGED
@@ -45,59 +45,3 @@ def find_cudart_versions(build_env=False, build_cuda_version=None):
 
     # convert to list and remove None
     return [ver for ver in cudart_found_versions if ver]
-
-
-def find_cudnn_supported_cuda_versions(build_env=False):
-    # comments in get_cudart_version apply here
-    if not sys.platform.startswith("linux"):
-        warnings.warn("find_cudnn_versions only works on Linux")
-
-    cudnn_possible_versions = {None}
-    if not build_env:
-        # if not in a build environment, there may be more than one installed cudnn.
-        # https://developer.nvidia.com/rdp/cudnn-archive to include all that may support Cuda 10+.
-        cudnn_possible_versions.update(
-            {
-                "8.2",
-                "8.1.1",
-                "8.1.0",
-                "8.0.5",
-                "8.0.4",
-                "8.0.3",
-                "8.0.2",
-                "8.0.1",
-                "7.6.5",
-                "7.6.4",
-                "7.6.3",
-                "7.6.2",
-                "7.6.1",
-                "7.6.0",
-                "7.5.1",
-                "7.5.0",
-                "7.4.2",
-                "7.4.1",
-                "7.3.1",
-                "7.3.0",
-            }
-        )
-
-    def get_cudnn_supported_cuda_version(find_cudnn_version=None):
-        cudnn_lib_filename = "libcudnn.so"
-        if find_cudnn_version:
-            cudnn_lib_filename = cudnn_lib_filename + "." + find_cudnn_version
-
-        # in cudnn.h cudnn version are calculated as:
-        # #define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)
-        try:
-            cudnn = ctypes.CDLL(cudnn_lib_filename)
-            # cudnn_ver = cudnn.cudnnGetVersion()
-            cuda_ver = cudnn.cudnnGetCudartVersion()
-            return cuda_ver
-        except Exception:
-            return None
-
-    # use set to avoid duplications
-    cuda_found_versions = {get_cudnn_supported_cuda_version(cudnn_version) for cudnn_version in cudnn_possible_versions}
-
-    # convert to list and remove None
-    return [ver for ver in cuda_found_versions if ver]
onnxruntime/capi/onnxruntime_inference_collection.py
CHANGED
@@ -32,6 +32,52 @@ def get_ort_device_type(device_type: str, device_index) -> C.OrtDevice:
         raise Exception("Unsupported device type: " + device_type)
 
 
+class AdapterFormat:
+    """
+    This class is used to create adapter files from python structures
+    """
+
+    def __init__(self, adapter=None) -> None:
+        if adapter is None:
+            self._adapter = C.AdapterFormat()
+        else:
+            self._adapter = adapter
+
+    @staticmethod
+    def read_adapter(file_path: os.PathLike) -> AdapterFormat:
+        return AdapterFormat(C.AdapterFormat.read_adapter(file_path))
+
+    def export_adapter(self, file_path: os.PathLike):
+        """
+        This function writes a file at the specified location
+        in onnxrunitme adapter format containing Lora parameters.
+
+        :param file_path: absolute path for the adapter
+        """
+        self._adapter.export_adapter(file_path)
+
+    def get_format_version(self):
+        return self._adapter.format_version
+
+    def set_adapter_version(self, adapter_version: int):
+        self._adapter.adapter_version = adapter_version
+
+    def get_adapter_version(self):
+        return self._adapter.adapter_version
+
+    def set_model_version(self, model_version: int):
+        self._adapter.model_version = model_version
+
+    def get_model_version(self):
+        return self._adapter.model_version
+
+    def set_parameters(self, params: dict[str, OrtValue]):
+        self._adapter.parameters = {k: v._ortvalue for k, v in params.items()}
+
+    def get_parameters(self) -> dict[str, OrtValue]:
+        return {k: OrtValue(v) for k, v in self._adapter.parameters.items()}
+
+
 def check_and_normalize_provider_args(
     providers: Sequence[str | tuple[str, dict[Any, Any]]] | None,
     provider_options: Sequence[dict[Any, Any]] | None,
@@ -556,7 +602,7 @@ class IOBinding:
         :param name: input name
         :param device_type: e.g. cpu, cuda, cann
         :param device_id: device id, e.g. 0
-        :param element_type: input element type
+        :param element_type: input element type. It can be either numpy type (like numpy.float32) or an integer for onnx type (like onnx.TensorProto.BFLOAT16)
         :param shape: input shape
         :param buffer_ptr: memory pointer to input data
         """
@@ -595,7 +641,7 @@ class IOBinding:
         :param name: output name
         :param device_type: e.g. cpu, cuda, cann, cpu by default
         :param device_id: device id, e.g. 0
-        :param element_type: output element type
+        :param element_type: output element type. It can be either numpy type (like numpy.float32) or an integer for onnx type (like onnx.TensorProto.BFLOAT16)
         :param shape: output shape
         :param buffer_ptr: memory pointer to output data
         """
@@ -712,17 +758,43 @@ class OrtValue:
         )
 
     @staticmethod
-    def
+    def ortvalue_from_numpy_with_onnx_type(data, onnx_element_type: int):
+        """
+        This method creates an instance of OrtValue on top of the numpy array.
+        No data copy is made and the lifespan of the resulting OrtValue should never
+        exceed the lifespan of bytes object. The API attempts to reinterpret
+        the data type which is expected to be the same size. This is useful
+        when we want to use an ONNX data type that is not supported by numpy.
+
+        :param data: numpy.ndarray.
+        :param onnx_elemenet_type: a valid onnx TensorProto::DataType enum value
+        """
+        return OrtValue(C.OrtValue.ortvalue_from_numpy_with_onnx_type(data, onnx_element_type), data)
+
+    @staticmethod
+    def ortvalue_from_shape_and_type(shape, element_type, device_type: str = "cpu", device_id: int = 0):
         """
         Factory method to construct an OrtValue (which holds a Tensor) from given shape and element_type
 
         :param shape: List of integers indicating the shape of the OrtValue
-        :param element_type: The data type of the elements
+        :param element_type: The data type of the elements. It can be either numpy type (like numpy.float32) or an integer for onnx type (like onnx.TensorProto.BFLOAT16).
         :param device_type: e.g. cpu, cuda, cann, cpu by default
        :param device_id: device id, e.g. 0
         """
-
-
+        # Integer for onnx element type (see https://onnx.ai/onnx/api/mapping.html).
+        # This is helpful for some data type (like TensorProto.BFLOAT16) that is not available in numpy.
+        if isinstance(element_type, int):
+            return OrtValue(
+                C.OrtValue.ortvalue_from_shape_and_onnx_type(
+                    shape,
+                    element_type,
+                    C.OrtDevice(
+                        get_ort_device_type(device_type, device_id),
+                        C.OrtDevice.default_memory(),
+                        device_id,
+                    ),
+                )
+            )
 
         return OrtValue(
             C.OrtValue.ortvalue_from_shape_and_type(
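Note: the integer element_type pathway above exists for dtypes numpy cannot represent. A short sketch, assuming the onnx package is installed (TensorProto.BFLOAT16 is the example the docstrings themselves use):

    import numpy as np
    import onnx
    import onnxruntime as ort

    # Allocate a BFLOAT16 tensor by passing the onnx enum instead of a numpy dtype.
    v = ort.OrtValue.ortvalue_from_shape_and_type([2, 3], onnx.TensorProto.BFLOAT16, "cpu", 0)

    # Reinterpret same-size uint16 bits as BFLOAT16 without copying the data.
    bits = np.zeros((2, 3), dtype=np.uint16)
    v2 = ort.OrtValue.ortvalue_from_numpy_with_onnx_type(bits, onnx.TensorProto.BFLOAT16)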
onnxruntime/capi/onnxruntime_providers_shared.dll
CHANGED
Binary file

onnxruntime/capi/onnxruntime_pybind11_state.pyd
CHANGED
Binary file
onnxruntime/capi/onnxruntime_validation.py
CHANGED
@@ -55,9 +55,15 @@ def check_distro_info():
             warnings.warn(
                 f"Unsupported macOS version ({__my_distro_ver__}). ONNX Runtime supports macOS 11.0 or later."
             )
+    elif __my_system__ == "aix":
+        import subprocess
+
+        returned_output = subprocess.check_output("oslevel")
+        __my_distro_ver__str = returned_output.decode("utf-8")
+        __my_distro_ver = __my_distro_ver__str[:3]
     else:
         warnings.warn(
-            f"Unsupported platform ({__my_system__}). ONNX Runtime supports Linux, macOS and Windows platforms, only."
+            f"Unsupported platform ({__my_system__}). ONNX Runtime supports Linux, macOS, AIX and Windows platforms, only."
         )
 
 
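Note on the [:3] slice in the new AIX branch: oslevel prints a dotted level such as 7.2.0.0, so the slice keeps just the major.minor pair. An offline sketch with a hypothetical output value:

    # oslevel is AIX-only; the bytes below stand in for its real output.
    returned_output = b"7.2.0.0\n"
    assert returned_output.decode("utf-8")[:3] == "7.2"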
onnxruntime/quantization/base_quantizer.py
CHANGED
@@ -230,7 +230,9 @@ class BaseQuantizer:
         # TODO: This formula should be explained including why the scale is not estimated for the bias as well.
         bias_scale = input_scale * weight_scale * beta
 
-        quantized_data = (np.asarray(bias_data) / bias_scale).round().astype(np.int32)
+        quantized_data = (np.asarray(bias_data) / bias_scale).round()
+        quantized_data = np.clip(quantized_data, np.iinfo(np.int32).min, np.iinfo(np.int32).max)
+        quantized_data = quantized_data.astype(np.int32)
 
         # update bias initializer
         bias_np_data = np.asarray(quantized_data, dtype=np.int32).reshape(bias_initializer.dims)
@@ -418,6 +420,9 @@ class BaseQuantizer:
         zero_point_list = []
         scale_list = []
         quantized_per_channel_data_list = []
+        weights_shape = list(weights.shape)
+        reshape_dims = list(weights_shape)  # deep copy
+        reshape_dims[channel_axis] = 1  # only one per channel for reshape
         for i in range(channel_count):
             per_channel_data = weights.take(i, channel_axis)
             channel_override_index = i if i < num_channel_overrides else 0
@@ -460,17 +465,10 @@ class BaseQuantizer:
 
             zero_point_list.append(zero_point)
             scale_list.append(scale)
-            quantized_per_channel_data_list.append(quantized_per_channel_data)
+            quantized_per_channel_data_list.append(np.asarray(quantized_per_channel_data).reshape(reshape_dims))
 
         # combine per_channel_data into one
-
-        reshape_dims = list(weights_shape)  # deep copy
-        reshape_dims[channel_axis] = 1  # only one per channel for reshape
-        quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
-        for i in range(1, len(quantized_per_channel_data_list)):
-            channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
-            quantized_weights = np.concatenate((quantized_weights, channel_weights), channel_axis)
-
+        quantized_weights = np.concatenate(quantized_per_channel_data_list, channel_axis)
         q_weight_name = weight_name + TENSOR_NAME_QUANT_SUFFIX
         zp_name = weight_name + "_zero_point"
         scale_name = weight_name + "_scale"
@@ -519,8 +517,6 @@ class BaseQuantizer:
         for node in self.model.nodes():
             # adjust tensor_ranges for input of Clip and Relu node
             if node.op_type in ["Clip", "Relu"]:
-                if self.is_activation_symmetric:
-                    continue
                 if not self.should_quantize_node(node):
                     continue
                 if len(self.model.input_name_to_nodes()[node.input[0]]) != 1:
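Note: the new clip in the first hunk guards the int32 cast. When bias_data / bias_scale rounds to a value outside the int32 range, a bare astype wraps around instead of saturating. A tiny illustration with a made-up value:

    import numpy as np

    rounded = np.asarray([3e9]).round()  # beyond np.iinfo(np.int32).max (2147483647)
    # A bare rounded.astype(np.int32) overflows (C-style cast; newer numpy may warn).
    # Clipping first, as the 1.20 code does, pins the result at the int32 bound:
    safe = np.clip(rounded, np.iinfo(np.int32).min, np.iinfo(np.int32).max).astype(np.int32)
    print(safe)  # [2147483647]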
onnxruntime/quantization/calibrate.py
CHANGED
@@ -69,6 +69,7 @@ class TensorData:
     _floats = frozenset(["avg", "std", "lowest", "highest", "hist_edges"])
 
     def __init__(self, **kwargs):
+        self._attrs = list(kwargs.keys())
         for k, v in kwargs.items():
             if k not in TensorData._allowed:
                 raise ValueError(f"Unexpected value {k!r} not in {TensorData._allowed}.")
@@ -91,6 +92,12 @@
             raise AttributeError(f"Attributes 'avg' and/or 'std' missing in {dir(self)}.")
         return (self.avg, self.std)
 
+    def to_dict(self):
+        # This is needed to serialize the data into JSON.
+        data = {k: getattr(self, k) for k in self._attrs}
+        data["CLS"] = self.__class__.__name__
+        return data
+
 
 class TensorsData:
     def __init__(self, calibration_method, data: Dict[str, Union[TensorData, Tuple]]):
@@ -125,12 +132,24 @@
             raise RuntimeError(f"Only an existing tensor can be modified, {key!r} is not.")
         self.data[key] = value
 
+    def keys(self):
+        return self.data.keys()
+
     def values(self):
         return self.data.values()
 
     def items(self):
         return self.data.items()
 
+    def to_dict(self):
+        # This is needed to serialize the data into JSON.
+        data = {
+            "CLS": self.__class__.__name__,
+            "data": self.data,
+            "calibration_method": self.calibration_method,
+        }
+        return data
+
 
 class CalibrationMethod(Enum):
     MinMax = 0
@@ -565,16 +584,29 @@ class HistogramCalibrater(CalibraterBase):
         """
         Entropy Calibrator collects operators' tensors as well as generates tensor histogram for each operator.
         """
+        input_names_set = {node_arg.name for node_arg in self.infer_session.get_inputs()}
+        output_names = [node_arg.name for node_arg in self.infer_session.get_outputs()]
+
         while True:
             inputs = data_reader.get_next()
             if not inputs:
                 break
-            self.intermediate_outputs.append(self.infer_session.run(None, inputs))
+            outputs = self.infer_session.run(None, inputs)
+
+            # Copy np.ndarray only for graph outputs that are also graph inputs to workaround bug:
+            # https://github.com/microsoft/onnxruntime/issues/21922
+            fixed_outputs = []
+            for output_index, output in enumerate(outputs):
+                if output_names[output_index] in input_names_set:
+                    fixed_outputs.append(copy.copy(output))
+                else:
+                    fixed_outputs.append(output)
+
+            self.intermediate_outputs.append(fixed_outputs)
 
         if len(self.intermediate_outputs) == 0:
             raise ValueError("No data is collected.")
 
-        output_names = [self.infer_session.get_outputs()[i].name for i in range(len(self.intermediate_outputs[0]))]
         output_dicts_list = [
             dict(zip(output_names, intermediate_output)) for intermediate_output in self.intermediate_outputs
         ]