bigdl-core-npu 2.5.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl_core_npu-2.5.0.dist-info/METADATA +35 -0
- bigdl_core_npu-2.5.0.dist-info/RECORD +223 -0
- bigdl_core_npu-2.5.0.dist-info/WHEEL +5 -0
- bigdl_core_npu-2.5.0.dist-info/top_level.txt +1 -0
- intel_npu_acceleration_library/__init__.py +24 -0
- intel_npu_acceleration_library/_version.py +6 -0
- intel_npu_acceleration_library/backend/__init__.py +37 -0
- intel_npu_acceleration_library/backend/base.py +215 -0
- intel_npu_acceleration_library/backend/bindings.py +279 -0
- intel_npu_acceleration_library/backend/compression.py +24 -0
- intel_npu_acceleration_library/backend/convolution.py +58 -0
- intel_npu_acceleration_library/backend/factory.py +944 -0
- intel_npu_acceleration_library/backend/linear.py +60 -0
- intel_npu_acceleration_library/backend/matmul.py +59 -0
- intel_npu_acceleration_library/backend/mlp.py +58 -0
- intel_npu_acceleration_library/backend/ops.py +141 -0
- intel_npu_acceleration_library/backend/qlinear.py +71 -0
- intel_npu_acceleration_library/backend/qmatmul.py +66 -0
- intel_npu_acceleration_library/backend/runtime.py +210 -0
- intel_npu_acceleration_library/backend/sdpa.py +107 -0
- intel_npu_acceleration_library/backend/tensor.py +1050 -0
- intel_npu_acceleration_library/backend/utils.py +70 -0
- intel_npu_acceleration_library/compiler.py +194 -0
- intel_npu_acceleration_library/device.py +230 -0
- intel_npu_acceleration_library/dtypes.py +122 -0
- intel_npu_acceleration_library/external/openvino/__init__.py +71 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +20 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
- intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +352 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +139 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +98 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +119 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +289 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +118 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +536 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +256 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +460 -0
- intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
- intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +26 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +4 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +21 -0
- intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
- intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
- intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +18 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3067 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +399 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +10 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +85 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +189 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +783 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +38 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +429 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +70 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
- intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +536 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +35 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +246 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +205 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +109 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
- intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
- intel_npu_acceleration_library/external/openvino/utils.py +98 -0
- intel_npu_acceleration_library/functional/__init__.py +8 -0
- intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
- intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/modelling.py +150 -0
- intel_npu_acceleration_library/nn/__init__.py +20 -0
- intel_npu_acceleration_library/nn/autograd.py +68 -0
- intel_npu_acceleration_library/nn/conv.py +257 -0
- intel_npu_acceleration_library/nn/functional.py +1207 -0
- intel_npu_acceleration_library/nn/linear.py +162 -0
- intel_npu_acceleration_library/nn/llm.py +417 -0
- intel_npu_acceleration_library/nn/module.py +393 -0
- intel_npu_acceleration_library/optimizations.py +157 -0
- intel_npu_acceleration_library/quantization.py +174 -0
@@ -0,0 +1,34 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
"""
|
5
|
+
Package: openvino
|
6
|
+
Low level wrappers for the FrontEnd C++ API.
|
7
|
+
"""
|
8
|
+
|
9
|
+
# flake8: noqa
|
10
|
+
|
11
|
+
from openvino._pyopenvino import get_version
|
12
|
+
|
13
|
+
__version__ = get_version()
|
14
|
+
|
15
|
+
# main classes
|
16
|
+
from openvino.frontend.frontend import FrontEndManager
|
17
|
+
from openvino.frontend.frontend import FrontEnd
|
18
|
+
from openvino._pyopenvino import InputModel
|
19
|
+
from openvino._pyopenvino import NodeContext
|
20
|
+
from openvino._pyopenvino import Place
|
21
|
+
|
22
|
+
# extensions
|
23
|
+
from openvino._pyopenvino import DecoderTransformationExtension
|
24
|
+
from openvino._pyopenvino import ConversionExtension
|
25
|
+
from openvino._pyopenvino import OpExtension
|
26
|
+
from openvino._pyopenvino import ProgressReporterExtension
|
27
|
+
from openvino._pyopenvino import TelemetryExtension
|
28
|
+
|
29
|
+
# exceptions
|
30
|
+
from openvino._pyopenvino import NotImplementedFailure
|
31
|
+
from openvino._pyopenvino import InitializationFailure
|
32
|
+
from openvino._pyopenvino import OpConversionFailure
|
33
|
+
from openvino._pyopenvino import OpValidationFailure
|
34
|
+
from openvino._pyopenvino import GeneralFailure
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
4
|
+
|
5
|
+
from typing import Union
|
6
|
+
|
7
|
+
from openvino._pyopenvino import FrontEnd as FrontEndBase
|
8
|
+
from openvino._pyopenvino import FrontEndManager as FrontEndManagerBase
|
9
|
+
from openvino._pyopenvino import InputModel
|
10
|
+
from openvino.runtime import Model
|
11
|
+
|
12
|
+
|
13
|
+
class FrontEnd(FrontEndBase):
|
14
|
+
def __init__(self, fe: FrontEndBase) -> None:
|
15
|
+
super().__init__(fe)
|
16
|
+
|
17
|
+
def convert(self, model: Union[Model, InputModel]) -> Model:
|
18
|
+
converted_model = super().convert(model)
|
19
|
+
if isinstance(model, InputModel):
|
20
|
+
return Model(converted_model)
|
21
|
+
return converted_model
|
22
|
+
|
23
|
+
def convert_partially(self, model: InputModel) -> Model:
|
24
|
+
return Model(super().convert_partially(model))
|
25
|
+
|
26
|
+
def decode(self, model: InputModel) -> Model:
|
27
|
+
return Model(super().decode(model))
|
28
|
+
|
29
|
+
def normalize(self, model: Model) -> None:
|
30
|
+
super().normalize(model)
|
31
|
+
|
32
|
+
|
33
|
+
class FrontEndManager(FrontEndManagerBase):
|
34
|
+
def load_by_framework(self, framework: str) -> Union[FrontEnd, None]:
|
35
|
+
fe = super().load_by_framework(framework)
|
36
|
+
if fe is not None:
|
37
|
+
return FrontEnd(fe)
|
38
|
+
return fe
|
39
|
+
|
40
|
+
def load_by_model(self, model: str) -> Union[FrontEnd, None]:
|
41
|
+
fe = super().load_by_model(model)
|
42
|
+
if fe is not None:
|
43
|
+
return FrontEnd(fe)
|
44
|
+
return fe
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
"""
|
5
|
+
Package: openvino
|
6
|
+
Low level wrappers for the FrontEnd C++ API.
|
7
|
+
"""
|
8
|
+
|
9
|
+
# flake8: noqa
|
10
|
+
|
11
|
+
try:
|
12
|
+
from openvino.frontend.onnx.py_onnx_frontend import ConversionExtensionONNX as ConversionExtension
|
13
|
+
from openvino.frontend.onnx.py_onnx_frontend import OpExtensionONNX as OpExtension
|
14
|
+
except ImportError as err:
|
15
|
+
raise ImportError("OpenVINO ONNX frontend is not available, please make sure the frontend is built. " "{}".format(err))
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd
ADDED
Binary file
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd
ADDED
Binary file
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd
ADDED
Binary file
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd
ADDED
Binary file
|
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd
ADDED
Binary file
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
"""
|
5
|
+
Package: openvino
|
6
|
+
Low level wrappers for the FrontEnd C++ API.
|
7
|
+
"""
|
8
|
+
|
9
|
+
# flake8: noqa
|
10
|
+
|
11
|
+
try:
|
12
|
+
from openvino.frontend.paddle.py_paddle_frontend import ConversionExtensionPaddle as ConversionExtension
|
13
|
+
from openvino.frontend.paddle.py_paddle_frontend import OpExtensionPaddle as OpExtension
|
14
|
+
except ImportError as err:
|
15
|
+
raise ImportError("OpenVINO Paddle frontend is not available, please make sure the frontend is built." "{}".format(err))
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
"""
|
5
|
+
Package: openvino
|
6
|
+
Low level wrappers for the FrontEnd C++ API.
|
7
|
+
"""
|
8
|
+
|
9
|
+
# flake8: noqa
|
10
|
+
|
11
|
+
try:
|
12
|
+
from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder
|
13
|
+
from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
|
14
|
+
from openvino.frontend.pytorch.py_pytorch_frontend import ConversionExtensionPytorch as ConversionExtension
|
15
|
+
from openvino.frontend.pytorch.py_pytorch_frontend import OpExtensionPytorch as OpExtension
|
16
|
+
from openvino.frontend.pytorch.module_extension import ModuleExtension
|
17
|
+
except ImportError as err:
|
18
|
+
raise ImportError("OpenVINO PyTorch frontend is not available, please make sure the frontend is built."
|
19
|
+
"{}".format(err))
|
@@ -0,0 +1,352 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# flake8: noqa
|
5
|
+
# mypy: ignore-errors
|
6
|
+
|
7
|
+
from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder
|
8
|
+
from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
|
9
|
+
from openvino.runtime import op, PartialShape, Type as OVType, OVAny, Shape
|
10
|
+
from openvino.frontend.pytorch.utils import make_constant, fetch_attr, pt_to_ov_type_map, torch_tensor_to_ov_const
|
11
|
+
|
12
|
+
import torch
|
13
|
+
|
14
|
+
import logging
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
logger.setLevel(logging.WARNING)
|
17
|
+
|
18
|
+
|
19
|
+
class TorchFXPythonDecoder (Decoder):
|
20
|
+
|
21
|
+
def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]):
|
22
|
+
Decoder.__init__(self)
|
23
|
+
self.mark_node_callback = mark_node_callback
|
24
|
+
# We store every decoder created by this decoder so that all them are not deleted until the first decoder is deleted
|
25
|
+
self.m_decoders = []
|
26
|
+
self.pt_module = pt_module
|
27
|
+
self.fx_gm = fx_gm if fx_gm is not None else pt_module
|
28
|
+
self.input_types = [OVAny(pt_to_ov_type_map[str(t)])
|
29
|
+
for t in input_types]
|
30
|
+
self.input_shapes = input_shapes
|
31
|
+
|
32
|
+
self._input_signature = []
|
33
|
+
|
34
|
+
if issubclass(type(pt_module), torch.fx.graph_module.GraphModule):
|
35
|
+
|
36
|
+
self._input_is_list = None
|
37
|
+
self._nodes = list(pt_module.graph.nodes)
|
38
|
+
self._inputs = []
|
39
|
+
self._outputs = []
|
40
|
+
found_types = []
|
41
|
+
found_shapes = []
|
42
|
+
for i in range(len(self._nodes)):
|
43
|
+
if self._nodes[i].op == 'placeholder':
|
44
|
+
self._inputs.append(i)
|
45
|
+
value = self._nodes[i]
|
46
|
+
self._input_signature.append(value.name)
|
47
|
+
if hasattr(value, "meta") and ('tensor_meta' in value.meta.keys()) and value.meta['tensor_meta']:
|
48
|
+
found_shapes.append(value.meta['tensor_meta'].shape)
|
49
|
+
found_types.append(
|
50
|
+
OVAny(pt_to_ov_type_map[str(value.meta['tensor_meta'].dtype)]))
|
51
|
+
else:
|
52
|
+
found_shapes.append(None)
|
53
|
+
found_types.append(None)
|
54
|
+
elif self._nodes[i].op == 'output':
|
55
|
+
# Instead of putting output index, refer to its target
|
56
|
+
uargs = self.unpack_containers(self._nodes[i].args)
|
57
|
+
self._outputs = [(arg[0], self._nodes.index(arg[1]))
|
58
|
+
for arg in uargs if arg[1] is not None]
|
59
|
+
for idx, shape in enumerate(found_shapes):
|
60
|
+
if shape is not None:
|
61
|
+
new_shape=[]
|
62
|
+
for dim in range(0, len(shape)):
|
63
|
+
if (type(shape[dim]).__name__ == "SymInt"):
|
64
|
+
new_shape.append(-1)
|
65
|
+
else:
|
66
|
+
new_shape.append(shape[dim])
|
67
|
+
found_shapes[idx] = torch.Size(new_shape)
|
68
|
+
|
69
|
+
if not input_shapes or len(input_shapes) == 0:
|
70
|
+
self.input_shapes = found_shapes
|
71
|
+
if not input_types or len(input_types) == 0:
|
72
|
+
self.input_types = found_types
|
73
|
+
|
74
|
+
elif issubclass(type(pt_module), torch.fx.Node):
|
75
|
+
|
76
|
+
self._nodes = nodes # passed from outer context
|
77
|
+
|
78
|
+
# FIXME: Quadratic complexity nodes*nodes considering the outer loop over all nodes
|
79
|
+
self._outputs = [("", self._nodes.index(pt_module))]
|
80
|
+
|
81
|
+
# None in inputs mean the input is inlined or None (also considered inlined)
|
82
|
+
self._inputs = [self._nodes.index(
|
83
|
+
arg) if arg in self._nodes else (arg,) for arg in pt_module.args]
|
84
|
+
|
85
|
+
# FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments.
|
86
|
+
new_inputs = []
|
87
|
+
self.input_types = []
|
88
|
+
for i in range(len(pt_module.args)):
|
89
|
+
if isinstance(pt_module.args[i], (list, tuple)) and any([isinstance(a, torch.fx.Node) for a in pt_module.args[i]]):
|
90
|
+
for arg in pt_module.args[i]:
|
91
|
+
if arg in self._nodes:
|
92
|
+
new_inputs.append(self._nodes.index(arg))
|
93
|
+
else:
|
94
|
+
new_inputs.append((arg,))
|
95
|
+
self.input_types.append(OVAny(DecoderType.List(
|
96
|
+
TorchFXPythonDecoder.get_type_for_value(arg))))
|
97
|
+
else:
|
98
|
+
v = self._inputs[i]
|
99
|
+
new_inputs.append(v)
|
100
|
+
self.input_types.append(
|
101
|
+
TorchFXPythonDecoder.get_type_for_value(v[0] if isinstance(v, tuple) else self._nodes[v]))
|
102
|
+
self._inputs = new_inputs
|
103
|
+
|
104
|
+
def inputs(self):
|
105
|
+
# Consider 0 a special case which may mean the input is inlined, but not guaranteed
|
106
|
+
return [x if not isinstance(x, tuple) else 0 for x in self._inputs]
|
107
|
+
|
108
|
+
def is_input_inlined(self, index):
|
109
|
+
return isinstance(self._inputs[index], tuple)
|
110
|
+
|
111
|
+
@staticmethod
|
112
|
+
def unpack_containers(arg):
|
113
|
+
if isinstance(arg, (tuple, list)):
|
114
|
+
res = []
|
115
|
+
for e in arg:
|
116
|
+
res.extend(TorchFXPythonDecoder.unpack_containers(e))
|
117
|
+
return res
|
118
|
+
elif isinstance(arg, dict):
|
119
|
+
res = []
|
120
|
+
for k, e in arg.items():
|
121
|
+
unpacked = TorchFXPythonDecoder.unpack_containers(e)
|
122
|
+
if len(unpacked) == 1:
|
123
|
+
unpacked[0] = (k, unpacked[0][1])
|
124
|
+
res.extend(unpacked)
|
125
|
+
return res
|
126
|
+
else:
|
127
|
+
return [("", arg)]
|
128
|
+
|
129
|
+
@staticmethod
|
130
|
+
def arg_to_constant(arg):
|
131
|
+
if isinstance(arg, list):
|
132
|
+
if len(arg) > 0:
|
133
|
+
return make_constant(pt_to_ov_type_map[type(
|
134
|
+
arg[0]).__name__], Shape([len(arg)]), arg)
|
135
|
+
else:
|
136
|
+
# TODO: which type should we use if list is empty? Need a signaling value here
|
137
|
+
return make_constant(OVType.i32, Shape([0]), [])
|
138
|
+
elif isinstance(arg, bool):
|
139
|
+
return make_constant(OVType.boolean, Shape([]), [arg])
|
140
|
+
elif isinstance(arg, int):
|
141
|
+
return make_constant(OVType.i64, Shape([]), [arg])
|
142
|
+
elif isinstance(arg, float):
|
143
|
+
return make_constant(OVType.f32, Shape([]), [arg])
|
144
|
+
return None
|
145
|
+
|
146
|
+
def inlined_input(self, index):
|
147
|
+
assert index < len(self._inputs), "Requested input doesn't exist"
|
148
|
+
assert isinstance(
|
149
|
+
self._inputs[index], tuple), "Requested input which is not inlined"
|
150
|
+
assert self._inputs[index][0] is not None, "Requested None inlined input"
|
151
|
+
constant = None
|
152
|
+
arg = self._inputs[index][0]
|
153
|
+
constant = self.arg_to_constant(arg)
|
154
|
+
|
155
|
+
assert constant is not None, f"Constant wasn't created for inlined input {index}"
|
156
|
+
return constant.outputs()
|
157
|
+
|
158
|
+
def input(self, index): # TODO: remove
|
159
|
+
return self.inputs()[index] # TODO: find specialized method
|
160
|
+
|
161
|
+
def get_input_debug_name(self, index):
|
162
|
+
return "input"+str(index)
|
163
|
+
|
164
|
+
def get_input_signature_name(self, index: int) -> str:
    """Return the signature name for input *index*, falling back to the
    synthetic debug name when no signature entry is available."""
    signature = self._input_signature
    if signature is None or index >= len(signature):
        return self.get_input_debug_name(index)
    return signature[index]
|
168
|
+
|
169
|
+
def get_input_shape(self, index):
    """Return the PartialShape of input *index*, preferring a user-supplied
    shape over the one derived from FX metadata."""
    shapes = self.input_shapes
    if index < len(shapes):
        explicit = shapes[index]
        if explicit is not None:
            return PartialShape(explicit)
    return self.get_shape_for_value(self._raw_input(index))
|
174
|
+
|
175
|
+
def get_input_strides(self, index: int) -> list:
    """Return tensor strides recorded in FX metadata for input *index*.

    Returns [] when the input is not a Node or carries no stride info.
    """
    node = self._raw_input(index)
    if not (isinstance(node, torch.fx.node.Node) and hasattr(node, "meta")):
        return []
    meta = node.meta
    if "tensor_meta" not in meta:
        return []
    tensor_meta = meta["tensor_meta"]
    if not hasattr(tensor_meta, "stride"):
        return []
    strides = list(tensor_meta.stride)
    if strides:
        return strides
    return []
|
184
|
+
|
185
|
+
def get_input_type(self, index):
    """Return the type of input *index*, preferring a user-declared type
    over the one derived from FX metadata."""
    declared_types = self.input_types
    if index < len(declared_types):
        declared = declared_types[index]
        if declared is not None:
            return declared
    return self.get_type_for_value(self._raw_input(index))
|
190
|
+
|
191
|
+
def get_output_debug_name(self, index):
    """Return a debug name for output *index*: the recorded label if present,
    otherwise "<node-name>:<index>"."""
    recorded = self._outputs
    if recorded is not None and index < len(recorded):
        label = recorded[index][0]
        if label:
            return label
    base = getattr(self.pt_module, "name", "output")
    return f"{base}:{index}"
|
196
|
+
|
197
|
+
def get_output_shape(self, index):
    """Shape of output *index*, derived from the raw FX output value."""
    return self.get_shape_for_value(self._raw_output(index))
|
200
|
+
|
201
|
+
def get_output_type(self, index):
    """Type of output *index*, derived from the raw FX output value."""
    return self.get_type_for_value(self._raw_output(index))
|
204
|
+
|
205
|
+
def get_shape_for_value(self, value):
    """Derive a PartialShape for *value*: rank-only (all dims dynamic) when
    FX tensor metadata is present, fully dynamic otherwise."""
    if value and hasattr(value, "meta") and 'tensor_meta' in value.meta:
        tensor_meta = value.meta['tensor_meta']
        if tensor_meta:
            rank = len(tensor_meta.shape)
            return PartialShape([-1] * rank)
    return PartialShape.dynamic()
|
210
|
+
|
211
|
+
@staticmethod
def get_type_for_value(value):
    """Map a torch.fx value to an OV element type wrapped in OVAny.

    FX Nodes are resolved through their recorded tensor metadata; plain
    Python scalars map to PyScalar decoder types. Anything unrecognized
    is reported as dynamic.
    """
    if issubclass(type(value), torch.fx.Node):
        if 'tensor_meta' in value.meta.keys():
            if value.meta['tensor_meta'] and isinstance(value.meta['tensor_meta'], torch.Tensor):
                pt_type = value.meta['tensor_meta'].dtype
                if str(pt_type) in pt_to_ov_type_map:
                    ov_type = pt_to_ov_type_map[str(pt_type)]
                    return OVAny(ov_type)
        return OVAny(OVType.dynamic)
    # bool must be checked before int: isinstance(True, int) is True in Python,
    # so the previous int-first ordering made the bool branch unreachable and
    # typed Python booleans as i64 scalars.
    elif isinstance(value, bool):
        return OVAny(DecoderType.PyScalar(OVAny(OVType.boolean)))
    elif isinstance(value, int):
        return OVAny(DecoderType.PyScalar(OVAny(OVType.i64)))
    elif isinstance(value, float):
        return OVAny(DecoderType.PyScalar(OVAny(OVType.f32)))
    return OVAny(OVType.dynamic)
|
228
|
+
|
229
|
+
def get_attribute(self, name):
    """Look up *name* among the node kwargs and wrap it as an OVAny attribute."""
    kwargs = self.pt_module.kwargs
    if name not in kwargs:
        return OVAny(None)
    attr = kwargs[name]
    if isinstance(attr, torch.dtype):
        return OVAny(pt_to_ov_type_map[str(attr)])
    if isinstance(attr, torch.device):
        return OVAny(attr.type)
    if isinstance(attr, str):
        return OVAny(attr)
    # Numeric attrs convert to Constant
    constant = self.arg_to_constant(attr)
    if constant is not None:
        return OVAny(constant.output(0))
    # so that has_attribute return True if attribute exist
    return OVAny(DecoderType.PyNone())
|
245
|
+
|
246
|
+
def get_named_input(self, name):
    """
    Returns id of kwargs input. Such input can be Node or a constant value,
    this function is only used for to return node index. If the input is
    constant, get_attribute should be used.
    """
    kwargs = self.pt_module.kwargs
    if name in kwargs:
        candidate = kwargs[name]
        if isinstance(candidate, torch.fx.Node):
            return self._nodes.index(candidate)
    raise RuntimeError("This input is not a Node")
|
257
|
+
|
258
|
+
def get_subgraph_size(self):
    """Number of subgraphs: FX Nodes have none; modules report their blocks."""
    if issubclass(type(self.pt_module), torch.fx.Node):
        return 0
    if hasattr(self.pt_module, 'blocks'):
        return len(self.get_subgraphs())
    return 1
|
262
|
+
|
263
|
+
def decoder_type_name(self) -> str:
    """Identify this decoder implementation ("fx" for torch.fx graphs)."""
    return "fx"
|
265
|
+
|
266
|
+
def visit_subgraph(self, node_visitor):
    """Walk all operational nodes of the FX graph, wrapping each in a child
    decoder and passing it to *node_visitor*.

    Placeholders, outputs and async-assert nodes are skipped.
    """
    # make sure topological order is satisfied
    for node in self._nodes:
        if node.op == 'placeholder' or node.op == 'output':
            continue # skipping non-operational nodes
        # assertion ops carry no data flow for conversion; skip them
        if node.op == 'call_function' and str(node.target) in ["aten._assert_async.msg"]:
            continue
        decoder = TorchFXPythonDecoder(
            node, self.fx_gm, self._nodes, mark_node_callback=self.mark_node_callback)
        # NOTE(review): child decoders are retained on self, presumably to keep
        # them alive while the caller still references them — confirm
        self.m_decoders.append(decoder)
        node_visitor(decoder)
|
277
|
+
|
278
|
+
def get_subgraphs(self):
    """Return this node's block list; plain FX Nodes carry no subgraphs."""
    if issubclass(type(self.pt_module), torch.fx.Node):
        return []
    return [block for block in self.pt_module.blocks()]
|
282
|
+
|
283
|
+
def get_subgraph_decoder(self, index):
    """Create (and retain on self) a decoder for the subgraph at *index*."""
    decoder = TorchFXPythonDecoder(self.get_subgraphs()[index],
                                   self.fx_gm,
                                   mark_node_callback=self.mark_node_callback)
    # keep the child decoder referenced alongside the others
    self.m_decoders.append(decoder)
    return decoder
|
289
|
+
|
290
|
+
def get_op_type(self):
    """Return the operation type string for this FX node."""
    op = self.pt_module.op
    if op == 'call_function':
        return str(self.pt_module.target)
    if op == 'get_attr':
        return 'get_attr'  # FIXME should be aligned with get_attr from TS implementation
    return 'UNKNOWN_TYPE_' + str(op)
|
297
|
+
|
298
|
+
def get_schema(self):
    """FX nodes carry no TorchScript schema; always report 'NONE'."""
    return 'NONE'
|
300
|
+
|
301
|
+
def outputs(self):
    """Return the output ids, dropping the recorded debug names."""
    return [output_id for _, output_id in self._outputs]
|
303
|
+
|
304
|
+
def _raw_outputs(self):
|
305
|
+
return [self._nodes[x[1]] for x in self._outputs]
|
306
|
+
|
307
|
+
def _raw_output(self, index):
    """Resolve the single raw output at *index*."""
    resolved = self._raw_outputs()
    return resolved[index]
|
309
|
+
|
310
|
+
def _raw_inputs(self):
|
311
|
+
return [self._nodes[x] if not isinstance(x, tuple) and x < len(self._nodes) else x[0] for x in self._inputs]
|
312
|
+
|
313
|
+
def _raw_input(self, index):
    """Resolve the single raw input at *index*."""
    resolved = self._raw_inputs()
    return resolved[index]
|
315
|
+
|
316
|
+
def num_of_outputs(self):
    """Count of this node's outputs."""
    return len(self.outputs())
|
318
|
+
|
319
|
+
def output(self, index):
    """Return the output id at *index*."""
    all_outputs = self.outputs()
    return all_outputs[index]
|
321
|
+
|
322
|
+
def mark_node(self, node):
    """Set a friendly name on the converted *node* and run the optional
    marking callback; returns the node for chaining."""
    label = self.get_op_type()
    if "FrameworkNode" not in node.get_type_name():
        label = label + "/" + node.get_type_name()
    node.set_friendly_name(self.pt_module.name + "/" + label)
    callback = self.mark_node_callback
    if callback is not None:
        callback(self, node)
    return node
|
330
|
+
|
331
|
+
def as_constant(self):
    """Extract a get_attr tensor from the FX module and wrap it as an OV
    constant (sharing memory with the torch tensor)."""
    assert self.pt_module.op == 'get_attr', "Only get_attr is supported"
    # Extract Constant from FX module field
    tensor = fetch_attr(self.fx_gm, self.pt_module.target)
    ov_const = torch_tensor_to_ov_const(tensor, shared_memory=True)
    return ov_const.outputs()
|
337
|
+
|
338
|
+
def as_string(self):
    """FX decoder has no string constants; always None."""
    return None
|
340
|
+
|
341
|
+
def input_is_none(self, index):
    """True when input *index* is absent, holds an inlined None, or resolves
    to a NoneType value."""
    if index >= len(self._inputs):
        return True
    entry = self._inputs[index]
    if isinstance(entry, tuple) and entry[0] is None:
        return True
    raw = self._raw_input(index)
    return str(type(raw)) in ['torch.NoneType', 'NoneType']
|
347
|
+
|
348
|
+
def debug(self):
    """Print the underlying FX node using its own print()."""
    self.pt_module.print()
|
350
|
+
|
351
|
+
def may_produce_alias(self, in_index: int, out_index: int) -> bool:
    """FX decoder assumes no input/output aliasing."""
    return False
|
@@ -0,0 +1,139 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# flake8: noqa
|
5
|
+
# mypy: ignore-errors
|
6
|
+
|
7
|
+
import torch
|
8
|
+
from functools import partial
|
9
|
+
|
10
|
+
# Wraps a single tensor to a module to prevent it from jit.freezing
|
11
|
+
# It depends on a tensor dtype whether it will be preserved from freezing. Refer to the decoder code to learn which types will be preserved.
|
12
|
+
class KeepWeight(torch.nn.Module):
    """Holds one tensor as a frozen (requires_grad=False) Parameter so that
    jit.freeze preserves it instead of folding it away."""

    def __init__(self, weight):
        super().__init__()
        frozen = torch.nn.Parameter(weight, requires_grad=False)
        self.weight = frozen

    def forward(self):
        """Return the wrapped tensor unchanged."""
        return self.weight
|
20
|
+
|
21
|
+
|
22
|
+
# Produces a pattern that can be captured later and represented as a single u4 constant node
def decompression_pattern(weights):
    """Unpack u4 pairs stored in uint8 bytes: low nibble then high nibble,
    stacked along a new trailing axis."""
    low_mask = torch.tensor(15, dtype=torch.uint8).to(weights.device)
    low = torch.bitwise_and(weights, low_mask)
    high = torch.bitwise_right_shift(weights, 4)
    return torch.stack((low, high), dim=-1)
|
26
|
+
|
27
|
+
|
28
|
+
def patched_forward(self, *args, **kwargs):
    """Replacement forward for an AutoGPTQ quantized linear module.

    Dequantizes the u4-packed weights/zero-points through
    decompression_pattern so the whole computation is traceable, then
    performs the plain matmul. Bound to the module via functools.partial in
    patch_model, so *self* is the patched module instance.
    """
    # honor accelerate-style hooks if the module has been dispatched
    if hasattr(self, '_hf_hook'):
        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)

    x = args[0]
    dtype = x.dtype
    outshape = x.shape[:-1] + (self.width,)
    # flatten all leading dims into a single batch dimension
    x = x.view(-1, x.shape[-1])
    groups = self.qzeros.shape[0]
    height = self.qweight.shape[0]

    # unpack two u4 values per byte; 8 values per original int32 word
    unpacked_weights = decompression_pattern(
        self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
    # reorder nibbles to restore the GPTQ packing layout before flattening
    unpacked_weights = torch.transpose(
        unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
    unpacked_zp = decompression_pattern(
        self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1)

    # GPTQ stores zero-points minus one; restore the offset
    unpacked_zp = unpacked_zp.to(dtype) + 1

    # dequantize: (q - zp) * scale, per group
    unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales
    unpacked_weights = unpacked_weights.view(-1, self.width)

    out = x @ unpacked_weights

    out = out.view(outshape)
    if self.bias is not None:
        out.add_(self.bias)

    if hasattr(self, '_hf_hook'):
        out = self._hf_hook.post_forward(self, out)
    return out
|
60
|
+
|
61
|
+
|
62
|
+
# All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
# (checked against m.QUANT_TYPE in patch_model; anything else is rejected)
supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']
|
64
|
+
|
65
|
+
|
66
|
+
def patch_model(model):
    """Patch every AutoGPTQ quantized submodule of *model* in place so the
    model becomes traceable: forward is replaced by patched_forward and the
    packed int32 buffers are reinterpreted as uint8.

    Raises ValueError for unsupported QUANT_TYPE or bit widths other than 4.
    Reversible via unpatch_model.
    """
    for name, m in model.named_modules():
        if hasattr(m, '_openvino_patch_orig_forward'):
            # already patched, skipping
            continue
        # TODO: Check module type
        is_quantized = getattr(m, 'is_quantized', None)
        if is_quantized is not None:
            m.is_quantized = False
        m.float()  # enables tracing on CPU, applied for all modules
        if hasattr(m, 'QUANT_TYPE'):
            if m.QUANT_TYPE not in supported_quant_types:
                raise ValueError(
                    f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for AutoGPTQ model, only the following types are supported: {supported_quant_types}')
            if m.bits != 4:
                raise ValueError(
                    f'Unsupported bits == {m.bits} is discovered in module {name} in AutoGPTQ model, only bits == 4 is supported.')

            # eight 4-bit values are packed into each int32 word
            int4_in_int32 = 8
            groups = m.qzeros.shape[0]
            m.width = m.qweight.shape[1]
            # sanity check: group size must match the packed weight geometry
            assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups

            m._openvino_patch_orig_forward = m.forward
            m.forward = partial(patched_forward, m)

            # Keep original field properties to be used when model is returned back to its original state
            m._openvino_patch_orig_qweights_type = m.qweight.dtype
            m._openvino_patch_orig_qzeros_type = m.qzeros.dtype
            m._openvino_patch_orig_scale_shape = m.scales.shape

            # reinterpret packed int32 storage as raw bytes (no copy)
            m.qweight = m.qweight.view(dtype=torch.uint8)
            m.qzeros = m.qzeros.view(dtype=torch.uint8)

            # TODO: Redundant tensor copy? Try to remove m.qweigh and m.qzeros after keeping modified values as submodules
            m.add_module(
                '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))
            m.add_module('_openvino_u4_compression_submodule_qzeros',
                         KeepWeight(m.qzeros))

            m.scales = m.scales.view(-1, 1, m.width)
|
107
|
+
|
108
|
+
|
109
|
+
def unpatch_model(model):
    """Undo patch_model: restore the original forward, buffer dtypes and
    scale shape on every previously patched submodule.

    Best-effort: failures are reported with a warning instead of raised, so a
    partially broken module does not abort unpatching of the rest.
    """
    for _, m in model.named_modules():
        if hasattr(m, '_openvino_patch_orig_forward'):
            try:
                m.forward = m._openvino_patch_orig_forward
                del m._openvino_patch_orig_forward

                m.qweight = m.qweight.view(
                    dtype=m._openvino_patch_orig_qweights_type)
                del m._openvino_patch_orig_qweights_type

                m.qzeros = m.qzeros.view(
                    dtype=m._openvino_patch_orig_qzeros_type)
                del m._openvino_patch_orig_qzeros_type

                m.scales = m.scales.view(m._openvino_patch_orig_scale_shape)
                del m._openvino_patch_orig_scale_shape

                # drop the helper submodules added by patch_model
                del m._openvino_u4_compression_submodule_qweights
                del m._openvino_u4_compression_submodule_qzeros
            except Exception as error:
                print('[ WARNING ] Exception raised during GPTQ model unpatching. Depending on the exact issue it may lead to broken original model')
                print(error)
|
132
|
+
|
133
|
+
|
134
|
+
def detect_gptq_model_raw(model):
    """Return True when *model* directly carries a GPTQ quantization config.

    Always returns a bool and is safe against missing attributes at any
    level. The previous truthy-`and` chain leaked intermediate objects as
    the return value and raised AttributeError when quantization_config
    existed but had no quant_method attribute.
    """
    if not model:
        return False
    config = getattr(model, 'config', None)
    if not config:
        return False
    quant_config = getattr(config, 'quantization_config', None)
    if not quant_config:
        return False
    return getattr(quant_config, 'quant_method', None) == 'gptq'
|
136
|
+
|
137
|
+
|
138
|
+
def detect_gptq_model(model):
    """Detect GPTQ either on *model* itself or on its wrapped .model."""
    if detect_gptq_model_raw(model):
        return True
    inner = getattr(model, 'model', None)
    return inner and detect_gptq_model_raw(inner)
|