PyPI - bigdl-core-npu - Versions diffs - 2.6.0b20250114__cp311-cp311-win_amd64.whl - Mend

bigdl-core-npu 2.6.0b20250114__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (234) hide show

intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py ADDED Viewed

@@ -0,0 +1,180 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+# flake8: noqa
+# mypy: ignore-errors
+import torch
+from functools import partial
+# Wraps a single tensor to a module to prevent it from jit.freezing
+# It depends on a tensor dtype whether it will be preserved from freezing. Refer to the decoder code to learn which types will be preserved.
+class KeepWeight(torch.nn.Module):
+    def __init__(self, weight):
+        super().__init__()
+        self.weight = torch.nn.Parameter(weight, requires_grad=False)
+    def forward(self):
+        return self.weight
+# Produces a pattern that can be captured later and represented as a single u4 constant node
+def decompression_pattern(weights):
+    mask = torch.tensor(15, dtype=torch.uint8).to(weights.device)
+    return torch.stack((torch.bitwise_and(weights, mask), torch.bitwise_right_shift(weights, 4)), dim=-1)
+def patched_forward(self, *args, **kwargs):
+    if hasattr(self, '_hf_hook'):
+        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
+    x = args[0]
+    dtype = x.dtype
+    outshape = x.shape[:-1] + (self.width,)
+    x = x.contiguous().view(-1, x.shape[-1])
+    groups = self.qzeros.shape[0]
+    height = self.qweight.shape[0]
+    unpacked_weights = decompression_pattern(
+        self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
+    unpacked_weights = torch.transpose(
+        unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
+    unpacked_zp = decompression_pattern(
+        self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1)
+    unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales
+    unpacked_weights = unpacked_weights.view(-1, self.width)
+    out = x @ unpacked_weights
+    out = out.view(outshape)
+    if self.bias is not None:
+        out.add_(self.bias)
+    if hasattr(self, '_hf_hook'):
+        out = self._hf_hook.post_forward(self, out)
+    return out
+def patched_forward_sym(self, *args, **kwargs):
+    if hasattr(self, '_hf_hook'):
+        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
+    x = args[0]
+    dtype = x.dtype
+    outshape = x.shape[:-1] + (self.width,)
+    x = x.contiguous().view(-1, x.shape[-1])
+    height = self.qweight.shape[0]
+    unpacked_weights = decompression_pattern(
+        self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
+    unpacked_weights = torch.transpose(
+        unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
+    # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
+    unpacked_weights = unpacked_weights.to(dtype) * self.scales
+    unpacked_weights = unpacked_weights.view(-1, self.width)
+    out = x @ unpacked_weights
+    out = out.view(outshape)
+    if self.bias is not None:
+        out.add_(self.bias)
+    if hasattr(self, '_hf_hook'):
+        out = self._hf_hook.post_forward(self, out)
+    return out
+# All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
+supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']
+def patch_model(model):
+    is_symmetrical = False
+    config = None
+    if hasattr(model, "config"):
+        config = model.config
+    elif hasattr(model, "model") and hasattr(model.model, "config"):
+        # original model was wrapped
+        config = model.model.config
+    if config is not None and hasattr(config, 'quantization_config') and hasattr(config.quantization_config, 'sym'):
+        is_symmetrical = config.quantization_config.sym
+    for name, m in model.named_modules():
+        if hasattr(m, '_openvino_patch_orig_forward'):
+            # already patched, skipping
+            continue
+        # TODO: Check module type
+        is_quantized = getattr(m, 'is_quantized', None)
+        if is_quantized is not None:
+            m.is_quantized = False
+        m.float()  # enables tracing on CPU, applied for all modules
+        if hasattr(m, 'QUANT_TYPE'):
+            if m.QUANT_TYPE not in supported_quant_types:
+                raise ValueError(
+                    f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for AutoGPTQ model, only the following types are supported: {supported_quant_types}')
+            if m.bits != 4:
+                raise ValueError(
+                    f'Unsupported bits == {m.bits} is discovered in module {name} in AutoGPTQ model, only bits == 4 is supported.')
+            int4_in_int32 = 8
+            groups = m.qzeros.shape[0]
+            m.width = m.qweight.shape[1]
+            assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups
+            m._openvino_patch_orig_forward = m.forward
+            if is_symmetrical:
+                m.forward = partial(patched_forward_sym, m)
+            else:
+                m.forward = partial(patched_forward, m)
+            # Keep original field properties to be used when model is returned back to its original state
+            m._openvino_patch_orig_qweights_type = m.qweight.dtype
+            m._openvino_patch_orig_qzeros_type = m.qzeros.dtype
+            m._openvino_patch_orig_scale_shape = m.scales.shape
+            m.qweight = m.qweight.view(dtype=torch.uint8)
+            m.qzeros = m.qzeros.view(dtype=torch.uint8)
+            # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
+            m.add_module(
+                '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))
+            # Adding 17 to move zp+1 step from after unpacking to before to have correct decompression pattern. Can it overflow?
+            m.add_module('_openvino_u4_compression_submodule_qzeros',
+                         KeepWeight(m.qzeros + torch.tensor(17, dtype=torch.uint8)))
+            m.scales = m.scales.view(-1, 1, m.width)
+def unpatch_model(model):
+    for _, m in model.named_modules():
+        if hasattr(m, '_openvino_patch_orig_forward'):
+            try:
+                m.forward = m._openvino_patch_orig_forward
+                del m._openvino_patch_orig_forward
+                m.qweight = m.qweight.view(
+                    dtype=m._openvino_patch_orig_qweights_type)
+                del m._openvino_patch_orig_qweights_type
+                m.qzeros = m.qzeros.view(
+                    dtype=m._openvino_patch_orig_qzeros_type)
+                del m._openvino_patch_orig_qzeros_type
+                m.scales = m.scales.view(m._openvino_patch_orig_scale_shape)
+                del m._openvino_patch_orig_scale_shape
+                del m._openvino_u4_compression_submodule_qweights
+                del m._openvino_u4_compression_submodule_qzeros
+            except Exception as error:
+                print('[ WARNING ] Exception raised during GPTQ model unpatching. Depending on the exact issue it may lead to broken original model')
+                print(error)
+def detect_gptq_model_raw(model):
+    return model and getattr(model, 'config', None) and getattr(model.config, 'quantization_config', None) and model.config.quantization_config.quant_method == 'gptq'
+def detect_gptq_model(model):
+    return detect_gptq_model_raw(model) or getattr(model, 'model', None) and detect_gptq_model_raw(model.model)

intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py ADDED Viewed

@@ -0,0 +1,39 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+# flake8: noqa
+# mypy: ignore-errors
+class ModuleExtension:
+    def __init__(self, module, target_op, evaluate=None, convert=None):
+        """
+        Creates an extension that replaces entire PyTorch module by a single operation.
+        This functionality works with PyTorch models only. A module can be identified by
+        module type (e.g. torch.nn.Linear), module instance in the model or module name.
+        Args:
+            module (str, torch.nn.Module, type(torch.nn.Module)): PyTorch module to replace
+            target_op (str): a target operation that will be used as a replacer for the module,
+                            could be a name of the extension operation or existing PyTorch operation
+                            (with prim:: or aten:: prefix following TorchScript syntax).
+            evaluate (callable with args module, *args, **kwargs): a callable that will replace a target
+                            module in model execution it is responsible for producing valid output for
+                            the module to allow correct model tracing. By default it calls original module
+                            forward with the same arguments. The provided code will not be a part of the final
+                            traced model, it is used only to produce valid results in the tracing.
+            convert (callable with args target_op, *args, **kwargs): a callable that will be traced and become
+                            a part of the final model instead of the target module. It accepts target_op as
+                            the first parameter, target_op is callable that will appear as a single node in the
+                            graph, the type of the node is target_op provided as another argument above.
+        """
+        self.module = module
+        self.target_op = target_op
+        self.evaluate = evaluate
+        if self.evaluate is None:
+            self.evaluate = lambda module, *args, **kwargs: module(*args, **kwargs)
+        self.convert = convert
+        if self.convert is None:
+            self.convert = lambda module, target_op, *args, **kwargs: target_op(*args, **kwargs)

intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py ADDED Viewed

@@ -0,0 +1,118 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+# flake8: noqa
+# mypy: ignore-errors
+import torch
+from openvino.frontend.pytorch import ModuleExtension
+class no_jit_trace:
+    def __enter__(self):
+        self.state = torch._C._get_tracing_state()
+        torch._C._set_tracing_state(None)
+    def __exit__(self, *args):
+        torch._C._set_tracing_state(self.state)
+        self.state = None
+def patch_model(model, module_extensions, orig_forward_name):
+    def module_patcher(m, name):
+        extension = None
+        if m in module_extensions:
+            extension = module_extensions[m]
+        elif m.__class__ in module_extensions:
+            extension = module_extensions[m.__class__]
+        elif name in module_extensions:
+            extension = module_extensions[name]
+        if extension:
+            # The Trampoline class is instantiated for every module replacement, so we can use class members individually for each module.
+            class Trampoline(torch.autograd.Function):
+                target_extension = extension
+                original_module = m
+                stashed_args = None
+                stashed_kwargs = None
+                @staticmethod
+                @torch.jit.ignore
+                def forward(*args, **kwargs):
+                    with no_jit_trace():
+                        # `module` is going to be passed to a user-defined function `evaluate`
+                        # `module` is patched: forward function was replaced, and we are actually in this patched function right in this code
+                        # if we pass `module` as-is to the user code below, and it happens to call forward it will lead to infinite recursion or fail
+                        # so we need to temporary patch the module back to the original forward and then return it back again
+                        # stash the current forward to be able to return it back
+                        patched_forward = m.forward
+                        # set original forward for the module
+                        m.forward = getattr(m, orig_forward_name)
+                        # call user code
+                        results = extension.evaluate(
+                            m, *Trampoline.stashed_args, **Trampoline.stashed_kwargs)  # call user code
+                        m.forward = patched_forward  # return patched forward back
+                        return results
+            def new_forward(*args, **kwargs):
+                Trampoline.stashed_args = args
+                Trampoline.stashed_kwargs = kwargs
+                return extension.convert(m, Trampoline.apply, *args, **kwargs)
+            setattr(m, orig_forward_name, m.forward)
+            m.forward = new_forward
+    for name, m in model.named_modules():
+        if hasattr(m, orig_forward_name):
+            # already patched, skipping with a warning because it is unexpected
+            print(f'[ WARNING ] Unexpectedly found already patched module {name} while applying ModuleExtension during PyTorch model conversion. '
+                  'Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.')
+            continue
+        module_patcher(m, name)
+def unpatch_model(model, orig_forward_name):
+    for _, m in model.named_modules():
+        if hasattr(m, orig_forward_name):
+            try:
+                m.forward = getattr(m, orig_forward_name)
+                delattr(m, orig_forward_name)
+            except Exception as error:
+                print('[ WARNING ] Exception raised during model unpatching. Depending on the exact issue it may lead to broken original model.')
+                print('Original exception details:')
+                print(error)
+def __make_16bit_traceable(model: torch.nn.Module):
+    """
+    Prepare a 16-bit PyTorch model for tracing with OpenVINO.
+     - Replace known list of modules with ModuleExtension.
+     - Convert other modules with weights to FP32.
+    """
+    extensions = {
+        torch.nn.Linear: ModuleExtension(
+            torch.nn.Linear, "ov_ext::linear",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape[:-1]) + [module.out_features], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)),
+        torch.nn.Embedding: ModuleExtension(
+            torch.nn.Embedding, "ov_ext::embedding",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape) + [module.embedding_dim], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, args[0], module.padding_idx, module.scale_grad_by_freq, module.sparse)),
+    }
+    try:
+        from transformers.pytorch_utils import Conv1D
+        extensions[Conv1D] = ModuleExtension(
+            Conv1D, "ov_ext::conv1d",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape[:-1]) + [module.nf], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias))
+    except:
+        pass
+    patch_model(model, extensions,
+                "_openvino_module_extension_patch_orig_forward")
+    for _, module in model.named_modules():
+        if module.__class__ not in extensions and (any([p.dtype in [torch.float16, torch.bfloat16] for p in module.parameters(False)])
+                                                   or any([b.dtype in [torch.float16, torch.bfloat16] for b in module.buffers(False)])):
+            module.float()

intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd ADDED Viewed

Binary file

intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd ADDED Viewed

Binary file

intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd ADDED Viewed

Binary file

intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd ADDED Viewed

Binary file

intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd ADDED Viewed

Binary file

intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py ADDED Viewed

@@ -0,0 +1,131 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+# flake8: noqa
+# mypy: ignore-errors
+import logging
+import os
+from functools import partial
+from hashlib import sha256
+import torch
+from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
+from torch._dynamo.backends.registry import register_backend
+from torch._inductor.compile_fx import compile_fx
+from torch._inductor.freezing import replace_params_with_constants
+from torch.fx.experimental.proxy_tensor import make_fx
+from torch._decomp import decomposition_table, get_decompositions
+from openvino.frontend import FrontEndManager
+from openvino.runtime import Core, Type, PartialShape
+from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder
+from openvino.frontend.pytorch.torchdynamo import decompositions
+from openvino.frontend.pytorch.torchdynamo.decompositions import get_aot_decomposition_list, get_inf_decomposition_list
+from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
+from openvino.frontend.pytorch.torchdynamo.execute import execute, execute_cached
+from openvino.frontend.pytorch.torchdynamo.compile import cached_model_name, openvino_compile_cached_model
+from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_model_caching, _get_decompositions, _get_aot_autograd
+from openvino.runtime import Core, Type, PartialShape
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+"""
+    This is a preview feature in OpenVINO. This feature
+    enables users to compile PyTorch models using torch.compile
+    with OpenVINO as a target backend in PyTorch applications
+    Sample usage:
+    This sample code loads resnet50 torchvision model and compiles it using torch dynamo.
+    We can then use this model for inference. We only need to add two lines of code to
+    the Pytorch applications which are marked in the code below
+    1) import openvino.torch
+    model = torchvision.models.resnet50()
+    2) model = torch.compile(model, backend="openvino")
+"""
+openvino_options = {}
+@register_backend
+@fake_tensor_unsupported
+def openvino(subgraph, example_inputs, options=None):
+    if (_get_aot_autograd(options)):
+        global openvino_options
+        openvino_options = options
+        decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list()
+        return aot_autograd(fw_compiler=fx_openvino,
+                            bw_compiler=fx_openvino,
+                            decompositions=get_decompositions(decompositions))(subgraph, example_inputs)
+    return fx_openvino(subgraph, example_inputs, options)
+def fx_openvino(subgraph, example_inputs, options=None):
+    try:
+        if len(openvino_options) != 0:
+            options = openvino_options
+        executor_parameters = None
+        inputs_reversed = False
+        openvino_model_caching = _get_model_caching(options)
+        if openvino_model_caching is not None and openvino_model_caching:
+            # Create a hash to be used for caching
+            model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest()
+            executor_parameters = {"model_hash_str": model_hash_str}
+            # Check if the model was fully supported and already cached
+            example_inputs.reverse()
+            inputs_reversed = True
+            maybe_fs_cached_name = cached_model_name(model_hash_str + "_fs", _get_device(options), example_inputs, _get_cache_dir(options))
+            if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"):
+                # Model is fully supported and already cached. Run the cached OV model directly.
+                compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, options, *example_inputs)
+                def _call(*args):
+                    res = execute_cached(compiled_model, *args)
+                    return res
+                return _call
+        if inputs_reversed:
+            example_inputs.reverse()
+        preserved_arg_indices = []
+        if (_get_aot_autograd(options)):
+            if tracing_context := torch._guards.TracingContext.try_get():
+                fw_metadata = tracing_context.fw_metadata
+                params_flat = tracing_context.params_flat
+                assert fw_metadata is not None and params_flat is not None
+            preserved_arg_indices = replace_params_with_constants(subgraph, params_flat, fw_metadata)
+            example_inputs = [example_inputs[ind] for ind in preserved_arg_indices]
+            model = subgraph
+        else:
+            from torch._subclasses.fake_tensor import FakeTensorMode
+            decompositions = _get_decompositions(options) + get_inf_decomposition_list()
+            with FakeTensorMode(allow_non_fake_inputs=True):
+                model = make_fx(subgraph, decomposition_table=get_decompositions(decompositions))(*example_inputs)
+            with torch.no_grad():
+                model.eval()
+        partitioner = Partitioner(options)
+        compiled_model = partitioner.make_partitions(model, options)
+        if executor_parameters is not None and 'model_hash_str' in executor_parameters:
+            # Check if the model is fully supported.
+            fully_supported = partitioner.check_fully_supported(compiled_model)
+            if fully_supported:
+                executor_parameters["model_hash_str"] += "_fs"
+        def _call(*args):
+            if(_get_aot_autograd(options)):
+                args_list = args[0]
+                args_new = [args_list[i] for i in preserved_arg_indices]
+                args = args_new
+            res = execute(compiled_model, *args, executor="openvino",
+                          executor_parameters=executor_parameters, options=options)
+            return res
+        if(_get_aot_autograd(options)):
+            _call._boxed_call = True # type: ignore[attr-defined]
+        return _call
+    except Exception as e:
+        logger.debug(f"Failed in OpenVINO execution: {e}")
+        return compile_fx(subgraph, example_inputs)
+def reset():
+    clear_caches()

intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py ADDED Viewed

@@ -0,0 +1,85 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+# flake8: noqa
+# mypy: ignore-errors
+from typing import Optional, Any
+from openvino.runtime import Core
+def _get_device(options) -> Optional[Any]:
+    core = Core()
+    device = "CPU"
+    if options is not None and "device" in options:
+        device = options["device"]
+    if device is not None:
+        assert device in core.available_devices, (
+            "Specified device "
+            + device
+            + " is not in the list of OpenVINO Available Devices"
+        )
+    else:
+        device = "CPU"
+    return device
+def _is_cache_dir_in_config(options) -> Optional[Any]:
+    if options is not None and "config" in options:
+        cfg = options["config"]
+        if cfg is not None and "CACHE_DIR" in cfg:
+            return True
+    return False
+def _get_cache_dir(options) -> Optional[Any]:
+    cache_dir = "./cache"
+    if options is not None and "cache_dir" in options:
+        cache_dir = options["cache_dir"]
+    if _is_cache_dir_in_config(options):
+        cache_dir = options["config"]["CACHE_DIR"]
+    return cache_dir
+def _get_aot_autograd(options) -> Optional[Any]:
+    if options is not None and "aot_autograd" in options:
+        aot_autograd = options["aot_autograd"]
+        if bool(aot_autograd) and str(aot_autograd).lower() not in ["false", "0"]:
+            return True
+        else:
+            return False
+def _get_model_caching(options) -> Optional[Any]:
+    if options is not None and "model_caching" in options:
+        caching = options["model_caching"]
+        if bool(caching) and str(caching).lower() not in ["false", "0"]:
+            return True
+    return False
+def _get_config(options) -> Optional[Any]:
+    if options is not None and "config" in options:
+        return options["config"]
+    return {}
+def _get_decompositions(options) -> Optional[Any]:
+    decompositions = []
+    if options is not None and "decompositions" in options:
+        decompositions = options["decompositions"]
+    return decompositions
+def _get_disabled_ops(options) -> Optional[Any]:
+    disabled_ops = []
+    if options is not None and "disabled_ops" in options:
+        disabled_ops = options["disabled_ops"]
+    return disabled_ops
+def _is_testing(options) -> Optional[Any]:
+    if options is not None and "testing" in options:
+        is_testing = options["testing"]
+        if bool(is_testing) and str(is_testing).lower not in ["false", "0"]:
+            return True
+    return False