bigdl_core_npu-2.6.0b20250114-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/__init__.py +0 -0
- bigdl-core-npu/include/common.h +96 -0
- bigdl-core-npu/include/npu_llm.h +74 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- bigdl-core-npu/npu_llm.lib +0 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/METADATA +44 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/RECORD +234 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/WHEEL +5 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/top_level.txt +2 -0
- intel_npu_acceleration_library/__init__.py +24 -0
- intel_npu_acceleration_library/_version.py +6 -0
- intel_npu_acceleration_library/backend/__init__.py +37 -0
- intel_npu_acceleration_library/backend/base.py +250 -0
- intel_npu_acceleration_library/backend/bindings.py +383 -0
- intel_npu_acceleration_library/backend/compression.py +24 -0
- intel_npu_acceleration_library/backend/convolution.py +58 -0
- intel_npu_acceleration_library/backend/factory.py +1161 -0
- intel_npu_acceleration_library/backend/linear.py +60 -0
- intel_npu_acceleration_library/backend/matmul.py +59 -0
- intel_npu_acceleration_library/backend/mlp.py +58 -0
- intel_npu_acceleration_library/backend/ops.py +142 -0
- intel_npu_acceleration_library/backend/qlinear.py +75 -0
- intel_npu_acceleration_library/backend/qmatmul.py +66 -0
- intel_npu_acceleration_library/backend/runtime.py +215 -0
- intel_npu_acceleration_library/backend/sdpa.py +107 -0
- intel_npu_acceleration_library/backend/tensor.py +1120 -0
- intel_npu_acceleration_library/backend/utils.py +70 -0
- intel_npu_acceleration_library/compiler.py +194 -0
- intel_npu_acceleration_library/device.py +230 -0
- intel_npu_acceleration_library/dtypes.py +155 -0
- intel_npu_acceleration_library/external/openvino/__init__.py +72 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +21 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
- intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +370 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +180 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +118 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +131 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +290 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +126 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +568 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +258 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +481 -0
- intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
- intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +28 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +5 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +22 -0
- intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
- intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
- intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3068 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +398 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +17 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +276 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +215 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +787 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +40 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +447 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +156 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
- intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +550 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +40 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +298 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +214 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +196 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
- intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
- intel_npu_acceleration_library/external/openvino/utils.py +115 -0
- intel_npu_acceleration_library/functional/__init__.py +8 -0
- intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
- intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/modelling.py +150 -0
- intel_npu_acceleration_library/nn/__init__.py +20 -0
- intel_npu_acceleration_library/nn/autograd.py +68 -0
- intel_npu_acceleration_library/nn/conv.py +257 -0
- intel_npu_acceleration_library/nn/functional.py +1207 -0
- intel_npu_acceleration_library/nn/linear.py +162 -0
- intel_npu_acceleration_library/nn/llm.py +417 -0
- intel_npu_acceleration_library/nn/module.py +393 -0
- intel_npu_acceleration_library/optimizations.py +157 -0
- intel_npu_acceleration_library/quantization.py +174 -0
intel_npu_acceleration_library/backend/utils.py
@@ -0,0 +1,70 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from functools import lru_cache
+from .bindings import lib
+import warnings
+import sys
+
+__min_npu_driver_version__ = 2408
+
+
+@lru_cache
+def npu_available() -> bool:
+    """Return if the NPU is available.
+
+    Returns:
+        bool: Return True if the NPU is available in the system
+    """
+    return lib.isNPUAvailable()
+
+
+def get_driver_installation_url() -> str:
+    """Get the driver installation URL.
+
+    Returns:
+        str: Return the driver installation url
+    """
+    if sys.platform == "win32":
+        return "Driver Update URL: https://www.intel.com/content/www/us/en/download/794734/intel-npu-driver-windows.html"
+    elif sys.platform == "linux":
+        return "Driver Update URL: https://github.com/intel/linux-npu-driver"
+    else:
+        return ""
+
+
+@lru_cache
+def get_driver_version() -> int:
+    """Get the driver version for the Intel® NPU Acceleration Library.
+
+    Raises:
+        RuntimeError: an error is raised if the platform is not supported. Currently supported platforms are Windows and Linux
+
+    Returns:
+        int: NPU driver version
+    """
+    if not npu_available():
+        raise RuntimeError("NPU is not available on this system")
+
+    return lib.getNPUDriverVersion()
+
+
+def check_npu_and_driver_version():
+    """Check NPU and driver version."""
+    if not npu_available():
+        warnings.warn(
+            "NPU is not available in your system. Library will fallback to AUTO device selection mode",
+            stacklevel=2,
+        )
+    elif get_driver_version() < __min_npu_driver_version__:
+
+        warnings.warn(
+            f"\nWarning: Outdated Driver Detected!!!\n"
+            f"Current Driver Version: {get_driver_version()}, Minimum Required Version: {__min_npu_driver_version__}\n"
+            f"Using an outdated driver may result in reduced performance and unexpected errors and crashes.\n"
+            f"To avoid these issues, please update your driver to the latest version.\n"
+            f"{get_driver_installation_url()}\n",
+            stacklevel=2,
+        )
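A minimal usage sketch of the driver helpers above, assuming the wheel is installed so that `intel_npu_acceleration_library.backend.utils` imports resolve; the surrounding script is illustrative and not part of the package:

import torch  # not required by the helpers, shown only for context
from intel_npu_acceleration_library.backend.utils import (
    check_npu_and_driver_version,
    get_driver_version,
    npu_available,
)

if npu_available():
    # @lru_cache means the native isNPUAvailable() call happens once per process
    print("NPU driver version:", get_driver_version())
else:
    # get_driver_version() would raise RuntimeError in this case
    print("No NPU detected; library falls back to AUTO device selection")

# Emits a UserWarning when the NPU is missing or the driver is older than 2408
check_npu_and_driver_version()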
intel_npu_acceleration_library/compiler.py
@@ -0,0 +1,194 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from intel_npu_acceleration_library.optimizations import horizontal_fusion_linear
+from transformers.models.llama.modeling_llama import LlamaMLP, LlamaAttention
+from transformers.models.gemma.modeling_gemma import GemmaMLP, GemmaAttention
+from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear
+from intel_npu_acceleration_library.quantization import quantize_model
+from intel_npu_acceleration_library.dtypes import int8, int4
+import intel_npu_acceleration_library.nn as nn
+from torch._dynamo import register_backend
+from typing import Union, Callable, Any
+from typing import List
+import torch
+
+
+def compile(
+    model: torch.nn.Module, dtype: torch.dtype = torch.float16, training: bool = False
+) -> torch.nn.Module:
+    """Compile a model for the NPU.
+
+    Args:
+        model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
+        dtype (torch.dtype): the model target datatype, default to torch.float16
+        training (bool): enable training. Default disabled
+
+    Raises:
+        RuntimeError: invalid datatypes
+
+    Returns:
+        torch.nn.Module: compiled NPU nn.Module
+    """
+    if not (dtype.is_floating_point or dtype in (int8, int4)):
+        raise RuntimeError(
+            f"intel-npu-acceleration-library does not yet support the requested datatype: {dtype}"
+        )
+
+    # Prepare and optimize model for NPU
+    with torch.no_grad():
+        # General optimizations
+        apply_general_optimizations(model)
+        if dtype in (int8, int4):
+            # Quantize model
+            model = quantize_model(model, dtype)
+
+        # Model lowering to NPU ops
+        create_npu_kernels(model)
+
+    if dtype.is_floating_point and training:
+        # Set model to evaluation only as quantized training is not supported yet
+        return model
+
+    return model.eval()
+
+
+def apply_general_optimizations(model: torch.nn.Module):
+    """Apply general optimizations to a torch.nn.Module.
+
+    Args:
+        model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
+    """
+    apply_horizontal_fusion(model)
+    optimize_llama_attention(model)
+
+
+def create_npu_kernels(model: torch.nn.Module):
+    """Create NPU kernels.
+
+    Args:
+        model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
+    """
+    lower_linear(model)
+
+
+def module_optimization(func: Callable) -> Callable:
+    """Optimize recursively a torch.nn.Module with a specific function.
+
+    The function `func` gets called recursively on every module in the network.
+
+    Args:
+        func (Callable): optimization function
+
+    Returns:
+        Callable: the wrapped optimization function
+    """
+
+    def wrapper(model: torch.nn.Module, *args: Any, **kwargs: Any):
+        """Recursively apply the optimization function.
+
+        Args:
+            model (torch.nn.Module): original module
+            args (Any): positional arguments
+            kwargs (Any): keyword arguments
+
+        """
+        for name, layer in model.named_children():
+            new_layer = func(name, layer, *args, **kwargs)
+            if new_layer:
+                model.add_module(name, new_layer)
+                wrapper(new_layer, *args, **kwargs)
+            else:
+                wrapper(layer, *args, **kwargs)
+
+    return wrapper
+
+
+@module_optimization
+def lower_linear(name: str, layer: torch.nn.Module) -> Union[torch.nn.Module, None]:
+    """Lower torch.nn.Linear layer to NPU equivalent operators.
+
+    Args:
+        name (str): Layer name
+        layer (torch.nn.Module): Original torch.nn.Linear module
+
+    Raises:
+        RuntimeError: unsupported quantization bits
+
+    Returns:
+        Union[torch.nn.Module, None]: Return the new NPU operator or None
+    """
+    if isinstance(layer, torch.nn.Linear):
+        return nn.Linear.fromTorch(layer)
+    if isinstance(layer, torch.nn.Conv2d):
+        return nn.Conv2d.fromTorch(layer)
+    if isinstance(layer, WeightOnlyLinear):
+        if layer.bits == 4:
+            return nn.QuantizedLinear(
+                layer.qweight.to(torch.uint8), layer.scales, layer.bias
+            )
+        elif layer.bits == 8:
+            return nn.QuantizedLinear(
+                layer.qweight.view(torch.int8), layer.scales, layer.bias
+            )
+        else:
+            raise RuntimeError(f"Unsupported quantization bits: {layer.bits}")
+    return None
+
+
+@module_optimization
+def apply_horizontal_fusion(
+    name: str, layer: torch.nn.Module
+) -> Union[torch.nn.Module, None]:
+    """Apply horizontal fusion (merging two linear layers with the same input) when necessary.
+
+    Args:
+        name (str): Layer name
+        layer (torch.nn.Module): Original module
+
+    Returns:
+        Union[torch.nn.Module, None]: optimized module
+    """
+    if isinstance(layer, (LlamaMLP, GemmaMLP)):
+        return horizontal_fusion_linear(layer)
+    return None
+
+
+@module_optimization
+def optimize_llama_attention(
+    name: str, layer: torch.nn.Module
+) -> Union[torch.nn.Module, None]:
+    """Optimize LLAMA attention block.
+
+    Args:
+        name (str): Module name
+        layer (torch.nn.Module): Original Module
+
+    Returns:
+        Union[torch.nn.Module, None]: optimized llama module
+    """
+    if isinstance(layer, (LlamaAttention, GemmaAttention)):
+        return nn.LlamaAttention.fromTorch(layer)
+    return None
+
+
+@register_backend
+def npu(
+    gm: Union[torch.nn.Module, torch.fx.GraphModule], example_inputs: List[torch.Tensor]
+) -> Union[torch.nn.Module, torch.fx.GraphModule]:
+    """Implement the custom torch 2.0 compile backend for the NPU.
+
+    Args:
+        gm (Union[torch.nn.Module, torch.fx.GraphModule]): The torch fx Module
+        example_inputs (List[torch.Tensor]): A list of example inputs
+
+    Returns:
+        Union[torch.nn.Module, torch.fx.GraphModule]: The compiled model
+    """
+    # Run some optimizations
+    gm = horizontal_fusion_linear(gm)
+
+    # For now compile in fp16
+    return compile(gm)
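A short sketch of the two entry points this module exposes. The toy `Sequential` model is an assumption for illustration; note that importing `compiler` pulls in `transformers` and `neural_compressor`, and actually running either path requires an Intel NPU driver at runtime:

import torch
from intel_npu_acceleration_library.compiler import compile
from intel_npu_acceleration_library.dtypes import int8

# Illustrative toy model, not part of the package
model = torch.nn.Sequential(
    torch.nn.Linear(256, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 10),
)

# Eager path: horizontal fusion, int8 quantization, then lowering of
# Linear/Conv2d layers to NPU kernels; returns the model in eval() mode
npu_model = compile(model, dtype=int8)

# torch 2.x path: the @register_backend decorator above exposes "npu" as a
# torch.compile backend (which currently compiles in fp16)
compiled = torch.compile(model, backend="npu")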
intel_npu_acceleration_library/device.py
@@ -0,0 +1,230 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from intel_npu_acceleration_library.nn.module import convert_to_npu_module
+from intel_npu_acceleration_library.backend.tensor import RemoteTensor
+from torch.overrides import TorchFunctionMode
+from functools import lru_cache
+from typing import Any, MutableMapping
+import torch
+
+
+class NPUDevice(TorchFunctionMode):
+    """
+    Represents an NPU device.
+
+    This class extends the `TorchFunctionMode` class and provides an implementation
+    for the `__torch_function__` method.
+
+    Attributes:
+        IMPLEMENTATIONS (MutableMapping[Any, Any]): A dictionary mapping functions to their implementations.
+
+    Methods:
+        __torch_function__(func, types, args=(), kwargs=None): Overrides the `__torch_function__`
+            method to provide custom behavior for torch functions.
+
+    """
+
+    IMPLEMENTATIONS: MutableMapping[Any, Any] = {}
+
+    def __torch_function__(
+        self, func: Any, types: Any, args: Any = (), kwargs: Any = None
+    ):
+        """
+        Override the torch function behavior for the device class.
+
+        Args:
+            func (Any): The torch function being called.
+            types (Any): The types of the arguments being passed to the function.
+            args (Any, optional): The positional arguments being passed to the function. Defaults to ().
+            kwargs (Any, optional): The keyword arguments being passed to the function. Defaults to None.
+
+        Returns:
+            Any: The result of the torch function call.
+        """
+
+        def super_fn(*args: Any, **kwargs: Any):
+            """Disable torch_function and return the result of calling `func` with the given arguments and keyword arguments.
+
+            Parameters:
+                args (Any): Variable length argument list.
+                kwargs (Any): Arbitrary keyword arguments.
+
+            Returns:
+                Any: The result of calling the `func` function with the given arguments and keyword arguments.
+            """
+            # Disable torch_function by hand because we don't want the wrapping behavior of
+            # the super() impl
+            # with torch._C.DisableTorchFunction():
+            return func(*args, **kwargs)
+
+        if func in self.IMPLEMENTATIONS:
+            return self.IMPLEMENTATIONS[func](super_fn, *args, **kwargs or {})
+
+        # This is just a no-op for all the non-factory functions:
+        return super_fn(*args, **kwargs or {})
+
+
+# Convenient wrapper to register functions
+def implements_factory(func: Any):
+    """
+    Register a decorator function that implements a factory function.
+
+    Args:
+        func (Any): The factory function to register an implementation for.
+
+    Returns:
+        Callable: The decorated implementation function.
+    """
+
+    def _inner_fn(impl: Any):
+        """
+        Implement a decorator used to register an implementation for a specific function.
+
+        Args:
+            impl (Any): The implementation to be registered.
+
+        Returns:
+            Any: The registered implementation.
+        """
+        NPUDevice.IMPLEMENTATIONS[func] = impl
+        return impl
+
+    return _inner_fn
+
+
+def parse_to_arguments(*args: Any, **kwargs: Any):
+    """
+    Parse the arguments and keyword arguments to handle device selection.
+
+    Args:
+        args: Variable length argument list.
+        kwargs: Arbitrary keyword arguments.
+
+    Returns:
+        Tuple: A tuple containing the following:
+            - npu_device (bool): Indicates whether the device is an NPU device.
+            - new_args (list): List of modified arguments.
+            - kwargs (dict): Dictionary of modified keyword arguments.
+    """
+    device = kwargs.get("device", None)
+    npu_device = False
+    if device == "npu":
+        npu_device = True
+        kwargs["device"] = "cpu"
+
+    new_args = []
+    for arg in args:
+        if arg == "npu":
+            npu_device = True
+            new_args.append("cpu")
+        else:
+            new_args.append(arg)
+
+    return npu_device, new_args, kwargs
+
+
+@implements_factory(torch.device)
+def device(super_fn: Any, device, *args: Any, **kwargs: Any):
+    """
+    Return the device based on the input device name.
+
+    Args:
+        super_fn (Any): The super function to call.
+        device (str): The name of the device.
+        args (Any): Additional positional arguments to pass to the super function.
+        kwargs (Any): Additional keyword arguments to pass to the super function.
+
+    Returns:
+        torch.device: The device object.
+
+    """
+    if device == "npu":
+        # Patch the device to return the NPU device
+        return torch.device("cpu")
+    return super_fn(device, *args, **kwargs)
+
+
+@implements_factory(torch.Tensor.to)
+def to(super_fn: Any, self: Any, *args: Any, **kwargs: Any):
+    """
+    Convert the tensor to the specified device.
+
+    Args:
+        super_fn: The super function to call.
+        args: Additional positional arguments.
+        kwargs: Additional keyword arguments.
+
+    Returns:
+        The converted tensor.
+
+    Note:
+        This implementation only supports a subset of the `.to()` functionality.
+        Once the remote tensor feature is available, it can be converted to a remote tensor.
+    """
+    npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
+    if npu_device:
+        return super_fn(RemoteTensor.from_torch(self), *args, **kwargs)
+    return super_fn(self, *args, **kwargs)
+
+
+@implements_factory(torch._C._nn._parse_to)
+def _parse_to(super_fn: Any, *args: Any, **kwargs: Any):
+    """
+    Parse the arguments and return the device, dtype, non_blocking, and convert_to_format.
+
+    Args:
+        super_fn (Any): The super function to call.
+        args (Any): Positional arguments.
+        kwargs (Any): Keyword arguments.
+
+    Returns:
+        Tuple: A tuple containing the device, dtype, non_blocking, and convert_to_format.
+    """
+    npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
+
+    device, dtype, non_blocking, convert_to_format = super_fn(*args, **kwargs)
+
+    if npu_device:
+        device = "npu"
+
+    return device, dtype, non_blocking, convert_to_format
+
+
+def new_to(self, *args: Any, **kwargs: Any):
+    """
+    Move the input tensor(s) to the specified device.
+
+    Args:
+        args: Variable length argument list of devices to move the tensor(s) to.
+        kwargs: Keyword arguments for the `to` method.
+
+    Returns:
+        Tensor or Module: The tensor or module with the tensor(s) moved to the specified device(s).
+    """
+    npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
+
+    if npu_device:
+        self = convert_to_npu_module(self).to("npu")
+
+    return self._to(*args, **kwargs)
+
+
+@lru_cache()
+def enable_npu_device():
+    """
+    Enable the NPU device for acceleration.
+
+    This function globally enables the NPU device mode by creating an instance of `NPUDevice` and
+    modifying the `torch.nn.Module.to` method to use a custom implementation called `new_to`.
+
+    Usage:
+        enable_npu_device()
+
+    """
+    holder = NPUDevice()
+    holder.__enter__()
+    torch.nn.Module._to = torch.nn.Module.to
+    torch.nn.Module.to = new_to
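The module above creates the "npu" device illusion: factory calls are intercepted by the `TorchFunctionMode`, and `Module.to` is monkey-patched. A sketch of the intended flow; the `Linear` module and shapes are illustrative assumptions, and actually materializing weights on the NPU depends on `convert_to_npu_module` and a working driver:

import torch
from intel_npu_acceleration_library.device import enable_npu_device

# Enters the NPUDevice mode and patches torch.nn.Module.to (lru_cache makes
# repeated calls a no-op)
enable_npu_device()

dev = torch.device("npu")          # intercepted: returns a CPU device object

model = torch.nn.Linear(128, 128)  # illustrative toy module
model = model.to("npu")            # new_to -> convert_to_npu_module(model).to("npu")

x = torch.rand(1, 128).to("npu")   # Tensor.to dispatches via RemoteTensor.from_torch
y = model(x)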
intel_npu_acceleration_library/dtypes.py
@@ -0,0 +1,155 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from dataclasses import dataclass
+from typing import Union
+import numpy as np
+import torch
+import ctypes
+
+@dataclass(frozen=True)
+class NPUDtype:
+    """Represents a custom data type for NPUs (Neural Processing Units).
+
+    Attributes:
+        name: str: The name of the data type.
+        bits: int: The number of bits used to represent the data type.
+        min: int: The minimum value that can be represented by the data type.
+        max: int: The maximum value that can be represented by the data type.
+        torch_dtype: torch.dtype: The corresponding torch data type.
+        is_floating_point: bool: True if the data type is floating-point, False otherwise.
+    """
+
+    name: str
+    bits: int
+    min: int
+    max: int
+    torch_dtype: torch.dtype
+
+    @property
+    def is_floating_point(self) -> bool:
+        """
+        Check if the data type is a floating-point type.
+
+        Returns:
+            bool: True if the data type is floating-point, False otherwise.
+        """
+        return self.torch_dtype.is_floating_point
+
+    def __eq__(self, value: Union["NPUDtype", torch.dtype]) -> bool:
+        """
+        Compare the NPUDtype object with another NPUDtype or torch.dtype object.
+
+        Args:
+            value (Union["NPUDtype", torch.dtype]): The object to compare with.
+
+        Returns:
+            bool: True if the objects are equal, False otherwise.
+        """
+        if isinstance(value, torch.dtype):
+            if value.is_floating_point:
+                info = torch.finfo(value)
+            else:
+                info = torch.iinfo(value)
+            return (
+                self.bits == info.bits
+                and self.max == info.max
+                and self.min == info.min
+                and self.torch_dtype == value
+            )
+        if isinstance(value, type):
+            value = np.dtype(value)
+            if value.kind == "f":
+                info = np.finfo(value)
+            else:
+                info = np.iinfo(value)
+            return (
+                self.bits == info.bits and self.max == info.max and self.min == info.min
+            )
+        else:
+            return super().__eq__(value)
+
+    def __repr__(self) -> str:
+        """
+        Return a string representation of the NPUDtype object.
+
+        Returns:
+            str: The string representation of the NPUDtype object.
+        """
+        return self.name
+
+
+def get_backend_dtype(dtype) -> ctypes.c_char_p:
+    """Get the string representation of the dtype.
+    Args:
+        dtype: numpy or torch dtype
+    Raises:
+        RuntimeError: Unsupported datatype
+    Returns:
+        ctypes.c_char_p: string representation of the dtype
+    """
+    if dtype in [np.int8, torch.int8]:
+        str_dtype = "int8"
+    elif dtype in [np.uint8, int4, torch.uint8]:
+        # u8 represents packed i4 dtypes
+        str_dtype = "int4"
+    elif dtype in [np.int16, torch.int16]:
+        str_dtype = "int16"
+    elif dtype in [np.int32, torch.int32]:
+        str_dtype = "int32"
+    elif dtype in [np.int64, torch.int64]:
+        str_dtype = "int64"
+    elif dtype in [np.float16, torch.float16]:
+        str_dtype = "float16"
+    elif dtype in [np.float32, torch.float32]:
+        str_dtype = "float32"
+    elif dtype in [np.float64, torch.float64]:
+        str_dtype = "float64"
+    elif dtype in [bfloat16, torch.bfloat16]:
+        str_dtype = "bfloat16"
+    else:
+        raise RuntimeError(f"DType is not supported {dtype}")
+    return ctypes.c_char_p(str_dtype.encode())
+
+
+float16 = NPUDtype(
+    "fp16",
+    16,
+    torch.finfo(torch.float16).min,
+    torch.finfo(torch.float16).max,
+    torch.float16,
+)
+bfloat16 = NPUDtype(
+    "bf16",
+    16,
+    torch.finfo(torch.bfloat16).min,
+    torch.finfo(torch.bfloat16).max,
+    torch.bfloat16,
+)
+float32 = NPUDtype(
+    "fp32",
+    32,
+    torch.finfo(torch.float32).min,
+    torch.finfo(torch.float32).max,
+    torch.float32,
+)
+float64 = NPUDtype(
+    "fp64",
+    64,
+    torch.finfo(torch.float64).min,
+    torch.finfo(torch.float64).max,
+    torch.float64,
+)
+int4 = NPUDtype("int4", 4, -8, 7, torch.int8)
+int8 = NPUDtype("int8", 8, -128, 127, torch.int8)
+int16 = NPUDtype(
+    "int16", 16, torch.iinfo(torch.int16).min, torch.iinfo(torch.int16).max, torch.int16
+)
+int32 = NPUDtype(
+    "int32", 32, torch.iinfo(torch.int32).min, torch.iinfo(torch.int32).max, torch.int32
+)
+int64 = NPUDtype(
+    "int64", 64, torch.iinfo(torch.int64).min, torch.iinfo(torch.int64).max, torch.int64
+)
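A few illustrative checks of the `NPUDtype` comparison semantics and the backend string mapping defined above; the expected values in the comments follow directly from the code, and the script itself is not part of the wheel:

import torch
from intel_npu_acceleration_library.dtypes import float16, get_backend_dtype, int4, int8

assert int8 == torch.int8        # bits, min, max and torch_dtype all match
assert not (int4 == torch.int8)  # a 4-bit range stored in an int8 container
print(repr(float16))             # fp16

print(get_backend_dtype(torch.float32).value)  # b'float32'
print(get_backend_dtype(torch.uint8).value)    # b'int4' (u8 carries packed i4)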