bigdl-core-npu 2.5.0 (cp311-cp311-win_amd64.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. bigdl_core_npu-2.5.0.dist-info/METADATA +35 -0
  2. bigdl_core_npu-2.5.0.dist-info/RECORD +223 -0
  3. bigdl_core_npu-2.5.0.dist-info/WHEEL +5 -0
  4. bigdl_core_npu-2.5.0.dist-info/top_level.txt +1 -0
  5. intel_npu_acceleration_library/__init__.py +24 -0
  6. intel_npu_acceleration_library/_version.py +6 -0
  7. intel_npu_acceleration_library/backend/__init__.py +37 -0
  8. intel_npu_acceleration_library/backend/base.py +215 -0
  9. intel_npu_acceleration_library/backend/bindings.py +279 -0
  10. intel_npu_acceleration_library/backend/compression.py +24 -0
  11. intel_npu_acceleration_library/backend/convolution.py +58 -0
  12. intel_npu_acceleration_library/backend/factory.py +944 -0
  13. intel_npu_acceleration_library/backend/linear.py +60 -0
  14. intel_npu_acceleration_library/backend/matmul.py +59 -0
  15. intel_npu_acceleration_library/backend/mlp.py +58 -0
  16. intel_npu_acceleration_library/backend/ops.py +141 -0
  17. intel_npu_acceleration_library/backend/qlinear.py +71 -0
  18. intel_npu_acceleration_library/backend/qmatmul.py +66 -0
  19. intel_npu_acceleration_library/backend/runtime.py +210 -0
  20. intel_npu_acceleration_library/backend/sdpa.py +107 -0
  21. intel_npu_acceleration_library/backend/tensor.py +1050 -0
  22. intel_npu_acceleration_library/backend/utils.py +70 -0
  23. intel_npu_acceleration_library/compiler.py +194 -0
  24. intel_npu_acceleration_library/device.py +230 -0
  25. intel_npu_acceleration_library/dtypes.py +122 -0
  26. intel_npu_acceleration_library/external/openvino/__init__.py +71 -0
  27. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +20 -0
  28. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  29. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  30. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  31. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  32. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  33. intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
  34. intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
  35. intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
  36. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
  42. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  43. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  44. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  45. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  46. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  47. intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
  48. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +352 -0
  49. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +139 -0
  50. intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
  51. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +98 -0
  52. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  53. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  54. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  55. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  56. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  57. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +119 -0
  58. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
  59. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
  60. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
  61. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
  62. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +289 -0
  63. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +118 -0
  64. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +536 -0
  65. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +256 -0
  66. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +460 -0
  75. intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
  76. intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
  77. intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
  78. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +26 -0
  79. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
  80. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
  81. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +4 -0
  82. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
  83. intel_npu_acceleration_library/external/openvino/properties/__init__.py +21 -0
  84. intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
  85. intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
  86. intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
  87. intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
  88. intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
  89. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
  90. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
  91. intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
  92. intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
  93. intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
  94. intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
  95. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
  96. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +18 -0
  97. intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
  98. intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
  99. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3067 -0
  100. intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
  101. intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
  102. intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
  103. intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
  105. intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
  107. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +399 -0
  108. intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
  110. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +10 -0
  111. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +85 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
  114. intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
  115. intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
  116. intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
  117. intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
  118. intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
  119. intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
  120. intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
  121. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +189 -0
  122. intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
  123. intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
  124. intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
  125. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +783 -0
  126. intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
  127. intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
  128. intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
  129. intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
  130. intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
  131. intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
  132. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +38 -0
  133. intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
  134. intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
  135. intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
  136. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
  137. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +429 -0
  138. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
  139. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +70 -0
  140. intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
  141. intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
  142. intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
  143. intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
  144. intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
  145. intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
  146. intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
  147. intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
  148. intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
  149. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
  150. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
  151. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
  152. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
  153. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
  154. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
  155. intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
  156. intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
  157. intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
  158. intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
  159. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
  160. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +536 -0
  161. intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
  162. intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
  163. intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
  164. intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
  165. intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
  166. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +35 -0
  167. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
  168. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
  169. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
  170. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
  171. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
  172. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
  173. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
  174. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
  175. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +246 -0
  176. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
  177. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +205 -0
  178. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
  179. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
  180. intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
  181. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
  182. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
  183. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
  184. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +109 -0
  185. intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
  186. intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
  187. intel_npu_acceleration_library/external/openvino/utils.py +98 -0
  188. intel_npu_acceleration_library/functional/__init__.py +8 -0
  189. intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
  190. intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
  191. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  192. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  193. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  194. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  195. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  196. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  197. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  198. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  199. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  200. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  201. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  202. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  203. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  204. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  205. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  206. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  207. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  208. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  209. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  210. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  211. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  212. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  213. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  214. intel_npu_acceleration_library/modelling.py +150 -0
  215. intel_npu_acceleration_library/nn/__init__.py +20 -0
  216. intel_npu_acceleration_library/nn/autograd.py +68 -0
  217. intel_npu_acceleration_library/nn/conv.py +257 -0
  218. intel_npu_acceleration_library/nn/functional.py +1207 -0
  219. intel_npu_acceleration_library/nn/linear.py +162 -0
  220. intel_npu_acceleration_library/nn/llm.py +417 -0
  221. intel_npu_acceleration_library/nn/module.py +393 -0
  222. intel_npu_acceleration_library/optimizations.py +157 -0
  223. intel_npu_acceleration_library/quantization.py +174 -0
intel_npu_acceleration_library/backend/utils.py
@@ -0,0 +1,70 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from functools import lru_cache
+ from .bindings import lib
+ import warnings
+ import sys
+
+ __min_npu_driver_version__ = 2408
+
+
+ @lru_cache
+ def npu_available() -> bool:
+     """Return if the NPU is available.
+
+     Returns:
+         bool: Return True if the NPU is available in the system
+     """
+     return lib.isNPUAvailable()
+
+
+ def get_driver_installation_url() -> str:
+     """Get the driver installation URL.
+
+     Returns:
+         str: Return the driver installation URL
+     """
+     if sys.platform == "win32":
+         return "Driver Update URL: https://www.intel.com/content/www/us/en/download/794734/intel-npu-driver-windows.html"
+     elif sys.platform == "linux":
+         return "Driver Update URL: https://github.com/intel/linux-npu-driver"
+     else:
+         return ""
+
+
+ @lru_cache
+ def get_driver_version() -> int:
+     """Get the driver version for the Intel® NPU Acceleration Library.
+
+     Raises:
+         RuntimeError: an error is raised if the platform is not supported. Currently supported platforms are Windows and Linux
+
+     Returns:
+         int: NPU driver version
+     """
+     if not npu_available():
+         raise RuntimeError("NPU is not available on this system")
+
+     return lib.getNPUDriverVersion()
+
+
+ def check_npu_and_driver_version():
+     """Check the NPU and driver version."""
+     if not npu_available():
+         warnings.warn(
+             "NPU is not available in your system. Library will fall back to AUTO device selection mode",
+             stacklevel=2,
+         )
+     elif get_driver_version() < __min_npu_driver_version__:
+
+         warnings.warn(
+             f"\nWarning: Outdated Driver Detected!!!\n"
+             f"Current Driver Version: {get_driver_version()}, Minimum Required Version: {__min_npu_driver_version__}\n"
+             f"Using an outdated driver may result in reduced performance and unexpected errors and crashes.\n"
+             f"To avoid these issues, please update your driver to the latest version.\n"
+             f"{get_driver_installation_url()}\n",
+             stacklevel=2,
+         )
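For reference, a minimal sketch of how these helpers can be exercised. It assumes the wheel is installed and that the functions are imported from the file this hunk adds (the package may also re-export them elsewhere):

    # Driver sanity check, based on the functions in the hunk above.
    from intel_npu_acceleration_library.backend.utils import (
        npu_available,
        get_driver_version,
        check_npu_and_driver_version,
    )

    check_npu_and_driver_version()  # warns if no NPU is found or the driver is older than 2408

    if npu_available():
        print(f"NPU driver version: {get_driver_version()}")
    else:
        print("No NPU found; the library falls back to AUTO device selection")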
intel_npu_acceleration_library/compiler.py
@@ -0,0 +1,194 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from intel_npu_acceleration_library.optimizations import horizontal_fusion_linear
+ from transformers.models.llama.modeling_llama import LlamaMLP, LlamaAttention
+ from transformers.models.gemma.modeling_gemma import GemmaMLP, GemmaAttention
+ from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear
+ from intel_npu_acceleration_library.quantization import quantize_model
+ from intel_npu_acceleration_library.dtypes import int8, int4
+ import intel_npu_acceleration_library.nn as nn
+ from torch._dynamo import register_backend
+ from typing import Union, Callable, Any
+ from typing import List
+ import torch
+
+
+ def compile(
+     model: torch.nn.Module, dtype: torch.dtype = torch.float16, training: bool = False
+ ) -> torch.nn.Module:
+     """Compile a model for the NPU.
+
+     Args:
+         model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
+         dtype (torch.dtype): the model target datatype, defaults to torch.float16
+         training (bool): enable training. Disabled by default
+
+     Raises:
+         RuntimeError: invalid datatypes
+
+     Returns:
+         torch.nn.Module: compiled NPU nn.Module
+     """
+     if not (dtype.is_floating_point or dtype in (int8, int4)):
+         raise RuntimeError(
+             f"intel-npu-acceleration-library does not yet support the requested datatype: {dtype}"
+         )
+
+     # Prepare and optimize model for NPU
+     with torch.no_grad():
+         # General optimizations
+         apply_general_optimizations(model)
+         if dtype in (int8, int4):
+             # Quantize model
+             model = quantize_model(model, dtype)
+
+         # Model lowering to NPU ops
+         create_npu_kernels(model)
+
+     if dtype.is_floating_point and training:
+         # Only floating-point models can stay trainable; quantized training is not supported yet
+         return model
+
+     return model.eval()
+
+
+ def apply_general_optimizations(model: torch.nn.Module):
+     """Apply general optimizations to a torch.nn.Module.
+
+     Args:
+         model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
+     """
+     apply_horizontal_fusion(model)
+     optimize_llama_attention(model)
+
+
+ def create_npu_kernels(model: torch.nn.Module):
+     """Create NPU kernels.
+
+     Args:
+         model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
+     """
+     lower_linear(model)
+
+
+ def module_optimization(func: Callable) -> Callable:
+     """Optimize recursively a torch.nn.Module with a specific function.
+
+     The function `func` gets called recursively on every module in the network.
+
+     Args:
+         func (Callable): optimization function
+
+     Returns:
+         Callable: the recursive optimization wrapper
+     """
+
+     def wrapper(model: torch.nn.Module, *args: Any, **kwargs: Any):
+         """Recursively apply the optimization function.
+
+         Args:
+             model (torch.nn.Module): original module
+             args (Any): positional arguments
+             kwargs (Any): keyword arguments
+
+         """
+         for name, layer in model.named_children():
+             new_layer = func(name, layer, *args, **kwargs)
+             if new_layer:
+                 model.add_module(name, new_layer)
+                 wrapper(new_layer, *args, **kwargs)
+             else:
+                 wrapper(layer, *args, **kwargs)
+
+     return wrapper
+
+
+ @module_optimization
+ def lower_linear(name: str, layer: torch.nn.Module) -> Union[torch.nn.Module, None]:
+     """Lower torch.nn.Linear and torch.nn.Conv2d layers to NPU equivalent operators.
+
+     Args:
+         name (str): Layer name
+         layer (torch.nn.Module): Original torch module
+
+     Raises:
+         RuntimeError: unsupported quantization bits
+
+     Returns:
+         Union[torch.nn.Module, None]: Return the new NPU operator or None
+     """
+     if isinstance(layer, torch.nn.Linear):
+         return nn.Linear.fromTorch(layer)
+     if isinstance(layer, torch.nn.Conv2d):
+         return nn.Conv2d.fromTorch(layer)
+     if isinstance(layer, WeightOnlyLinear):
+         if layer.bits == 4:
+             return nn.QuantizedLinear(
+                 layer.qweight.to(torch.uint8), layer.scales, layer.bias
+             )
+         elif layer.bits == 8:
+             return nn.QuantizedLinear(
+                 layer.qweight.view(torch.int8), layer.scales, layer.bias
+             )
+         else:
+             raise RuntimeError(f"Unsupported quantization bits: {layer.bits}")
+     return None
+
+
+ @module_optimization
+ def apply_horizontal_fusion(
+     name: str, layer: torch.nn.Module
+ ) -> Union[torch.nn.Module, None]:
+     """Apply horizontal fusion (merging two linear layers with the same input) when applicable.
+
+     Args:
+         name (str): Layer name
+         layer (torch.nn.Module): Original module
+
+     Returns:
+         Union[torch.nn.Module, None]: optimized module
+     """
+     if isinstance(layer, (LlamaMLP, GemmaMLP)):
+         return horizontal_fusion_linear(layer)
+     return None
+
+
+ @module_optimization
+ def optimize_llama_attention(
+     name: str, layer: torch.nn.Module
+ ) -> Union[torch.nn.Module, None]:
+     """Optimize the LLaMA attention block.
+
+     Args:
+         name (str): Module name
+         layer (torch.nn.Module): Original Module
+
+     Returns:
+         Union[torch.nn.Module, None]: optimized llama module
+     """
+     if isinstance(layer, (LlamaAttention, GemmaAttention)):
+         return nn.LlamaAttention.fromTorch(layer)
+     return None
+
+
+ @register_backend
+ def npu(
+     gm: Union[torch.nn.Module, torch.fx.GraphModule], example_inputs: List[torch.Tensor]
+ ) -> Union[torch.nn.Module, torch.fx.GraphModule]:
+     """Implement the custom torch 2.0 compile backend for the NPU.
+
+     Args:
+         gm (Union[torch.nn.Module, torch.fx.GraphModule]): The torch fx Module
+         example_inputs (List[torch.Tensor]): A list of example inputs
+
+     Returns:
+         Union[torch.nn.Module, torch.fx.GraphModule]: The compiled model
+     """
+     # Run some optimizations
+     gm = horizontal_fusion_linear(gm)
+
+     # For now compile in fp16
+     return compile(gm)
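A short sketch of the two entry points this file defines: the eager compile() helper and the "npu" TorchDynamo backend registered above via @register_backend. The toy model is illustrative only, and the int8 path additionally requires neural_compressor for quantize_model:

    import torch
    from intel_npu_acceleration_library.compiler import compile as npu_compile
    from intel_npu_acceleration_library.dtypes import int8

    model = torch.nn.Sequential(torch.nn.Linear(128, 256), torch.nn.GELU())

    # Eager path: recursively lowers Linear/Conv2d layers to NPU kernels (fp16 by default)
    optimized = npu_compile(model)

    # Quantized path: int8/int4 go through quantize_model before lowering
    quantized = npu_compile(model, dtype=int8)

    # torch.compile path: "npu" is the backend name registered by @register_backend
    compiled = torch.compile(model, backend="npu")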
intel_npu_acceleration_library/device.py
@@ -0,0 +1,230 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from intel_npu_acceleration_library.nn.module import convert_to_npu_module
+ from torch.overrides import TorchFunctionMode
+ from functools import lru_cache
+ from typing import Any, MutableMapping
+ import torch
+
+
+ class NPUDevice(TorchFunctionMode):
+     """
+     Represents an NPU device.
+
+     This class extends the `TorchFunctionMode` class and provides an implementation
+     for the `__torch_function__` method.
+
+     Attributes:
+         IMPLEMENTATIONS (MutableMapping[Any, Any]): A dictionary mapping functions to their implementations.
+
+     Methods:
+         __torch_function__(func, types, args=(), kwargs=None): Overrides the `__torch_function__`
+             method to provide custom behavior for torch functions.
+
+     """
+
+     IMPLEMENTATIONS: MutableMapping[Any, Any] = {}
+
+     def __torch_function__(
+         self, func: Any, types: Any, args: Any = (), kwargs: Any = None
+     ):
+         """
+         Override the torch function behavior for the device class.
+
+         Args:
+             func (Any): The torch function being called.
+             types (Any): The types of the arguments being passed to the function.
+             args (Any, optional): The positional arguments being passed to the function. Defaults to ().
+             kwargs (Any, optional): The keyword arguments being passed to the function. Defaults to None.
+
+         Returns:
+             Any: The result of the torch function call.
+         """
+
+         def super_fn(*args: Any, **kwargs: Any):
+             """Disable torch_function and return the result of calling `func` with the given arguments and keyword arguments.
+
+             Parameters:
+                 args (Any): Variable length argument list.
+                 kwargs (Any): Arbitrary keyword arguments.
+
+             Returns:
+                 Any: The result of calling the `func` function with the given arguments and keyword arguments.
+             """
+             # Disable torch_function by hand because we don't want the wrapping behavior of
+             # the super() impl
+             # with torch._C.DisableTorchFunction():
+             return func(*args, **kwargs)
+
+         if func in self.IMPLEMENTATIONS:
+             return self.IMPLEMENTATIONS[func](super_fn, *args, **kwargs or {})
+
+         # This is just a no-op for all the non-factory functions:
+         return super_fn(*args, **kwargs or {})
+
+
+ # Convenient wrapper to register functions
+ def implements_factory(func: Any):
+     """
+     Register a decorator function that implements a factory function.
+
+     Args:
+         func (Any): The factory function to register an implementation for.
+
+     Returns:
+         Callable: The decorated implementation function.
+     """
+
+     def _inner_fn(impl: Any):
+         """
+         Implement a decorator used to register an implementation for a specific function.
+
+         Args:
+             impl (Any): The implementation to be registered.
+
+         Returns:
+             Any: The registered implementation.
+         """
+         NPUDevice.IMPLEMENTATIONS[func] = impl
+         return impl
+
+     return _inner_fn
+
+
+ def parse_to_arguments(*args: Any, **kwargs: Any):
+     """
+     Parse the arguments and keyword arguments to handle device selection.
+
+     Args:
+         args: Variable length argument list.
+         kwargs: Arbitrary keyword arguments.
+
+     Returns:
+         Tuple: A tuple containing the following:
+             - npu_device (bool): Indicates whether the device is an NPU device.
+             - new_args (list): List of modified arguments.
+             - kwargs (dict): Dictionary of modified keyword arguments.
+     """
+     device = kwargs.get("device", None)
+     npu_device = False
+     if device == "npu":
+         npu_device = True
+         kwargs["device"] = "cpu"
+
+     new_args = []
+     for arg in args:
+         if arg == "npu":
+             npu_device = True
+             new_args.append("cpu")
+         else:
+             new_args.append(arg)
+
+     return npu_device, new_args, kwargs
+
+
+ @implements_factory(torch.device)
+ def device(super_fn: Any, device, *args: Any, **kwargs: Any):
+     """
+     Return the device based on the input device name.
+
+     Args:
+         super_fn (Any): The super function to call.
+         device (str): The name of the device.
+         args (Any): Additional positional arguments to pass to the super function.
+         kwargs (Any): Additional keyword arguments to pass to the super function.
+
+     Returns:
+         torch.device: The device object.
+
+     """
+     if device == "npu":
+         # Patch the device to return the NPU device
+         return torch.device("cpu")
+     return super_fn(device, *args, **kwargs)
+
+
+ @implements_factory(torch.Tensor.to)
+ def to(super_fn: Any, self: Any, *args: Any, **kwargs: Any):
+     """
+     Convert the tensor to the specified device.
+
+     Args:
+         super_fn: The super function to call.
+         args: Additional positional arguments.
+         kwargs: Additional keyword arguments.
+
+     Returns:
+         The converted tensor.
+
+     Note:
+         This implementation only supports a subset of the `.to()` functionality.
+         Once the remote tensor feature is available, it can be converted to a remote tensor.
+     """
+     npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
+     if npu_device:
+         # None for now, once the remote tensor feature lands, it can be converted to a remote tensor
+         pass
+     return super_fn(self, *args, **kwargs)
+
+
+ @implements_factory(torch._C._nn._parse_to)
+ def _parse_to(super_fn: Any, *args: Any, **kwargs: Any):
+     """
+     Parse the arguments and return the device, dtype, non_blocking, and convert_to_format.
+
+     Args:
+         super_fn (Any): The super function to call.
+         args (Any): Positional arguments.
+         kwargs (Any): Keyword arguments.
+
+     Returns:
+         Tuple: A tuple containing the device, dtype, non_blocking, and convert_to_format.
+     """
+     npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
+
+     device, dtype, non_blocking, convert_to_format = super_fn(*args, **kwargs)
+
+     if npu_device:
+         device = "npu"
+
+     return device, dtype, non_blocking, convert_to_format
+
+
+ def new_to(self, *args: Any, **kwargs: Any):
+     """
+     Move the input tensor(s) to the specified device.
+
+     Args:
+         args: Variable length argument list of devices to move the tensor(s) to.
+         kwargs: Keyword arguments for the `to` method.
+
+     Returns:
+         Tensor or Module: The tensor or module with the tensor(s) moved to the specified device(s).
+     """
+     npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
+
+     if npu_device:
+         self = convert_to_npu_module(self).to("npu")
+
+     return self._to(*args, **kwargs)
+
+
+ @lru_cache()
+ def enable_npu_device():
+     """
+     Enable the NPU device for acceleration.
+
+     This function globally enables the NPU device mode by creating an instance of `NPUDevice` and
+     modifying the `torch.nn.Module.to` method to use a custom implementation called `new_to`.
+
+     Usage:
+         enable_npu_device()
+
+     """
+     holder = NPUDevice()
+     holder.__enter__()
+     torch.nn.Module._to = torch.nn.Module.to
+     torch.nn.Module.to = new_to
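A sketch of what this patched device mode enables. After enable_npu_device(), "npu" is accepted as a device string even though, as the code above shows, tensors are currently kept on CPU and only modules are converted:

    import torch
    from intel_npu_acceleration_library.device import enable_npu_device

    enable_npu_device()

    dev = torch.device("npu")                # intercepted; resolves to torch.device("cpu") for now
    x = torch.randn(4, 4).to("npu")          # Tensor.to is intercepted; stays a plain CPU tensor
    model = torch.nn.Linear(4, 4).to("npu")  # Module.to is patched to new_to, which routes the
                                             # module through convert_to_npu_module first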
intel_npu_acceleration_library/dtypes.py
@@ -0,0 +1,122 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from dataclasses import dataclass
+ from typing import Union
+ import numpy as np
+ import torch
+
+
+ @dataclass(frozen=True)
+ class NPUDtype:
+     """Represents a custom data type for NPUs (Neural Processing Units).
+
+     Attrs:
+         name: str: The name of the data type.
+         bits: int: The number of bits used to represent the data type.
+         min: int: The minimum value that can be represented by the data type.
+         max: int: The maximum value that can be represented by the data type.
+         torch_dtype: torch.dtype: The corresponding torch data type.
+         is_floating_point: bool: True if the data type is floating-point, False otherwise.
+     """
+
+     name: str
+     bits: int
+     min: int
+     max: int
+     torch_dtype: torch.dtype
+
+     @property
+     def is_floating_point(self) -> bool:
+         """
+         Check if the data type is a floating-point type.
+
+         Returns:
+             bool: True if the data type is floating-point, False otherwise.
+         """
+         return self.torch_dtype.is_floating_point
+
+     def __eq__(self, value: Union["NPUDtype", torch.dtype]) -> bool:
+         """
+         Compare the NPUDtype object with another NPUDtype or torch.dtype object.
+
+         Args:
+             value (Union["NPUDtype", torch.dtype]): The object to compare with.
+
+         Returns:
+             bool: True if the objects are equal, False otherwise.
+         """
+         if isinstance(value, torch.dtype):
+             if value.is_floating_point:
+                 info = torch.finfo(value)
+             else:
+                 info = torch.iinfo(value)
+             return (
+                 self.bits == info.bits
+                 and self.max == info.max
+                 and self.min == info.min
+                 and self.torch_dtype == value
+             )
+         if isinstance(value, type):
+             value = np.dtype(value)
+             if value.kind == "f":
+                 info = np.finfo(value)
+             else:
+                 info = np.iinfo(value)
+             return (
+                 self.bits == info.bits and self.max == info.max and self.min == info.min
+             )
+         else:
+             return super().__eq__(value)
+
+     def __repr__(self) -> str:
+         """
+         Return a string representation of the NPUDtype object.
+
+         Returns:
+             str: The string representation of the NPUDtype object.
+         """
+         return self.name
+
+
+ float16 = NPUDtype(
+     "fp16",
+     16,
+     torch.finfo(torch.float16).min,
+     torch.finfo(torch.float16).max,
+     torch.float16,
+ )
+ bfloat16 = NPUDtype(
+     "bf16",
+     16,
+     torch.finfo(torch.bfloat16).min,
+     torch.finfo(torch.bfloat16).max,
+     torch.bfloat16,
+ )
+ float32 = NPUDtype(
+     "fp32",
+     32,
+     torch.finfo(torch.float32).min,
+     torch.finfo(torch.float32).max,
+     torch.float32,
+ )
+ float64 = NPUDtype(
+     "fp64",
+     64,
+     torch.finfo(torch.float64).min,
+     torch.finfo(torch.float64).max,
+     torch.float64,
+ )
+ int4 = NPUDtype("int4", 4, -8, 7, torch.int8)
+ int8 = NPUDtype("int8", 8, -128, 127, torch.int8)
+ int16 = NPUDtype(
+     "int16", 16, torch.iinfo(torch.int16).min, torch.iinfo(torch.int16).max, torch.int16
+ )
+ int32 = NPUDtype(
+     "int32", 32, torch.iinfo(torch.int32).min, torch.iinfo(torch.int32).max, torch.int32
+ )
+ int64 = NPUDtype(
+     "int64", 64, torch.iinfo(torch.int64).min, torch.iinfo(torch.int64).max, torch.int64
+ )
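The singletons above compare structurally against torch dtypes, which is what lets the compiler check `dtype in (int8, int4)` against either kind of object. A small sketch of the behavior, following directly from the definitions:

    import torch
    from intel_npu_acceleration_library.dtypes import int4, int8, float16

    print(int4.bits, int4.min, int4.max)  # 4 -8 7 (stored in torch.int8 containers)
    print(int8 == torch.int8)             # True: bits, min, max and torch_dtype all match
    print(int4 == torch.int8)             # False: the bit widths differ
    print(float16.is_floating_point)      # True, delegated to torch.float16
    print(int4)                           # "int4", via __repr__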
intel_npu_acceleration_library/external/openvino/__init__.py
@@ -0,0 +1,71 @@
+ # -*- coding: utf-8 -*-
+ # Copyright (C) 2018-2024 Intel Corporation
+ # SPDX-License-Identifier: Apache-2.0
+
+ __path__ = __import__("pkgutil").extend_path(__path__, __name__)
+
+ # Required for Windows OS platforms
+ # Note: always top-level
+ try:
+     from openvino.utils import _add_openvino_libs_to_search_path
+     _add_openvino_libs_to_search_path()
+ except ImportError:
+     pass
+
+ # #
+ # # OpenVINO API
+ # # This __init__.py forces checking of runtime modules to propagate errors.
+ # # It is not compared with init files from openvino-dev package.
+ # #
+ # Import all public modules
+ from openvino import runtime as runtime
+ from openvino import frontend as frontend
+ from openvino import helpers as helpers
+ from openvino import preprocess as preprocess
+ from openvino import utils as utils
+ from openvino import properties as properties
+
+ # Import most important classes and functions from openvino.runtime
+ from openvino.runtime import Model
+ from openvino.runtime import Core
+ from openvino.runtime import CompiledModel
+ from openvino.runtime import InferRequest
+ from openvino.runtime import AsyncInferQueue
+
+ from openvino.runtime import Symbol
+ from openvino.runtime import Dimension
+ from openvino.runtime import Strides
+ from openvino.runtime import PartialShape
+ from openvino.runtime import Shape
+ from openvino.runtime import Layout
+ from openvino.runtime import Type
+ from openvino.runtime import Tensor
+ from openvino.runtime import OVAny
+
+ from openvino.runtime import compile_model
+ from openvino.runtime import get_batch
+ from openvino.runtime import set_batch
+ from openvino.runtime import serialize
+ from openvino.runtime import shutdown
+ from openvino.runtime import tensor_from_file
+ from openvino.runtime import save_model
+ from openvino.runtime import layout_helpers
+
+ from openvino._pyopenvino import RemoteContext
+ from openvino._pyopenvino import RemoteTensor
+ from openvino._pyopenvino import Op
+
+ # libva related:
+ from openvino._pyopenvino import VAContext
+ from openvino._pyopenvino import VASurfaceTensor
+
+ # Set version for openvino package
+ from openvino.runtime import get_version
+ __version__ = get_version()
+
+ # Tools
+ try:
+     # Model Conversion API - ovc should reside in the main namespace
+     from openvino.tools.ovc import convert_model
+ except ImportError:
+     pass
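Since this __init__ re-exports the OpenVINO runtime API, the vendored copy behaves like a regular openvino install once it is importable as `openvino`. A minimal sketch; the IR path is a placeholder:

    import openvino as ov

    core = ov.Core()
    print(ov.get_version())        # the same value assigned to __version__ above
    print(core.available_devices)  # e.g. ['CPU', 'GPU', 'NPU'], depending on installed drivers

    model = core.read_model("model.xml")                # hypothetical IR file
    compiled = ov.compile_model(model, device_name="NPU")  # targets the NPU plugin shipped in lib/Release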
intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py
@@ -0,0 +1,20 @@
+ # -*- coding: utf-8 -*-
+ # Copyright (C) 2018-2024 Intel Corporation
+ # SPDX-License-Identifier: Apache-2.0
+
+ # flake8: noqa
+
+ from openvino._pyopenvino import get_version
+
+ __version__ = get_version()
+
+ from openvino._pyopenvino._offline_transformations import apply_fused_names_cleanup
+ from openvino._pyopenvino._offline_transformations import apply_moc_transformations
+ from openvino._pyopenvino._offline_transformations import apply_moc_legacy_transformations
+ from openvino._pyopenvino._offline_transformations import apply_low_latency_transformation
+ from openvino._pyopenvino._offline_transformations import apply_pruning_transformation
+ from openvino._pyopenvino._offline_transformations import apply_make_stateful_transformation
+ from openvino._pyopenvino._offline_transformations import compress_model_transformation
+ from openvino._pyopenvino._offline_transformations import compress_quantize_weights_transformation
+ from openvino._pyopenvino._offline_transformations import convert_sequence_to_tensor_iterator_transformation
+ from openvino._pyopenvino._offline_transformations import paged_attention_transformation
+ from openvino._pyopenvino._offline_transformations import paged_attention_transformation