bigdl-core-npu 2.6.0b20250114 (cp311-cp311-win_amd64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/__init__.py +0 -0
- bigdl-core-npu/include/common.h +96 -0
- bigdl-core-npu/include/npu_llm.h +74 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- bigdl-core-npu/npu_llm.lib +0 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/METADATA +44 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/RECORD +234 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/WHEEL +5 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/top_level.txt +2 -0
- intel_npu_acceleration_library/__init__.py +24 -0
- intel_npu_acceleration_library/_version.py +6 -0
- intel_npu_acceleration_library/backend/__init__.py +37 -0
- intel_npu_acceleration_library/backend/base.py +250 -0
- intel_npu_acceleration_library/backend/bindings.py +383 -0
- intel_npu_acceleration_library/backend/compression.py +24 -0
- intel_npu_acceleration_library/backend/convolution.py +58 -0
- intel_npu_acceleration_library/backend/factory.py +1161 -0
- intel_npu_acceleration_library/backend/linear.py +60 -0
- intel_npu_acceleration_library/backend/matmul.py +59 -0
- intel_npu_acceleration_library/backend/mlp.py +58 -0
- intel_npu_acceleration_library/backend/ops.py +142 -0
- intel_npu_acceleration_library/backend/qlinear.py +75 -0
- intel_npu_acceleration_library/backend/qmatmul.py +66 -0
- intel_npu_acceleration_library/backend/runtime.py +215 -0
- intel_npu_acceleration_library/backend/sdpa.py +107 -0
- intel_npu_acceleration_library/backend/tensor.py +1120 -0
- intel_npu_acceleration_library/backend/utils.py +70 -0
- intel_npu_acceleration_library/compiler.py +194 -0
- intel_npu_acceleration_library/device.py +230 -0
- intel_npu_acceleration_library/dtypes.py +155 -0
- intel_npu_acceleration_library/external/openvino/__init__.py +72 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +21 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
- intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +370 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +180 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +118 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +131 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +290 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +126 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +568 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +258 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +481 -0
- intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
- intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +28 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +5 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +22 -0
- intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
- intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
- intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3068 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +398 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +17 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +276 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +215 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +787 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +40 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +447 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +156 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
- intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +550 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +40 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +298 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +214 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +196 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
- intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
- intel_npu_acceleration_library/external/openvino/utils.py +115 -0
- intel_npu_acceleration_library/functional/__init__.py +8 -0
- intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
- intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/modelling.py +150 -0
- intel_npu_acceleration_library/nn/__init__.py +20 -0
- intel_npu_acceleration_library/nn/autograd.py +68 -0
- intel_npu_acceleration_library/nn/conv.py +257 -0
- intel_npu_acceleration_library/nn/functional.py +1207 -0
- intel_npu_acceleration_library/nn/linear.py +162 -0
- intel_npu_acceleration_library/nn/llm.py +417 -0
- intel_npu_acceleration_library/nn/module.py +393 -0
- intel_npu_acceleration_library/optimizations.py +157 -0
- intel_npu_acceleration_library/quantization.py +174 -0
Binary file contents (.dll, .lib, .pyd) are not shown in this diff.

intel_npu_acceleration_library/modelling.py
@@ -0,0 +1,150 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM
+import intel_npu_acceleration_library as npu_lib
+from functools import partialmethod
+from typing import Type, Any, Tuple, Optional
+import hashlib
+import torch
+import os
+
+
+def get_cache_dir() -> str:
+    """Get the model cache directory.
+
+    Returns:
+        str: path to the cache directory
+    """
+    return os.path.join("cache", "models")
+
+
+def get_mangled_model_name(model_name: str, *args: Any, **kwargs: Any) -> str:
+    """Mangle the model name with all the parameters.
+
+    Args:
+        model_name (str): model name or path
+        args (Any): positional arguments
+        kwargs (Any): keyword arguments
+
+    Returns:
+        str: mangled name
+    """
+    # append all input parameters and create a string
+    arguments_str = f"{[str(arg) for arg in args] + [f'{str(key)}_{str(arg)}' for key, arg in kwargs.items()]}"
+    arguments_str_hash = hashlib.sha256(arguments_str.encode("utf-8")).hexdigest()
+    mangled_model_name = f"{model_name}_{arguments_str_hash}_{npu_lib.__version__}"
+    return mangled_model_name.replace("\\", "_").replace("/", "_")
+
+
+def get_model_path(model_name: str, *args: Any, **kwargs: Any) -> Tuple[str, str]:
+    """Get the model path.
+
+    Args:
+        model_name (str): model name or path
+        args (Any): positional arguments
+        kwargs (Any): keyword arguments
+
+    Returns:
+        Tuple[str, str]: model directory and full path
+    """
+    cache_dir = get_cache_dir()
+    mangled_model_name = get_mangled_model_name(model_name, *args, **kwargs)
+    model_dir_path = os.path.join(cache_dir, mangled_model_name)
+    model_path = os.path.join(model_dir_path, "pytorch_npu_model.pt")
+    return model_dir_path, model_path
+
+
+class NPUModel:
+    """Base NPU model class."""
+
+    @staticmethod
+    def from_pretrained(
+        model_name_or_path: str,
+        dtype: torch.dtype = torch.float16,
+        training: bool = False,
+        transformers_class: Optional[Type] = None,
+        export=True,
+        *args: Any,
+        **kwargs: Any,
+    ) -> torch.nn.Module:
+        """Template for the `from_pretrained` static method.
+
+        Args:
+            model_name_or_path (str): model name or path
+            dtype (torch.dtype, optional): compilation dtype. Defaults to torch.float16.
+            training (bool, optional): enable training. Defaults to False.
+            transformers_class (Optional[Type], optional): base class to use. Must have a `from_pretrained` method. Defaults to None.
+            export (bool, optional): enable the caching of the model. Defaults to True.
+            args (Any): positional arguments
+            kwargs (Any): keyword arguments
+
+        Raises:
+            RuntimeError: Invalid class
+            AttributeError: Cannot export model with trust_remote_code=True
+
+        Returns:
+            torch.nn.Module: compiled mode
+        """
+        if transformers_class is None:
+            raise RuntimeError(f"Invalid transformer class {type(transformers_class)}")
+        # get the model cache dir and path from the name and arguments
+        model_dir_path, model_path = get_model_path(
+            model_name_or_path, dtype, training, *args, **kwargs
+        )
+        if os.path.isdir(model_dir_path) and os.path.isfile(model_path):
+            # Model already exist so I can load it directly
+            return torch.load(model_path)
+        else:
+            # Model does not exists, so I need to compile it first
+            print(f"Compiling model {model_name_or_path} {dtype} for the NPU")
+            model = transformers_class.from_pretrained(
+                model_name_or_path, *args, **kwargs
+            )
+            model = npu_lib.compile(model, dtype, training)
+            if export:
+                if kwargs.get("trust_remote_code", False):
+                    raise AttributeError(
+                        "Cannot export model with trust_remote_code=True. Please set trust_remote_code=False or export=False"
+                    )
+                print(f"Exporting model {model_name_or_path} to {model_dir_path}")
+                os.makedirs(model_dir_path, exist_ok=True)
+                torch.save(model, model_path)
+            return model
+
+
+class NPUAutoModel:
+    """NPU wrapper for AutoModel.
+
+    Attrs:
+        from_pretrained: Load a pretrained model
+    """
+
+    from_pretrained = partialmethod(
+        NPUModel.from_pretrained, transformers_class=AutoModel
+    )
+
+
+class NPUModelForCausalLM:
+    """NPU wrapper for AutoModelForCausalLM.
+
+    Attrs:
+        from_pretrained: Load a pretrained model
+    """
+
+    from_pretrained = partialmethod(
+        NPUModel.from_pretrained, transformers_class=AutoModelForCausalLM
+    )
+
+
+class NPUModelForSeq2SeqLM:
+    """NPU wrapper for AutoModelForSeq2SeqLM.
+
+    Attrs:
+        from_pretrained: Load a pretrained model
+    """
+
+    from_pretrained = partialmethod(
+        NPUModel.from_pretrained, transformers_class=AutoModelForSeq2SeqLM
+    )
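
The modelling.py module above wraps the Hugging Face Auto* classes: `from_pretrained` compiles the requested checkpoint for the NPU with `npu_lib.compile` and, with `export=True`, caches the compiled module under `cache/models`, keyed by a hash of the arguments. A minimal usage sketch follows, assuming `transformers` is installed, an Intel NPU is available, and the compiled model keeps the usual `generate` API; the checkpoint id and prompt are illustrative only and are not taken from this package.

import torch
from transformers import AutoTokenizer
from intel_npu_acceleration_library.modelling import NPUModelForCausalLM

# Illustrative checkpoint id (an assumption, not part of this package).
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# First call compiles the model for the NPU and caches it under
# cache/models/<mangled name>/pytorch_npu_model.pt; later calls reload it.
model = NPUModelForCausalLM.from_pretrained(model_id, dtype=torch.float16, export=True)

inputs = tokenizer("The Intel NPU is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))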

intel_npu_acceleration_library/nn/__init__.py
@@ -0,0 +1,20 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from .functional import *  # noqa
+from .linear import Linear, QuantizedLinear  # noqa
+from .conv import Conv2d  # noqa
+from .module import Module  # noqa
+
+try:
+    from .llm import LlamaAttention, PhiMLP  # noqa
+
+    llm_modules = ["LlamaAttention", "PhiMLP"]
+except ModuleNotFoundError:
+    # Transformer library is not installed
+    llm_modules = []
+
+
+__all__ = ["Module", "Linear", "QuantizedLinear", "Conv2d"] + llm_modules

intel_npu_acceleration_library/nn/autograd.py
@@ -0,0 +1,68 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from intel_npu_acceleration_library.backend import run_matmul
+from typing import Optional, Iterable, Union
+import torch
+
+
+class AutogradMatMul(torch.autograd.Function):
+    """Autograd module for Linear operation."""
+
+    @staticmethod
+    def forward(
+        ctx, x: torch.Tensor, w: torch.Tensor, scale: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
+        """Run a linear forward pass. Depending on the datatype of the weights it runs a float or quantized operation.
+
+        Equivalent pytorch code:
+        result = x @ w.T
+
+        Args:
+            ctx (Any): the autograd context
+            x (torch.Tensor): Activation tensor. Its dtype must be torch.float16
+            w (torch.Tensor): Weight tensor. Its dtype must be torch.float16
+            scale (Optional[torch.Tensor], optional): Quantization scale. If weights.dtype == torch.int8 then it must be set. Defaults to None.
+
+        Returns:
+            torch.Tensor: result
+        """
+        result = run_matmul(x, w, scale, None)
+        ctx.save_for_backward(w, x)
+        return result
+
+    @staticmethod
+    def backward(ctx, grad_output: torch.Tensor) -> Iterable[Union[torch.Tensor, None]]:
+        """Run a linear backward pass.
+
+        grad_output shape: [batch, output_channels]
+        x shape: [batch, input_channels]
+        w shape: [output_channels, input_channels]
+
+        Expected gradients
+        dl_dx shape: [batch, input_channels]
+        dl_dw shape: [output_channels, input_channels]
+
+        Equivalent pytorch code:
+        dl_dx = grad_output @ w.to(torch.float32)
+        dl_dw = (x.T @ grad_output).T
+
+        Args:
+            ctx (Any): the autograd context
+            grad_output (torch.Tensor): output gradient
+
+        Returns:
+            Iterable[Union[torch.Tensor, None]]: Input and parameters gradients
+        """
+        (
+            w,
+            x,
+        ) = ctx.saved_tensors
+
+        dl_dx = run_matmul(grad_output, torch.transpose(w, -1, -2))
+        dl_dw = run_matmul(
+            torch.transpose(grad_output, -1, -2), torch.transpose(x, -1, -2)
+        )
+        return dl_dx, dl_dw, None
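
The docstrings in autograd.py spell out the math that `run_matmul` offloads to the NPU: the forward pass is `result = x @ w.T`, and the backward pass returns `dl_dx = grad_output @ w` and `dl_dw = (x.T @ grad_output).T`. The following CPU-only sketch uses plain PyTorch with no NPU calls and illustrative shapes to check those formulas against autograd:

import torch

# Illustrative shapes: activations [batch, input_channels],
# weights [output_channels, input_channels], as in the docstrings above.
batch, in_ch, out_ch = 4, 8, 16
x = torch.randn(batch, in_ch, requires_grad=True)
w = torch.randn(out_ch, in_ch, requires_grad=True)

# Forward pass described in AutogradMatMul.forward: result = x @ w.T
result = x @ w.T
grad_output = torch.randn(batch, out_ch)
result.backward(grad_output)

# Manual gradients following the formulas in AutogradMatMul.backward
with torch.no_grad():
    dl_dx = grad_output @ w           # [batch, input_channels]
    dl_dw = (x.T @ grad_output).T     # [output_channels, input_channels]

print(torch.allclose(x.grad, dl_dx, atol=1e-5))  # True
print(torch.allclose(w.grad, dl_dw, atol=1e-5))  # True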

intel_npu_acceleration_library/nn/conv.py
@@ -0,0 +1,257 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from intel_npu_acceleration_library.backend import run_factory, Convolution
+from intel_npu_acceleration_library.nn import Linear
+from typing import Optional, Sequence, Union
+from functools import partial
+import torch
+import uuid
+
+
+class Im2ColConv2d(torch.nn.Module):
+    """
+    2D convolutional layer implementation using Im2Col.
+
+    Attrs:
+        weight (torch.Tensor): The weight tensor of the layer.
+        bias (torch.Tensor): The bias tensor of the layer.
+
+    Args:
+        matmul (torch.nn.Module): The matrix multiplication module.
+        in_channels (int): Number of input channels.
+        out_channels (int): Number of output channels.
+        kernel_size (Union[int, Tuple[int, int]]): Size of the convolutional kernel.
+        stride (Union[int, Tuple[int, int]], optional): Stride of the convolution. Defaults to (1, 1).
+        padding (Union[int, Tuple[int, int]], optional): Padding added to the input. Defaults to (0, 0).
+        dilation (Union[int, Tuple[int, int]], optional): Dilation rate of the convolution. Defaults to (1, 1).
+    """
+
+    def __init__(
+        self,
+        matmul,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=(1, 1),
+        padding=(0, 0),
+        dilation=(1, 1),
+    ) -> None:
+        """Initialize a Convolutional layer.
+
+        Args:
+            matmul: The matrix multiplication function to be used.
+            in_channels: The number of input channels.
+            out_channels: The number of output channels.
+            kernel_size: The size of the convolutional kernel.
+            stride: The stride of the convolution. Defaults to (1, 1).
+            padding: The padding added to the input. Defaults to (0, 0).
+            dilation: The dilation rate of the convolution. Defaults to (1, 1).
+        """
+        super().__init__()
+
+        self.matmul = matmul
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.padding = padding
+        self.dilation = dilation
+        self.stride = stride
+
+    @property
+    def weight(self) -> torch.Tensor:
+        """
+        Get the weight tensor of the layer.
+
+        Returns:
+            torch.Tensor: The weight tensor.
+        """
+        return self.matmul.weight
+
+    @property
+    def bias(self) -> torch.Tensor:
+        """
+        Get the bias tensor of the layer.
+
+        Returns:
+            torch.Tensor: The bias tensor.
+        """
+        return self.matmul.bias
+
+    def compute_output_dim(self, dim, idx) -> int:
+        """
+        Compute the output dimension for a given input dimension.
+
+        Args:
+            dim (int): Input dimension.
+            idx (int): Index of the dimension.
+
+        Returns:
+            int: Output dimension.
+        """
+        return (
+            dim
+            + 2 * self.padding[idx]
+            - self.dilation[idx] * (self.kernel_size[idx] - 1)
+            - 1
+        ) // self.stride[idx] + 1
+
+    def forward(self, x) -> torch.Tensor:
+        """
+        Forward pass of the convolutional layer.
+
+        Args:
+            x (torch.Tensor): Input tensor.
+
+        Returns:
+            torch.Tensor: Output tensor.
+        """
+        # Unfold the input
+        inp_unf = torch.nn.functional.unfold(
+            x, self.kernel_size, self.dilation, self.padding, self.stride
+        ).transpose(1, 2)
+        out_unf = self.matmul(inp_unf).transpose(1, 2)
+
+        out_shape = [x.shape[0], self.out_channels] + [
+            self.compute_output_dim(dim, idx) for idx, dim in enumerate(x.shape[2:])
+        ]
+        out = out_unf.view(out_shape)
+
+        return out
+
+    @staticmethod
+    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Im2ColConv2d":
+        """
+        Create a Conv2d layer from a torch.nn.Conv2d layer.
+
+        Args:
+            layer (torch.nn.Conv2d): The torch Conv2d layer.
+            dtype (torch.dtype, optional): Data type of the layer.
+
+        Returns:
+            Im2ColConv2d: The converted Im2ColConv2d layer.
+        """
+        weight = layer.weight.view(layer.weight.shape[0], -1)
+        matmul = Linear.fromTensor(weight, getattr(layer, "bias", None), dtype)
+        new_layer = Im2ColConv2d(
+            matmul,
+            layer.in_channels,
+            layer.out_channels,
+            layer.kernel_size,
+            layer.stride,
+            layer.padding,
+            layer.dilation,
+        )
+
+        return new_layer
+
+
+class Conv2d(torch.nn.Module):
+    """
+    2D convolutional layer implementation.
+
+    Attrs:
+        weight (torch.Tensor): The weight tensor of the layer.
+        bias (torch.Tensor): The bias tensor of the layer.
+    """
+
+    def __init__(
+        self,
+        weights: torch.Tensor,
+        bias: Optional[torch.Tensor] = None,
+        strides: Union[int, Sequence[int]] = 1,
+        padding: Union[int, Sequence[int]] = 0,
+        dilation: Union[int, Sequence[int]] = 1,
+        groups: int = 1,
+    ) -> None:
+        """Initialize a Convolutional layer.
+
+        Args:
+            weights (torch.Tensor): The weight tensor of the layer.
+            bias (Optional[torch.Tensor], optional): The bias tensor of the layer. Defaults to None.
+            strides (Union[int, Sequence[int]], optional): Strides. Defaults to 1.
+            padding (Union[int, Sequence[int]], optional): Padding. Defaults to 0.
+            dilation (Union[int, Sequence[int]], optional): Dilation. Defaults to 1.
+            groups (int, optional): Groups. Defaults to 1.
+        """
+        super().__init__()
+
+        self.op_id = str(uuid.uuid4())
+        if groups > 1:
+            new_shape = [groups, weights.shape[0] // groups] + list(weights.shape[1:])
+            weights = weights.view(*new_shape)
+
+        self.parameters = [weights]
+        if bias is not None:
+            self.parameters.append(bias)
+        self.backend_cls = partial(
+            Convolution,
+            weights_shape=weights.shape,
+            bias=bias,
+            strides=strides,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+        )
+
+    @property
+    def weight(self) -> torch.Tensor:
+        """
+        Get the weight tensor of the layer.
+
+        Returns:
+            torch.Tensor: The weight tensor.
+        """
+        return self.parameters[0]
+
+    @property
+    def bias(self) -> torch.Tensor:
+        """
+        Get the bias tensor of the layer.
+
+        Returns:
+            torch.Tensor: The bias tensor.
+        """
+        if len(self.parameters) > 1:
+            return self.parameters[1]
+        return None
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Torch module forward method.
+
+        Args:
+            x (torch.Tensor): Input tensor
+
+        Returns:
+            torch.Tensor: result
+        """
+        return run_factory(x, self.parameters, self.backend_cls, self.op_id)
+
+    @staticmethod
+    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
+        """
+        Create a Conv2d layer from a torch.nn.Conv2d layer.
+
+        Args:
+            layer (torch.nn.Conv2d): The torch Conv2d layer.
+            dtype (torch.dtype, optional): Data type of the layer.
+
+        Returns:
+            Conv2d: The converted Conv2d layer.
+        """
+        # In case of unsupported configuration, fallback to Im2ColConv2d
+        if any(dim > 11 for dim in layer.kernel_size):
+            return Im2ColConv2d.fromTorch(layer, dtype)
+
+        new_layer = Conv2d(
+            layer.weight,
+            layer.bias,
+            layer.stride,
+            layer.padding,
+            layer.dilation,
+            layer.groups,
+        )
+
+        return new_layer
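
Im2ColConv2d above implements convolution as im2col plus a single matrix multiply, which is what lets the layer reuse the library's matmul path, and `compute_output_dim` applies the standard output-size formula `(dim + 2*padding - dilation*(kernel_size - 1) - 1) // stride + 1`. The following CPU-only sketch (plain PyTorch, illustrative shapes, no NPU backend) mirrors the steps of `Im2ColConv2d.forward` and checks them against `torch.nn.functional.conv2d`:

import torch
import torch.nn.functional as F

# Illustrative shapes; stride 1, no padding or dilation (assumptions for the sketch).
x = torch.randn(2, 3, 16, 16)          # [N, C, H, W]
weight = torch.randn(8, 3, 3, 3)       # [out_channels, C, kH, kW]
bias = torch.randn(8)

# Same steps as Im2ColConv2d.forward: unfold patches, multiply by the
# flattened kernel, then reshape back to a feature map.
inp_unf = F.unfold(x, kernel_size=(3, 3)).transpose(1, 2)      # [N, L, C*kH*kW]
w_flat = weight.view(weight.shape[0], -1)                       # [out_channels, C*kH*kW]
out_unf = (inp_unf @ w_flat.T + bias).transpose(1, 2)           # [N, out_channels, L]
out = out_unf.reshape(2, 8, 14, 14)                             # 14 = (16 - (3 - 1) - 1) // 1 + 1

print(torch.allclose(out, F.conv2d(x, weight, bias), atol=1e-4))  # True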