PyPI - bigdl-core-npu - Versions diffs - 2.5.0__cp310-cp310-win_amd64.whl - Mend

bigdl-core-npu 2.5.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (223) hide show

intel_npu_acceleration_library/nn/module.py ADDED Viewed

@@ -0,0 +1,393 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+from intel_npu_acceleration_library.backend import NNFactory, Tensor
+from typing import MutableMapping, Sequence, Any, List
+import numpy as np
+import torch
+def pt_to_np_dtype(torch_dtype: torch.dtype) -> np.dtype:
+    """Convert a PyTorch dtype to a NumPy dtype.
+    Args:
+        torch_dtype (torch.dtype): The PyTorch dtype to convert.
+    Raises:
+        ValueError: If the PyTorch dtype is not supported.
+    Returns:
+        np.dtype: The NumPy dtype.
+    """
+    if torch_dtype == torch.float16:
+        return np.float16
+    elif torch_dtype == torch.float32:
+        return np.float32
+    elif torch_dtype == torch.float64:
+        return np.float64
+    elif torch_dtype == torch.int8:
+        return np.int8
+    elif torch_dtype == torch.int16:
+        return np.int16
+    elif torch_dtype == torch.int32:
+        return np.int32
+    elif torch_dtype == torch.int64:
+        return np.int64
+    else:
+        raise ValueError(f"Unsupported dtype {torch_dtype}")
+def compute_input_signature(
+    args: Sequence[Any], kwargs: MutableMapping[str, Any]
+) -> str:
+    """Compute the input signature of a function call.
+    Args:
+        args (Sequence[Any]): The positional arguments.
+        kwargs (MutableMapping[str, Any]): The keyword arguments.
+    Returns:
+        str: The input signature.
+    """
+    signature = []
+    for arg in args:
+        if isinstance(arg, torch.Tensor):
+            signature.append("_".join(str(dim) for dim in arg.shape))
+            signature.append(str(arg.dtype))
+        else:
+            signature.append(str(arg))
+    for k, arg in kwargs.items():
+        if isinstance(arg, torch.Tensor):
+            signature.append(str(k))
+            signature.append("_".join(str(dim) for dim in arg.shape))
+            signature.append(str(arg.dtype))
+        else:
+            signature.append(str(arg))
+    return "_".join(signature)
+def patch_parameters(module: torch.nn.Module, model: NNFactory, recurse: bool = False):
+    """Patch the parameters of a PyTorch module with constants.
+    Args:
+        module (torch.nn.Module): The PyTorch module.
+        model (NNFactory): The NNFactory instance.
+        recurse (bool, optional): Recurse over all submodules. Defaults to False.
+    """
+    elements = list(module.named_parameters(recurse=recurse))
+    for name, param in elements:
+        del module._parameters[name]
+        setattr(module, name, model.constant(param.data.detach().numpy()))
+    buffers = list(module.named_buffers(recurse=recurse))
+    for name, param in buffers:
+        del module._buffers[name]
+        setattr(module, name, model.constant(param.data.detach().numpy()))
+def patch_modules(module: torch.nn.Module, model: NNFactory):
+    """Patch the modules of a PyTorch module with constants.
+    Args:
+        module (torch.nn.Module): The PyTorch module.
+        model (NNFactory): The NNFactory instance.
+    """
+    modules = list(module.named_children())
+    for _, module in modules:
+        if isinstance(module, Module):
+            module.npu_top_level_module = False
+        # patch_parameters(module, model)
+        patch_modules(module, model)
+class Module(torch.nn.Module):
+    """A PyTorch module that runs on the NPU."""
+    def __init__(self) -> None:
+        """Initialize the module."""
+        super().__init__()
+        self._nn_factory_cache: MutableMapping[str, NNFactory] = {}
+        self._npu_inference = False
+        self.npu_top_level_module = True
+    def extract_tensors_from_arguments(
+        self, args: Sequence[Any]
+    ) -> Sequence[torch.Tensor]:
+        """Extract the tensors from the arguments.
+        Args:
+            args (Sequence[Any]): The positional arguments.
+        Returns:
+            Sequence[torch.Tensor]: The tensors.
+        """
+        tensors, non_tensors = [], []
+        for arg in args:
+            if isinstance(arg, torch.Tensor):
+                tensors.append(arg)
+            elif isinstance(arg, (list, tuple)):
+                tensor_list, non_tensor_list = self.extract_tensors_from_arguments(arg)
+                tensors.extend(tensor_list)
+                non_tensors.extend(non_tensor_list)
+            elif isinstance(arg, dict):
+                tensor_list, non_tensor_list = self.extract_tensors_from_arguments(
+                    list(arg.values())
+                )
+                tensors.extend(tensor_list)
+                non_tensors.extend(non_tensor_list)
+        return tensors, non_tensors
+    def factory_forward(self, *args: Any, **kwargs: Any):
+        """Run the model using the factory.
+        Args:
+            args (Any): The positional arguments.
+            kwargs (Any): The keyword arguments.
+        Returns:
+            torch.Tensor: The output tensor.
+        """
+        signature = compute_input_signature(args, kwargs)
+        model = self._nn_factory_cache[signature]
+        tensor_args, non_tensor_args = self.extract_tensors_from_arguments(args)
+        tensor_args.extend(
+            self.extract_tensors_from_arguments(list(kwargs.values()))[0]
+        )
+        return model(*tensor_args, *non_tensor_args, **kwargs)
+    def create_model(
+        self, args: Sequence[Any], kwargs: MutableMapping[str, Any]
+    ) -> NNFactory:
+        """Create a model from the module.
+        Args:
+            args (Sequence[Any]): positional arguments
+            kwargs (MutableMapping[str, Any]): keyword arguments
+        Returns:
+            NNFactory: The model.
+        """
+        model = NNFactory()
+        def create_args_from_list(args: Sequence[Any]) -> Sequence[Any]:
+            """Create arguments from a list.
+            Args:
+                args (Sequence[Any]): The arguments.
+            Returns:
+                Sequence[Any]: The npu converted arguments.
+            """
+            npu_args: List[Any] = []
+            for arg in args:
+                if isinstance(arg, torch.Tensor):
+                    npu_args.append(
+                        model.parameter(arg.shape, pt_to_np_dtype(arg.dtype))
+                    )
+                elif isinstance(arg, (list, tuple)):
+                    npu_args.append(create_args_from_list(arg))
+                elif isinstance(arg, dict):
+                    npu_args.append(create_kwargs_from_list(arg))
+                else:
+                    npu_args.append(arg)
+            return npu_args
+        def create_kwargs_from_list(
+            kwargs: MutableMapping[str, Any]
+        ) -> MutableMapping[str, Any]:
+            """Create keyword arguments from a list.
+            Args:
+                kwargs (MutableMapping[str, Any]): The keyword arguments.
+            Returns:
+                MutableMapping[str, Any]: The npu converted keyword arguments.
+            """
+            npu_kwargs: MutableMapping[str, Any] = {}
+            for k, arg in kwargs.items():
+                if isinstance(arg, torch.Tensor):
+                    npu_kwargs[k] = model.parameter(
+                        arg.shape, pt_to_np_dtype(arg.dtype)
+                    )
+                elif isinstance(arg, (list, tuple)):
+                    npu_kwargs[k] = create_args_from_list(arg)
+                elif isinstance(arg, dict):
+                    npu_kwargs[k] = create_kwargs_from_list(arg)
+                else:
+                    npu_kwargs[k] = arg
+            return npu_kwargs
+        npu_args = create_args_from_list(args)
+        npu_kwargs = create_kwargs_from_list(kwargs)
+        patch_modules(self, model)
+        # patch_parameters(self, model)
+        _ = self.forward(*npu_args, **npu_kwargs)
+        model.compile()
+        return model
+    def _call_impl(self, *args: Any, **kwargs: Any) -> Any:
+        """Call the module.
+        Args:
+            args (Any): The positional arguments.
+            kwargs (Any): The keyword arguments.
+        Returns:
+            Any: The output of the module.
+        """
+        if self._npu_inference and self.npu_top_level_module:
+            signature = compute_input_signature(args, kwargs)
+            if signature not in self._nn_factory_cache:
+                self._nn_factory_cache[signature] = self.create_model(args, kwargs)
+            # Run the model by replacing the forward method with the factory_forward
+            old_forward = self.forward
+            self.forward = self.factory_forward  # type: ignore
+            out = super()._call_impl(*args, **kwargs)
+            # Restore the original forward method
+            self.forward = old_forward  # type: ignore
+            return out
+        else:
+            return super()._call_impl(*args, **kwargs)
+    def to(self, *args, **kwargs):
+        """Move the module to a device or to a different dtype.
+        Args:
+            args (Any): The positional arguments.
+            kwargs (Any): The keyword arguments.
+        Returns:
+            torch.Tensor: The output tensor.
+        """
+        device = kwargs.get("device", None)
+        args = list(args)
+        if device is None:
+            for idx, arg in enumerate(args):
+                if isinstance(arg, str) and arg.lower() in ["npu"]:
+                    device = "npu"
+                    args[idx] = "cpu"
+        else:
+            kwargs["device"] = "cpu"
+        if device.lower() == "npu":
+            self._npu_inference = True
+        return super().to(*args, **kwargs)
+    def forward(self, *args, **kwargs) -> torch.Tensor:
+        """Run the forward pass of the module.
+        Args:
+            args (Any): The positional arguments.
+            kwargs (Any): The keyword arguments.
+        Raises:
+            NotImplementedError: If the forward method is not implemented.
+        Returns:
+            torch.Tensor: The output tensor.
+        """
+        raise NotImplementedError
+        return torch.empty(0)
+class NPUModuleWrapper(Module):
+    """A PyTorch module that runs on the NPU."""
+    def __init__(self, module: torch.nn.Module) -> None:
+        """Initialize the module.
+        Args:
+            module (torch.nn.Module): The PyTorch module.
+        """
+        super().__init__()
+        self.module = module
+    def forward(self, *args, **kwargs) -> torch.Tensor:
+        """Run the forward pass of the module.
+        Args:
+            args (Any): The positional arguments.
+            kwargs (Any): The keyword arguments.
+        Returns:
+            torch.Tensor: The output tensor.
+        """
+        return self.module(*args, **kwargs)
+def convert_to_npu_module(module: torch.nn.Module) -> Module:
+    """Convert a PyTorch module to an NPU Module.
+    Args:
+        module (torch.nn.Module): The PyTorch module.
+    Returns:
+        Module: The NPU enabled Module.
+    """
+    return NPUModuleWrapper(module).eval()
+class NPUContextManager(NNFactory):
+    """NPU context manager."""
+    def __enter__(self):
+        """Enter the context.
+        Returns:
+            NPUContextManager: self
+        """
+        return self
+    def Constant(self, tensor: torch.Tensor) -> Tensor:
+        """Create a tensor.
+        Args:
+            tensor (torch.Tensor): tensor
+        Returns:
+            torch.Tensor: tensor
+        """
+        return self.constant(tensor)  # type: ignore
+    def Tensor(
+        self, shape: Sequence[int], dtype: torch.dtype = torch.float16
+    ) -> Tensor:
+        """Create a tensor.
+        Args:
+            shape (Sequence[int]): tensor shape
+            dtype (torch.dtype): tensor dtype, default to torch.float16
+        Returns:
+            Tensor: tensor
+        """
+        return self.parameter(shape, dtype=dtype)  # type: ignore
+    def __exit__(self, exc_type, exc_value, traceback):
+        """Exit the context.
+        Args:
+            exc_type: exception type
+            exc_value: exception value
+            traceback: traceback
+        Raises:
+            RuntimeError: If an exception is raised.
+        """
+        # If there is no exception, call the compile
+        if exc_type is None:
+            self.compile()
+        else:
+            # raise the exception
+            print(exc_type, exc_value, traceback)
+            raise RuntimeError(exc_value)  # .with_traceback(traceback)

intel_npu_acceleration_library/optimizations.py ADDED Viewed

@@ -0,0 +1,157 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+from typing import Dict, List, Any
+import torch.nn as nn
+import torch.fx as fx
+import operator
+import torch
+def delattr_recursively(module: nn.Module, target: str):
+    """Delete attribute recursively by name in a torch.nn.Module.
+    Args:
+        module (nn.Module): the nn.Module
+        target (str): the attribute you want to delete
+    """
+    *root, name = target.rsplit(".", 1)
+    if root:
+        root = root[0].split(".")
+        delattr_recursively(getattr(module, root[0]), ".".join(root[1:] + [name]))
+    else:
+        delattr(module, target)
+def fuse_linear_layers(
+    model: nn.Module,
+    modules: Dict[str, nn.Linear],
+    targets: List[str],
+    fused_layer_name: str,
+) -> None:
+    """Fuse two linear layers and append them to the nn Module.
+    Args:
+        model (nn.Module): Origianl nn.Module object
+        modules (Dict[nn.Linear]): a dictiorany of node name: linear layer
+        targets (List[str]): list of layer node names
+        fused_layer_name (str): fused layer name
+    Raises:
+        ValueError: All linear layers must be of type nn.Linear and must have the same input dimension
+    """
+    # Get the attributes
+    layers = [modules[name] for name in targets]
+    in_features = list({layer.in_features for layer in layers})
+    # ensure both linear layers have the same input dimensions and are not already fused
+    if not all(isinstance(layer, nn.Linear) for layer in layers):
+        raise ValueError("All linear layers must be of type nn.Linear")
+    if len(in_features) != 1:
+        raise ValueError(
+            f"All linear layers must have the same input dimensions. Instead found: {in_features}"
+        )
+    # Create the new fused linear layer
+    new_out_features = sum([layer.out_features for layer in layers])
+    has_bias = any(layer.bias is not None for layer in layers)
+    fused_layer = nn.Linear(in_features[0], new_out_features, bias=has_bias)
+    # Concatenate the weights and biases
+    with torch.no_grad():
+        start, stop = 0, 0
+        for layer in layers:
+            stop += layer.out_features
+            fused_layer.weight[start:stop, :] = layer.weight
+            if has_bias:
+                if layer.bias is not None:
+                    fused_layer.bias[start:stop] = layer.bias
+                else:
+                    fused_layer.bias[start:stop] = torch.zeros_like(
+                        fused_layer.bias[start:stop]
+                    )
+            start = stop
+    # Replace the two layers in the original model with the new fused layer
+    setattr(model, fused_layer_name, fused_layer)
+    for layer_name in targets:
+        delattr_recursively(model, layer_name)
+def horizontal_fusion_linear(model: torch.nn.Module) -> torch.nn.Module:
+    """Fuse horizontally two or more linear layers that share the same origin. This will increase NPU hw utilization.
+    The model is symbolically traced with torch.fx; linear layers fed by the
+    same input node are replaced by one fused layer followed by slices of its
+    output along the last dimension.
+    Args:
+        model (torch.nn.Module): The original nn.Module
+    Returns:
+        torch.nn.Module: the traced fx module in which parallel linear operations have been fused into a single bigger one
+    """
+    fx_model = fx.symbolic_trace(model)
+    modules = dict(fx_model.named_modules())
+    # new_graph = copy.deepcopy(fx_model.graph)
+    def node_condition(node: Any) -> bool:
+        """Return true if the node is a module and is nn.Linear.
+        Args:
+            node (Any): A torch fx node
+        Returns:
+            bool: return condition
+        """
+        return node.op == "call_module" and isinstance(modules[node.target], nn.Linear)
+    # First, find all node with a linear layer
+    linear_nodes = [node for node in fx_model.graph.nodes if node_condition(node)]
+    # Group the linear layers by input node (the keys are the first positional
+    # argument of each linear node, not strings)
+    linear_nodes_parents: Dict[Any, List[Any]] = {}
+    for node in linear_nodes:
+        linear_nodes_parents.setdefault(node.args[0], []).append(node)
+    # Get the ones with size > 1
+    # NOTE: ``modules`` inside this comprehension is the comprehension's own
+    # loop variable (a list of nodes); the outer ``modules`` dict is untouched.
+    fused_modules = [
+        (source, modules)
+        for source, modules in linear_nodes_parents.items()
+        if len(modules) > 1
+    ]
+    for source, layers in fused_modules:
+        # Name of the fused layer: "fused_" plus the target names, dots replaced
+        fused_layer_name = "fused_" + "_".join(node.target for node in layers)
+        fused_layer_name = fused_layer_name.replace(".", "_")
+        # Adds the fused layer to fx_model and deletes the original layers
+        fuse_linear_layers(
+            fx_model, modules, [layer.target for layer in layers], fused_layer_name
+        )
+        # Call the fused layer right after the shared input node
+        with fx_model.graph.inserting_after(source):
+            fused_node = fx_model.graph.call_module(fused_layer_name, (source,))
+        with fx_model.graph.inserting_after(fused_node):
+            start, stop = 0, 0
+            for layer in layers:
+                # ``modules`` still references the original layers, so their
+                # out_features give the width of each slice
+                stop += modules[layer.target].out_features
+                # Equivalent to fused_node[..., start:stop]
+                layer_slice = fx_model.graph.call_function(
+                    operator.getitem,
+                    args=(
+                        fused_node,
+                        (
+                            Ellipsis,
+                            slice(start, stop, None),
+                        ),
+                    ),
+                    kwargs={},
+                )
+                # Redirect the users of the old linear node to the slice, then drop it
+                layer.replace_all_uses_with(layer_slice)
+                fx_model.graph.erase_node(layer)
+                start = stop
+    # Check the graph and regenerate the module code from it
+    fx_model.graph.lint()
+    fx_model.recompile()
+    return fx_model