bigdl-core-npu 2.6.0b20241112__cp311-cp311-win_amd64.whl → 2.6.0b20241118__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/METADATA +1 -1
- {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/RECORD +96 -86
- {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/WHEEL +1 -1
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/bindings.py +38 -3
- intel_npu_acceleration_library/backend/factory.py +77 -48
- intel_npu_acceleration_library/backend/ops.py +2 -1
- intel_npu_acceleration_library/backend/tensor.py +73 -3
- intel_npu_acceleration_library/device.py +2 -2
- intel_npu_acceleration_library/dtypes.py +34 -1
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +283 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +129 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +8 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +1 -1
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +3 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +62 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +60 -43
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +67 -1
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +70 -60
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
- intel_npu_acceleration_library/external/openvino/utils.py +17 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_jax_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/nn/module.py +17 -17
- {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/top_level.txt +0 -0
intel_npu_acceleration_library/backend/factory.py

@@ -7,7 +7,7 @@ from intel_npu_acceleration_library.backend.base import BaseNPUBackendWithPrefetch
 from intel_npu_acceleration_library.backend.ops import get_supported_ops
 from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
 from intel_npu_acceleration_library.backend.tensor import Tensor
-from intel_npu_acceleration_library.dtypes import int4, bfloat16
+from intel_npu_acceleration_library.dtypes import int4, bfloat16, get_backend_dtype
 from typing import Optional, Tuple, Any, Union, Sequence, TypeVar, Callable, cast, List
 from functools import partial
 import numpy.typing as npt
@@ -71,17 +71,24 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                 Tensor: Tensor object
             """
             # Convert Tensor objects to their underlying node
-            args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
             kwargs = {
                 k: v.node if isinstance(v, Tensor) else v for k, v in kwargs.items()
             }

+            if fn.__qualname__ == 'NNFactory.reshape':
+                output_idx = args[0].output_idx
+                kwargs["output_idx"] = output_idx
+            args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
+
+
             input_nodes = [arg for arg in args if isinstance(arg, ctypes._Pointer)] + [
                 v for v in kwargs.values() if isinstance(v, ctypes._Pointer)
             ]
             # Call the function
             node = fn(self, *args, **kwargs)

+            output_len = backend_lib.op_output_size(node)
+
             # remove input nodes from output_nodes
             self.output_nodes = [
                 node for node in self.output_nodes if node not in input_nodes
@@ -91,7 +98,13 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                 self.output_nodes.append(node)

             # Wrap the node in a Tensor object
-            return Tensor(factory=self, node=node)
+            if output_len == 1:
+                return Tensor(factory=self, node=node, output_idx=0)
+            else:
+                output_tensor_list = []
+                for i in range(output_len):
+                    output_tensor_list.append(Tensor(factory=self, node=node, output_idx=i))
+                return output_tensor_list

         return cast(F, wrapper)

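Note: after this change every Tensor records which output of its node it views (output_idx), and ops whose node reports more than one output via op_output_size come back as a list of Tensor objects. A minimal sketch of the new contract (assumes an NPU-enabled install of this wheel):

    from intel_npu_acceleration_library.backend import NNFactory
    import numpy as np

    factory = NNFactory()
    x = factory.parameter((1, 96), dtype=np.float16)  # single-output op -> one Tensor
    assert x.output_idx == 0
    # multi-output ops (e.g. the variadic_split added later in this diff)
    # instead return [Tensor(output_idx=0), Tensor(output_idx=1), ...]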
@@ -184,34 +197,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Args:
             dtype: numpy dtype

-        Raises:
-            RuntimeError: Unsupported datatype
-
         Returns:
             ctypes.c_char_p: string representation of the dtype
         """
-        if dtype in [np.int8, torch.int8]:
-            str_dtype = "int8"
-        elif dtype == np.uint8 or dtype == int4:
-            # u8 represents packed i4 dtypes
-            str_dtype = "int4"
-        elif dtype in [np.int16, torch.int16]:
-            str_dtype = "int16"
-        elif dtype in [np.int32, torch.int32]:
-            str_dtype = "int32"
-        elif dtype in [np.int64, torch.int64]:
-            str_dtype = "int64"
-        elif dtype in [np.float16, torch.float16]:
-            str_dtype = "float16"
-        elif dtype in [np.float32, torch.float32]:
-            str_dtype = "float32"
-        elif dtype in [np.float64, torch.float64]:
-            str_dtype = "float64"
-        elif dtype in [bfloat16, torch.bfloat16]:
-            str_dtype = "bfloat16"
-        else:
-            raise RuntimeError(f"DType is not supported {dtype}")
-        return ctypes.c_char_p(str_dtype.encode())
+        return get_backend_dtype(dtype)

     @return_tensor
     def parameter(
@@ -422,6 +411,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         wt_dtype: npt.DTypeLike = np.float16,
         scale_factor: bool = True,
         is_prefill: bool = False,
+        use_dq: bool = True,
     ) -> ctypes._Pointer:
         """Generate a linear layer for dynamic quantization linear layer.

@@ -439,7 +429,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             ctypes._Pointer: output node
         """
-        func = backend_lib.dq_split_linear_prefill if is_prefill else backend_lib.dq_split_linear
+        if is_prefill:
+            func = backend_lib.dq_split_linear_prefill if use_dq else backend_lib.gw_linear_prefill
+        else:
+            func = backend_lib.dq_split_linear
         return func(self._mm, input_node, n_splits,
                     input_channels, outout_channels, bias,
                     self.get_backend_dtype(act_dtype),
@@ -448,7 +441,9 @@ class NNFactory(BaseNPUBackendWithPrefetch):

     @return_tensor
     def reshape(
-        self, input_node: ctypes._Pointer, shape: Sequence[int]
+        self, input_node: ctypes._Pointer, shape: Sequence[int],
+        special_zero: bool = True,
+        output_idx: int = 0
     ) -> ctypes._Pointer:
         """Generate a reshape layer.

@@ -460,7 +455,8 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         shape_node = self.constant(shape).node  # type: ignore
-        return backend_lib.reshape(self._mm, input_node, shape_node)
+        return backend_lib.reshape(self._mm, input_node, shape_node,
+                                   special_zero, output_idx)

     @return_tensor
     def broadcast(
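Note: special_zero follows OpenVINO Reshape semantics: when True, a 0 in the target shape copies the corresponding dimension from the input instead of requesting an empty axis. A hypothetical call against the factory sketched earlier:

    # assuming x has shape (4, 32, 16): dim 0 is kept, the rest is flattened
    y = factory.reshape(x, [0, -1])  # -> shape (4, 512)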
@@ -612,7 +608,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_node_1)
+            shape_size = backend_lib.op_shape_size(input_node_1, 0)
             axis = (axis + shape_size) % shape_size
         axis = np.int64(axis)
         return backend_lib.concat(self._mm, input_node_1, input_node_2, axis)
@@ -631,7 +627,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_nodes[0])
+            shape_size = backend_lib.op_shape_size(input_nodes[0], 0)
             axis = (axis + shape_size) % shape_size
         axis = np.int64(axis)

@@ -656,7 +652,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_max(self._mm, input_node, axis_node, keep_dims)
@@ -679,7 +675,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_mean(self._mm, input_node, axis_node, keep_dims)
@@ -702,7 +698,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_min(self._mm, input_node, axis_node, keep_dims)
@@ -725,7 +721,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_prod(self._mm, input_node, axis_node, keep_dims)
@@ -748,7 +744,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_sum(self._mm, input_node, axis_node, keep_dims)
@@ -768,7 +764,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             axis = (axis + shape_size) % shape_size
         axis_node = self.constant(axis).node  # type: ignore
         return backend_lib.normL2(self._mm, input_node, axis_node, eps)
@@ -791,14 +787,14 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             ctypes._Pointer: output node
         """
-        input_shape_size = backend_lib.op_shape_size(input_node)
+        input_shape_size = backend_lib.op_shape_size(input_node, 0)
         input_shape = [
-            backend_lib.op_shape(input_node, i) for i in range(input_shape_size)
+            backend_lib.op_shape(input_node, i, 0) for i in range(input_shape_size)
         ]
         if isinstance(exponent, ctypes._Pointer):
-            exponent_shape_size = backend_lib.op_shape_size(input_node)
+            exponent_shape_size = backend_lib.op_shape_size(input_node, 0)
             exponent_shape = [
-                backend_lib.op_shape(exponent, i) for i in range(exponent_shape_size)
+                backend_lib.op_shape(exponent, i, 0) for i in range(exponent_shape_size)
             ]
         else:
             exponent_shape = list(exponent.shape)
@@ -807,6 +803,39 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         #     raise ValueError("Input tensor shapes are not equal")

         return backend_lib.power(self._mm, input_node, exponent)
+
+    @return_tensor
+    def variadic_split(
+        self,
+        input: ctypes._Pointer,
+        axis: int,
+        split_lengths: Sequence[int],
+    ) -> ctypes._Pointer:
+        """Generate an average pooling layer.
+
+        Args:
+            input (ctypes._Pointer): layer input node
+            axis (int): split axis
+            split_lengths (Sequence[int]): A list containing the sizes of each output tensor
+                along the split "axis". Size of "split_lengths" should be equal to the number of
+                outputs. The sum of split_lengths must match data.shape[axis]
+
+        Raises:
+            NotImplementedError: divisor_override is not supported
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+
+        split_lens_ptr = np.array(split_lengths, dtype=np.uint32)
+
+        return backend_lib.variadic_split(
+            self._mm,
+            input,
+            axis,
+            split_lens_ptr,
+            split_lens_ptr.size,
+        )

     @return_tensor
     def avg_pooling(
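Note: despite the docstring copied from avg_pooling, variadic_split slices input along axis into chunks whose lengths are given by split_lengths and must sum to the size of that axis. A hedged usage sketch with the factory above:

    # split a (1, 96) tensor into a (1, 32) and a (1, 64) piece along axis 1
    lo, hi = factory.variadic_split(x, 1, [32, 64])
    assert lo.shape == [1, 32] and hi.shape == [1, 64]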
@@ -962,7 +991,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                                                         value, attn_mask,
                                                         is_causal)

-    def get_tensor_shape(self, node):
+    def get_tensor_shape(self, node, output_idx=0):
        """Get tensor shape.

         Args:
@@ -971,10 +1000,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             tuple[int]: tensor shape
         """
-        size = backend_lib.op_shape_size(node)
-        return tuple([backend_lib.op_shape(node, idx) for idx in range(size)])
+        size = backend_lib.op_shape_size(node, output_idx)
+        return tuple([backend_lib.op_shape(node, idx, output_idx) for idx in range(size)])

-    def get_tensor_dtype(self, node):
+    def get_tensor_dtype(self, node, output_idx=0):
         """Get tensor dtype.

         Args:
@@ -986,7 +1015,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             str: tensor dtype
         """
-        dtype_int = backend_lib.op_dtype(node)
+        dtype_int = backend_lib.op_dtype(node, output_idx)

         if dtype_int == 2:
             return np.bool
intel_npu_acceleration_library/backend/ops.py

@@ -98,7 +98,7 @@ def get_supported_ops() -> List[SupportedOp]:
             inputs=3,
             parameters=[ctypes.c_int],
         ),
-        SupportedOp(name="reshape", inputs=2),
+        SupportedOp(name="reshape", inputs=2, parameters=[ctypes.c_bool, ctypes.c_int]),
         SupportedOp(name="transpose", inputs=2),
         SupportedOp(name="squeeze", inputs=1),
         SupportedOp(name="unsqueeze", inputs=2),
@@ -137,5 +137,6 @@ def get_supported_ops() -> List[SupportedOp]:
         SupportedOp(name="power", inputs=2),
         SupportedOp(name="broadcast", inputs=2),
         SupportedOp(name="log_softmax", inputs=1, parameters=[ctypes.c_int64]),
+        SupportedOp(name="rotate_half", inputs=1),
     ]
     return supported_ops
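Note: rotate_half is the helper used by rotary position embeddings; the NPU op is assumed to follow the usual convention, shown here as a PyTorch reference:

    import torch

    def rotate_half(x: torch.Tensor) -> torch.Tensor:
        # split the last dimension in half and rotate: (x1, x2) -> (-x2, x1)
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)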
intel_npu_acceleration_library/backend/tensor.py

@@ -16,14 +16,83 @@ from intel_npu_acceleration_library.dtypes import (
     int32,
     int64,
     NPUDtype,
+    get_backend_dtype,
 )
 from dataclasses import dataclass
 import functools
+from math import prod
 import numpy as np
 import ctypes
 import torch


+class RemoteTensor(torch.Tensor):
+    """
+    Represent a remote tensor object.
+    Attrs:
+        _remote_tensor (ctypes._Pointer): The pointer to the underlying remote tensor.
+    Methods:
+        from_torch(x: torch.Tensor): Create a remote tensor from a torch tensor.
+    """
+
+    _remote_tensor = None
+
+    @staticmethod
+    def __new__(cls, x: Any, remote_tensor: ctypes._Pointer, *args: Any, **kwargs: Any):
+        """
+        Create a new remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+            args (Any): additional arguments
+            kwargs (Any): additional keyword arguments
+        Returns:
+            RemoteTensor: a RemoteTensor object
+        """
+        return super().__new__(cls, x, *args, **kwargs)
+
+    def __init__(self, x: Any, remote_tensor: ctypes._Pointer):
+        """
+        Initialize the remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+        """
+        self._remote_tensor = remote_tensor
+
+    # def __del__(self):
+    #     if self._remote_tensor and backend_lib:
+    #         backend_lib.del_remote_tensor(self._remote_tensor)
+
+    @staticmethod
+    def from_torch(x: torch.Tensor) -> "RemoteTensor":
+        """
+        Create a remote tensor from a torch tensor.
+        Args:
+            x (torch.Tensor): The torch tensor.
+        Returns:
+            RemoteTensor: The remote tensor.
+        """
+        shape_arr = np.array(x.shape, dtype=np.uint32)
+        dtype_str = get_backend_dtype(x.dtype)
+        p = ctypes.cast(x.data_ptr(), ctypes.c_void_p)
+
+        rt = backend_lib.to_npu(shape_arr.size, shape_arr, dtype_str, p)
+
+        pointer = ctypes.cast(
+            backend_lib.remote_tensor_data(rt),
+            ctypes.POINTER(ctypes.c_uint8),
+        )
+
+        arr = (pointer._type_ * prod(x.shape) * x.element_size()).from_address(
+            ctypes.addressof(pointer.contents)
+        )
+
+        pt_tensor = torch.frombuffer(arr, dtype=x.dtype).view(*x.shape)
+
+        return RemoteTensor(pt_tensor, rt)
+
+
 @dataclass
 class Tensor:
     """
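Note: RemoteTensor keeps a device-side buffer alive (via the native to_npu / remote_tensor_data entry points above) while still behaving as a torch.Tensor view of that memory. A hedged usage sketch (requires an NPU and this wheel's bundled native library):

    import torch
    from intel_npu_acceleration_library.backend.tensor import RemoteTensor

    cpu_t = torch.randn(2, 128, dtype=torch.float16)
    npu_t = RemoteTensor.from_torch(cpu_t)  # allocates an NPU-backed copy
    # npu_t can be used like cpu_t, but its storage lives in the remote buffer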
@@ -88,6 +157,7 @@ class Tensor:

     factory: "NNFactory"  # type: ignore # noqa: F821
     node: ctypes._Pointer
+    output_idx: int

     @property
     def shape(self) -> Sequence[int]:
@@ -97,8 +167,8 @@ class Tensor:
         Returns:
             Sequence[int]: The shape of the tensor.
         """
-        shape_size = backend_lib.op_shape_size(self.node)
-        return [backend_lib.op_shape(self.node, i) for i in range(shape_size)]
+        shape_size = backend_lib.op_shape_size(self.node, self.output_idx)
+        return [backend_lib.op_shape(self.node, i, self.output_idx) for i in range(shape_size)]

     @property
     def dtype(self) -> NPUDtype:
@@ -108,7 +178,7 @@ class Tensor:
         Returns:
             type: The data type of the tensor.
         """
-        dtype_int = backend_lib.op_dtype(self.node)
+        dtype_int = backend_lib.op_dtype(self.node, self.output_idx)

         if dtype_int == 2:
             return np.bool
intel_npu_acceleration_library/device.py

@@ -4,6 +4,7 @@
 #

 from intel_npu_acceleration_library.nn.module import convert_to_npu_module
+from intel_npu_acceleration_library.backend.tensor import RemoteTensor
 from torch.overrides import TorchFunctionMode
 from functools import lru_cache
 from typing import Any, MutableMapping
@@ -165,8 +166,7 @@ def to(super_fn: Any, self: Any, *args: Any, **kwargs: Any):
     """
     npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
     if npu_device:
-
-        pass
+        return super_fn(RemoteTensor.from_torch(self), *args, **kwargs)
     return super_fn(self, *args, **kwargs)


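Note: together with the RemoteTensor addition in tensor.py, tensor.to("npu") stops being a no-op. A sketch of the intended flow (assumes the library's "npu" device patches are active in the running process):

    import torch
    import intel_npu_acceleration_library  # assumed to install the device hooks

    t = torch.randn(4, 4, dtype=torch.float16).to("npu")
    # t is now a RemoteTensor backed by NPU memory rather than the CPU tensor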
intel_npu_acceleration_library/dtypes.py

@@ -7,7 +7,7 @@ from dataclasses import dataclass
 from typing import Union
 import numpy as np
 import torch
-
+import ctypes

 @dataclass(frozen=True)
 class NPUDtype:
@@ -81,6 +81,39 @@ class NPUDtype:
         return self.name


+def get_backend_dtype(dtype) -> ctypes.c_char_p:
+    """Get the string representation of the dtype.
+    Args:
+        dtype: numpy dtype
+    Raises:
+        RuntimeError: Unsupported datatype
+    Returns:
+        ctypes.c_char_p: string representation of the dtype
+    """
+    if dtype in [np.int8, torch.int8]:
+        str_dtype = "int8"
+    elif dtype in [np.uint8, int4, torch.uint8]:
+        # u8 represents packed i4 dtypes
+        str_dtype = "int4"
+    elif dtype in [np.int16, torch.int16]:
+        str_dtype = "int16"
+    elif dtype in [np.int32, torch.int32]:
+        str_dtype = "int32"
+    elif dtype in [np.int64, torch.int64]:
+        str_dtype = "int64"
+    elif dtype in [np.float16, torch.float16]:
+        str_dtype = "float16"
+    elif dtype in [np.float32, torch.float32]:
+        str_dtype = "float32"
+    elif dtype in [np.float64, torch.float64]:
+        str_dtype = "float64"
+    elif dtype in [bfloat16, torch.bfloat16]:
+        str_dtype = "bfloat16"
+    else:
+        raise RuntimeError(f"DType is not supported {dtype}")
+    return ctypes.c_char_p(str_dtype.encode())
+
+
 float16 = NPUDtype(
     "fp16",
     16,
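Note: get_backend_dtype now lives in dtypes.py so the factory, the Tensor machinery, and the new RemoteTensor can share it; it maps numpy/torch dtypes to the C string the native library expects:

    import numpy as np
    from intel_npu_acceleration_library.dtypes import get_backend_dtype

    assert get_backend_dtype(np.float16).value == b"float16"
    assert get_backend_dtype(np.uint8).value == b"int4"  # u8 carries packed int4 pairs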
intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py

@@ -18,3 +18,4 @@ from openvino._pyopenvino._offline_transformations import compress_model_transformation
 from openvino._pyopenvino._offline_transformations import compress_quantize_weights_transformation
 from openvino._pyopenvino._offline_transformations import convert_sequence_to_tensor_iterator_transformation
 from openvino._pyopenvino._offline_transformations import paged_attention_transformation
+from openvino._pyopenvino._offline_transformations import stateful_to_stateless_transformation
Binary files (the five intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38/cp39/cp310/cp311/cp312-win_amd64.pyd modules): contents changed, no text diff shown.
intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py (new file)

@@ -0,0 +1,15 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Package: openvino
+Low level wrappers for the FrontEnd C++ API.
+"""
+
+# flake8: noqa
+
+try:
+    from openvino.frontend.jax.py_jax_frontend import _FrontEndJaxDecoder as Decoder
+except ImportError as err:
+    raise ImportError("OpenVINO JAX frontend is not available, please make sure the frontend is built."
+                      "{}".format(err))