bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/__init__.py +0 -0
- bigdl-core-npu/common.lib +0 -0
- bigdl-core-npu/ggml.dll +0 -0
- bigdl-core-npu/ggml.lib +0 -0
- bigdl-core-npu/include/llamacpp/arg.h +77 -0
- bigdl-core-npu/include/llamacpp/common.h +563 -0
- bigdl-core-npu/include/llamacpp/ggml-alloc.h +76 -0
- bigdl-core-npu/include/llamacpp/ggml-backend.h +241 -0
- bigdl-core-npu/include/llamacpp/ggml.h +2679 -0
- bigdl-core-npu/include/llamacpp/llama.h +1234 -0
- bigdl-core-npu/include/llamacpp/log.h +92 -0
- bigdl-core-npu/include/npu/npu_common.h +119 -0
- bigdl-core-npu/include/npu/npu_llm.h +77 -0
- bigdl-core-npu/llama-cli-npu.exe +0 -0
- bigdl-core-npu/llama.dll +0 -0
- bigdl-core-npu/llama.lib +0 -0
- bigdl-core-npu/llm-cli.exe +0 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- bigdl-core-npu/npu_llm.lib +0 -0
- bigdl-core-npu/zlib1.dll +0 -0
- bigdl_core_npu-2.6.0.data/scripts/init-llama-cpp.bat +29 -0
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/METADATA +12 -3
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/RECORD +146 -96
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/WHEEL +1 -1
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/top_level.txt +1 -0
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/base.py +39 -4
- intel_npu_acceleration_library/backend/bindings.py +109 -5
- intel_npu_acceleration_library/backend/factory.py +264 -47
- intel_npu_acceleration_library/backend/ops.py +2 -1
- intel_npu_acceleration_library/backend/qlinear.py +8 -4
- intel_npu_acceleration_library/backend/runtime.py +7 -2
- intel_npu_acceleration_library/backend/tensor.py +73 -3
- intel_npu_acceleration_library/bigdl-core-npu/cache.json +113732 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_c.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/device.py +2 -2
- intel_npu_acceleration_library/dtypes.py +34 -1
- intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +37 -19
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +47 -6
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +95 -63
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +193 -2
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +69 -43
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +88 -2
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +69 -60
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
- intel_npu_acceleration_library/external/openvino/utils.py +17 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/nn/module.py +17 -17
intel_npu_acceleration_library/backend/runtime.py

@@ -27,6 +27,7 @@ def run_matmul(
     x: torch.Tensor,
     weights: torch.Tensor,
     scale: Optional[torch.Tensor] = None,
+    zero: Optional[torch.Tensor] = None,
     op_id: Optional[str] = None,
 ) -> torch.Tensor:
     """Run a matmul operation. Depending on the datatype of the weights it runs a float or quantized operation.
@@ -35,6 +36,7 @@ def run_matmul(
         x (torch.Tensor): Activation tensor. Its dtype must be torch.float16
         weights (torch.Tensor): Weights tensor. Its dtype can be torch.float16 or torch.int8
         scale (Optional[torch.Tensor], optional): Quantization scale. If weights.dtype == torch.int8 then it must be set. Defaults to None.
+        zero (Optional[torch.Tensor], optional): Quantization zero point for asym_int4. If weights.dtype == torch.uint8 and asym_int4 is used then it must be set. Defaults to None.
         op_id (Optional[str], optional): Operation ID. Defaults to None.

     Raises:
@@ -68,12 +70,15 @@ def run_matmul(
         op_class = QLinear if op_id is not None else QMatMul
         op_class_name = op_class.__name__
         np_dtype = np.int8 if weights.dtype == torch.int8 else np.uint8
-        create_op = partial(op_class, dtype=np_dtype)
+        create_op = partial(op_class, dtype=np_dtype, asym=(zero is not None))
         if scale is None:
             raise RuntimeError(
                 f"Quantized matmul (weights dtype == {weights.dtype}) requires scale (scale = {scale})"
             )
-
+        if zero is None:
+            op_args = [weights.numpy(), scale.numpy()]
+        else:
+            op_args = [weights.numpy(), scale.numpy(), zero.numpy()]
     else:
         raise RuntimeError(f"Unsupported dtype for weights {weights.dtype}")
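The new `zero` argument threads a zero-point tensor through to the quantized matmul op, so asymmetric (asym_int4) weights are handled alongside symmetric int8 ones. As a rough illustration of what the extra tensor is for, here is a minimal sketch of symmetric vs. asymmetric dequantization, assuming the usual affine-quantization convention (the library's packed int4 kernel layout is not shown in this diff, and all values below are made up):

```python
import numpy as np

def dequantize_symmetric(q: np.ndarray, scale: np.ndarray) -> np.ndarray:
    # Symmetric int8: real value = q * scale, zero point implicitly 0.
    return q.astype(np.float32) * scale

def dequantize_asymmetric(q: np.ndarray, scale: np.ndarray, zero: np.ndarray) -> np.ndarray:
    # Asymmetric uint4/uint8: real value = (q - zero) * scale.
    return (q.astype(np.float32) - zero) * scale

# Toy quantized weight with per-row scale and zero point (illustrative only).
q = np.array([[0, 3, 7, 15], [1, 4, 8, 12]], dtype=np.uint8)
scale = np.array([[0.10], [0.05]], dtype=np.float32)
zero = np.array([[8.0], [8.0]], dtype=np.float32)

print(dequantize_symmetric(np.array([[-3, 5]], dtype=np.int8), np.float32(0.1)))
print(dequantize_asymmetric(q, scale, zero))
```

When `zero is None` the call falls back to the symmetric two-argument form, which is why `op_args` only grows to three entries in the asymmetric case.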
intel_npu_acceleration_library/backend/tensor.py

@@ -16,14 +16,83 @@ from intel_npu_acceleration_library.dtypes import (
     int32,
     int64,
     NPUDtype,
+    get_backend_dtype,
 )
 from dataclasses import dataclass
 import functools
+from math import prod
 import numpy as np
 import ctypes
 import torch


+class RemoteTensor(torch.Tensor):
+    """
+    Represent a remote tensor object.
+    Attrs:
+        _remote_tensor (ctypes._Pointer): The pointer to the underlying remote tensor.
+    Methods:
+        from_torch(x: torch.Tensor): Create a remote tensor from a torch tensor.
+    """
+
+    _remote_tensor = None
+
+    @staticmethod
+    def __new__(cls, x: Any, remote_tensor: ctypes._Pointer, *args: Any, **kwargs: Any):
+        """
+        Create a new remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+            args (Any): additional arguments
+            kwargs (Any): additional keyword arguments
+        Returns:
+            RemoteTensor: a RemoteTensor object
+        """
+        return super().__new__(cls, x, *args, **kwargs)
+
+    def __init__(self, x: Any, remote_tensor: ctypes._Pointer):
+        """
+        Initialize the remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+        """
+        self._remote_tensor = remote_tensor
+
+    # def __del__(self):
+    #     if self._remote_tensor and backend_lib:
+    #         backend_lib.del_remote_tensor(self._remote_tensor)
+
+    @staticmethod
+    def from_torch(x: torch.Tensor) -> "RemoteTensor":
+        """
+        Create a remote tensor from a torch tensor.
+        Args:
+            x (torch.Tensor): The torch tensor.
+        Returns:
+            RemoteTensor: The remote tensor.
+        """
+        shape_arr = np.array(x.shape, dtype=np.uint32)
+        dtype_str = get_backend_dtype(x.dtype)
+        p = ctypes.cast(x.data_ptr(), ctypes.c_void_p)
+
+        rt = backend_lib.to_npu(shape_arr.size, shape_arr, dtype_str, p)
+
+        pointer = ctypes.cast(
+            backend_lib.remote_tensor_data(rt),
+            ctypes.POINTER(ctypes.c_uint8),
+        )
+
+        arr = (pointer._type_ * prod(x.shape) * x.element_size()).from_address(
+            ctypes.addressof(pointer.contents)
+        )
+
+        pt_tensor = torch.frombuffer(arr, dtype=x.dtype).view(*x.shape)
+
+        return RemoteTensor(pt_tensor, rt)
+
+
 @dataclass
 class Tensor:
     """
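`RemoteTensor.from_torch` copies a host tensor into an NPU-side allocation via `backend_lib.to_npu`, then re-exposes the device-visible buffer to PyTorch by wrapping the raw pointer returned by `backend_lib.remote_tensor_data` with `ctypes` and `torch.frombuffer`. The pointer-wrapping step can be sketched in isolation; in this hypothetical example a plain `ctypes` allocation stands in for the remote buffer, since the real one only exists inside the compiled backend:

```python
import ctypes
from math import prod

import torch

shape = (2, 3)
dtype = torch.float16
nbytes = prod(shape) * (torch.finfo(dtype).bits // 8)

# Stand-in for the NPU-side allocation; the real code obtains this pointer
# from backend_lib.remote_tensor_data(rt).
raw = (ctypes.c_uint8 * nbytes)()
base = ctypes.cast(raw, ctypes.POINTER(ctypes.c_uint8))

# Re-materialize the byte range at that address as a ctypes array...
arr = (base._type_ * nbytes).from_address(ctypes.addressof(base.contents))

# ...and view it as a torch tensor with the original shape and dtype.
t = torch.frombuffer(arr, dtype=dtype).view(*shape)
t.fill_(1.0)  # writes land directly in the underlying buffer, no copy
print(t)
```

The resulting tensor aliases the buffer rather than copying it, which is what lets the wrapper hand NPU-visible memory back to user code as an ordinary `torch.Tensor`.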
@@ -88,6 +157,7 @@ class Tensor:

     factory: "NNFactory"  # type: ignore  # noqa: F821
     node: ctypes._Pointer
+    output_idx: int

     @property
     def shape(self) -> Sequence[int]:
@@ -97,8 +167,8 @@
         Returns:
             Sequence[int]: The shape of the tensor.
         """
-        shape_size = backend_lib.op_shape_size(self.node)
-        return [backend_lib.op_shape(self.node, i) for i in range(shape_size)]
+        shape_size = backend_lib.op_shape_size(self.node, self.output_idx)
+        return [backend_lib.op_shape(self.node, i, self.output_idx) for i in range(shape_size)]

     @property
     def dtype(self) -> NPUDtype:
@@ -108,7 +178,7 @@
         Returns:
             type: The data type of the tensor.
         """
-        dtype_int = backend_lib.op_dtype(self.node)
+        dtype_int = backend_lib.op_dtype(self.node, self.output_idx)

         if dtype_int == 2:
             return np.bool
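The remaining `tensor.py` changes add the `output_idx` field so that shape and dtype queries name which output of a graph node a `Tensor` wrapper refers to, letting nodes with several outputs be wrapped separately. A minimal sketch of that per-output indexing pattern, with a hypothetical in-Python stand-in for the compiled `backend_lib` calls:

```python
from dataclasses import dataclass
from typing import Dict, Sequence, Tuple

class FakeBackend:
    """Hypothetical stand-in for backend_lib: shape queries take (node, dim, output_idx)."""

    def __init__(self, shapes_per_output: Dict[int, Tuple[int, ...]]):
        self._shapes = shapes_per_output

    def op_shape_size(self, node: object, output_idx: int) -> int:
        return len(self._shapes[output_idx])

    def op_shape(self, node: object, dim: int, output_idx: int) -> int:
        return self._shapes[output_idx][dim]

@dataclass
class TensorView:
    backend: FakeBackend
    node: object
    output_idx: int

    @property
    def shape(self) -> Sequence[int]:
        size = self.backend.op_shape_size(self.node, self.output_idx)
        return [self.backend.op_shape(self.node, d, self.output_idx) for d in range(size)]

# A node with two outputs, e.g. a split producing [1, 128] and [1, 64].
backend = FakeBackend({0: (1, 128), 1: (1, 64)})
print(TensorView(backend, node=None, output_idx=0).shape)  # [1, 128]
print(TensorView(backend, node=None, output_idx=1).shape)  # [1, 64]
```

This mirrors the updated `shape` property above, except that the backend here is a plain Python object rather than the compiled library.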