bigdl-core-npu 2.5.0__cp310-cp310-win_amd64.whl → 2.6.0b20241101__cp310-cp310-win_amd64.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
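At a glance: the version moves from 2.5.0 to 2.6.0b20241101 and the wheel is rebuilt with setuptools 75.3.0. The substantive changes are in the backend: int4 (uint8-packed) weight parameters, a multi-input concat op, dynamically quantized split linear layers with separate decode and prefill entry points, a scaled-dot-product-attention op, a new scale_factor flag on linear, and an npu_dpu_groups argument to compile. The bundled intel_npu_acceleration_library.dll grows from 281,600 to 304,640 bytes accordingly.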
--- a/bigdl_core_npu-2.5.0.dist-info/METADATA
+++ b/bigdl_core_npu-2.6.0b20241101.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bigdl-core-npu
-Version: 2.5.0
+Version: 2.6.0b20241101
 Summary: Intel® NPU Acceleration Library
 Home-page: https://github.com/intel/intel-npu-acceleration-library
 Author: Alessandro Palla
--- a/bigdl_core_npu-2.5.0.dist-info/RECORD
+++ b/bigdl_core_npu-2.6.0b20241101.dist-info/RECORD
@@ -1,5 +1,5 @@
 intel_npu_acceleration_library/__init__.py,sha256=ZKTIhGMDjF7P6pF-yX8KWcSXbeHWRk24AO_orsa18f8,536
-intel_npu_acceleration_library/_version.py,sha256=-yyXJHoPI8Uu4p1coZDeAWH6XHHdLed8GM4ogYbrieE,103
+intel_npu_acceleration_library/_version.py,sha256=jcHRT5PZ4LFo80UDBbT1tq0P4zoXqHJw1NiKteCpiQw,112
 intel_npu_acceleration_library/compiler.py,sha256=3IdgqjamSC8MLexDBJypIeZRiWIcTFnvQSU1LPXUr7Y,6225
 intel_npu_acceleration_library/device.py,sha256=TbG4cJ197qo7PJQ5zz9zfxbuXB5OTWJlKNaKL4TAlms,7395
 intel_npu_acceleration_library/dtypes.py,sha256=1CV4FIuvlmLsTCS1nCCEwq4EzZmD3thj1_92v5vajpw,3539
@@ -7,11 +7,11 @@ intel_npu_acceleration_library/modelling.py,sha256=vSiQOWGJ0l6wGV7zWQtZEkHpnMQIM
 intel_npu_acceleration_library/optimizations.py,sha256=9NY8QoDFbs2LY12jbx6As8g2v0oInX4YzvkjnqViA70,5469
 intel_npu_acceleration_library/quantization.py,sha256=6N_04h1KX6TNbw-ceANV0Pmk4_lQ2Y9C7Pwn5x-zQzo,5566
 intel_npu_acceleration_library/backend/__init__.py,sha256=2NP6Ypr1dGUNXmLGW5GD9xrh0U9KJgqxTd_c7su1RUY,857
-intel_npu_acceleration_library/backend/base.py,sha256=7L1SE-8HKSB5efP8ACQ5tKa89NBkQlf2IxXrSUxGvjs,8317
-intel_npu_acceleration_library/backend/bindings.py,sha256=zoF6etBvQWwAsQmA-woyivZAmZk1RfJaWNn0QShaPjs,7925
+intel_npu_acceleration_library/backend/base.py,sha256=hbHqxSOfWH5BaA5PY6_zaf1Zdg5NrQK6WOfe-hr279k,8605
+intel_npu_acceleration_library/backend/bindings.py,sha256=cla6JRX7pqUDuRmsXN6K9cAKklHz_mb6butatR2Eu9I,8901
 intel_npu_acceleration_library/backend/compression.py,sha256=Avz_zm2s_ELy5peVQ8zFGn8njBfh9nEGR16mflotBic,630
 intel_npu_acceleration_library/backend/convolution.py,sha256=cN3k78X3Y4Cbf7er-MFq0sJ4OwIvquj8PajpdEDmCo4,2018
-intel_npu_acceleration_library/backend/factory.py,sha256=9RyDBzJJYKiFOd0IxMZl5dr6K_pDvfehhrGsE7xTTAw,32773
+intel_npu_acceleration_library/backend/factory.py,sha256=n63KE8X9eOuv2m2MiQFASjzgnkIM9deGtDC-qSHRMMw,38847
 intel_npu_acceleration_library/backend/linear.py,sha256=RiLUh5FOSxRWHB5kYx7mOPOOrS_vxIeBJ5t3yC6wOiQ,1908
 intel_npu_acceleration_library/backend/matmul.py,sha256=mfGi73-mIbUcXp4kyvCGW0Y9kb4Xp1ppbGNpdJFohuA,1819
 intel_npu_acceleration_library/backend/mlp.py,sha256=BuKVwSI726v3nHQQvtMBbXyWxRTq-WoLZtTxeSeWaaY,2330
@@ -187,7 +187,7 @@ intel_npu_acceleration_library/external/openvino/torch/__init__.py,sha256=RXLzsf
 intel_npu_acceleration_library/functional/__init__.py,sha256=WWKwKOh6Sgovv7mKctA872TbLP98Pg5m5-MREvUmlAA,204
 intel_npu_acceleration_library/functional/scaled_dot_product_attention.py,sha256=yGUcg4tDQOLuUnP1g74cl-ec8TRr2SuAMcNLlN6qLvE,1620
 intel_npu_acceleration_library/lib/Release/cache.json,sha256=CyrSqZUWo0Ec4_7ydOiuKIC0Gm8AybrGdozUqUuHxBw,8840377
-intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=B6ahqIBpVO62GjmyWmgmAfRq9IroDPK4H4Y6cAkbNDM,281600
+intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=wmiXqhTSSogchjl8WNQnNYwjHiMtyYOvPvATeOUQkt8,304640
 intel_npu_acceleration_library/lib/Release/openvino.dll,sha256=_ifEwHwM-7LuKMhAnlqNuJ2GxsLXbG47easxl5E4shU,12624904
 intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll,sha256=hXFvu4oLvfNhCODn5eNYOmkxBb0LEKYXHA0sZLccOXc,195080
 intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll,sha256=nh_iDxejjHlkes-KT0IwBzEd4Ec0L3bXQFCl0Dqerf8,472072
@@ -217,7 +217,7 @@ intel_npu_acceleration_library/nn/functional.py,sha256=UfAKBc0u6RtyaMo14ldH2GpEn
 intel_npu_acceleration_library/nn/linear.py,sha256=Q06SoGQeLaI86nA_ky2GnFC6H2Fw1zyMDILKnpYC2eo,5739
 intel_npu_acceleration_library/nn/llm.py,sha256=P6dz36Yf6BHtzWcftaghC6QaMI_WeRfQwrCbO7fD6hk,15002
 intel_npu_acceleration_library/nn/module.py,sha256=klVK4A0O-7fLzEIhGhE6_eVgvyVK_NakAqpDq08Ju1Y,12637
-bigdl_core_npu-2.5.0.dist-info/METADATA,sha256=NhXfzEaj8jWORFpNU4y5qcnnZVv8sjOMWfnhKcER2cE,1534
-bigdl_core_npu-2.5.0.dist-info/WHEEL,sha256=fsW6--WFfuzX2scefE6JfcSZ5dXg5h59u8lqlpL5uuo,101
-bigdl_core_npu-2.5.0.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
-bigdl_core_npu-2.5.0.dist-info/RECORD,,
+bigdl_core_npu-2.6.0b20241101.dist-info/METADATA,sha256=oOcjv-wWArv5l6x58K0TH2hhhvYI11BjUXyMYRFRcGc,1543
+bigdl_core_npu-2.6.0b20241101.dist-info/WHEEL,sha256=09_eAv2LFHDbyhcOULd5e3WJrC_F5q7AlLDftiw-PyE,101
+bigdl_core_npu-2.6.0b20241101.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
+bigdl_core_npu-2.6.0b20241101.dist-info/RECORD,,
--- a/bigdl_core_npu-2.5.0.dist-info/WHEEL
+++ b/bigdl_core_npu-2.6.0b20241101.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.1.0)
+Generator: setuptools (75.3.0)
 Root-Is-Purelib: false
 Tag: cp310-cp310-win_amd64
 
--- a/intel_npu_acceleration_library/_version.py
+++ b/intel_npu_acceleration_library/_version.py
@@ -3,4 +3,4 @@
 # SPDX-License-Identifier: Apache 2.0
 #
 
-__version__ = "2.5.0"
+__version__ = "2.6.0b20241101"
--- a/intel_npu_acceleration_library/backend/base.py
+++ b/intel_npu_acceleration_library/backend/base.py
@@ -153,7 +153,14 @@ class BaseNPUBackendWithPrefetch(BaseNPUBackend):
                 raise ValueError(f"Invalid dtype for scale: {scale.dtype}")
             else:
                 adapted_weights, shape = adapt_weight(weight)
-                backend_lib.addFloatParameter(param, adapted_weights, *shape)
+                if weight.dtype == np.uint8:
+                    backend_lib.addInt4WeightParameter(
+                        param,
+                        adapted_weights,
+                        *shape,
+                    )
+                else:
+                    backend_lib.addFloatParameter(param, adapted_weights, *shape)
         elif isinstance(weights, np.ndarray):
             adapted_weights, shape = adapt_weight(weights)
             backend_lib.addFloatParameter(param, adapted_weights, *shape)
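The new branch dispatches on dtype: weights that arrive as np.uint8 are treated as packed int4 and routed to the new addInt4WeightParameter binding (declared in the bindings.py hunk below). Here is a minimal sketch of one plausible packing scheme, two 4-bit values per byte; the actual layout the DLL expects is not visible in this diff, so pack_int4 and the [-8, 7] nibble convention are assumptions:

    # Illustrative only: pack_int4 and the nibble layout are assumptions,
    # not taken from this package's source.
    import numpy as np

    def pack_int4(w: np.ndarray) -> np.ndarray:
        """Pack an even-length array of int4 values in [-8, 7] into uint8, two per byte."""
        w = w.astype(np.int8).reshape(-1, 2)
        low = w[:, 0].astype(np.uint8) & 0x0F           # first value in the low nibble
        high = (w[:, 1].astype(np.uint8) & 0x0F) << 4   # second value in the high nibble
        return low | high

    packed = pack_int4(np.array([-8, 7, 1, -1], dtype=np.int8))
    assert packed.dtype == np.uint8  # a uint8 array is what selects the int4 branch above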
--- a/intel_npu_acceleration_library/backend/bindings.py
+++ b/intel_npu_acceleration_library/backend/bindings.py
@@ -143,7 +143,7 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.slice.restype = handler
 
-    lib.compile.argtypes = [handler]
+    lib.compile.argtypes = [handler, ctypes.c_int]
     lib.compile.restype = handler
 
     lib.get_output_tensor_shape_size.argtypes = [handler, ctypes.c_int]
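The native compile entry point now takes a second C int, matching the npu_dpu_groups parameter added to NNFactory.compile further down. For readers unfamiliar with the pattern, declaring argtypes/restype is how ctypes validates and converts arguments at call time; a self-contained sketch against the C runtime (not this package's DLL):

    import ctypes, ctypes.util, sys

    # Sketch of the argtypes/restype pattern, using libc/msvcrt rather than the NPU DLL.
    libc = ctypes.CDLL("msvcrt") if sys.platform == "win32" else ctypes.CDLL(ctypes.util.find_library("c"))
    libc.abs.argtypes = [ctypes.c_int]  # ctypes now checks and converts the argument
    libc.abs.restype = ctypes.c_int
    assert libc.abs(-5) == 5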
@@ -160,6 +160,7 @@ def init_network_factory(lib: ctypes.CDLL):
         ctypes.c_bool,
         ctypes.c_char_p,
         ctypes.c_char_p,
+        ctypes.c_bool,
     ]
     lib.linear.restype = handler
 
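The extra trailing ctypes.c_bool on lib.linear lines up with the new scale_factor keyword that factory.py's linear() now forwards; see the factory.py hunks below.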
@@ -214,6 +215,41 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.max_pooling.restype = handler
 
+
+    lib.multi_concat.argtypes = [
+        handler,
+        ctypes.POINTER(handler),
+        ctypes.c_uint64,
+        ctypes.c_int64,
+    ]
+    lib.multi_concat.restype = handler
+
+    lib.dq_split_linear.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear.restype = handler
+
+    lib.dq_split_linear_prefill.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear_prefill.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
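multi_concat takes a C array of node handles plus its length and the concat axis. The factory.py hunk below builds that array with the (POINTER(...) * n)(*nodes) idiom; a self-contained illustration of the same idiom with plain c_int pointers:

    import ctypes

    values = [ctypes.c_int(v) for v in (10, 20, 30)]
    ptrs = [ctypes.pointer(v) for v in values]
    arr = (ctypes.POINTER(ctypes.c_int) * len(ptrs))(*ptrs)  # contiguous array of pointers
    assert arr[2].contents.value == 30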
@@ -260,6 +296,13 @@ def init_parameters(lib: ctypes.CDLL):
         ctypes.c_int,
     ]
 
+    lib.addInt4WeightParameter.argtypes = [
+        handler,
+        c_u8_array,
+        ctypes.c_int,
+        ctypes.c_int,
+    ]
+
 
 def initialize_bindings() -> ctypes.CDLL:
     """Load the Intel® NPU Acceleration Library runtime library, and initialize all c++ <-> python bindings.
--- a/intel_npu_acceleration_library/backend/factory.py
+++ b/intel_npu_acceleration_library/backend/factory.py
@@ -95,6 +95,75 @@ class NNFactory(BaseNPUBackendWithPrefetch):
 
         return cast(F, wrapper)
 
+    def return_tensor_for_list_inputs(fn: F) -> F:  # type: ignore
+        """Wrap the output of a function in a Tensor object.
+        This new wrapper adds support for list-of-Tensor inputs.
+
+        Args:
+            fn (function): Function
+
+        Returns:
+            function: A function that wraps the output in a Tensor object
+        """
+
+        def wrapper(self, *args: Any, **kwargs: Any) -> Tensor:
+            """Wrap the output of a function in a Tensor object.
+
+            Args:
+                args (Any): Variable length argument list
+                kwargs (Any): Arbitrary keyword arguments
+
+            Returns:
+                Tensor: Tensor object
+            """
+            # Convert Tensor objects to their underlying node
+            # args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
+            new_args = []
+            for arg in args:
+                if isinstance(arg, Tensor):
+                    new_args.append(arg.node)
+                elif isinstance(arg, (tuple, list)):
+                    # for item in arg:
+                    for i in range(len(arg)):
+                        if isinstance(arg[i], Tensor):
+                            arg[i] = arg[i].node
+                    new_args.append(arg)
+                else:
+                    new_args.append(arg)
+            args = tuple(new_args)
+            kwargs = {
+                k: v.node if isinstance(v, Tensor) else v for k, v in kwargs.items()
+            }
+
+            # input_nodes = [arg for arg in args if isinstance(arg, ctypes._Pointer)] + [
+            #     v for v in kwargs.values() if isinstance(v, ctypes._Pointer)
+            # ]
+            input_nodes = []
+            for arg in args:
+                if isinstance(arg, ctypes._Pointer):
+                    input_nodes.append(arg)
+                elif isinstance(arg, (tuple, list)):
+                    for item in arg:
+                        if isinstance(item, ctypes._Pointer):
+                            input_nodes.append(item)
+            input_nodes += [v for v in kwargs.values() if isinstance(v, ctypes._Pointer)]
+
+            # Call the function
+            node = fn(self, *args, **kwargs)
+
+            # remove input nodes from output_nodes
+            self.output_nodes = [
+                node for node in self.output_nodes if node not in input_nodes
+            ]
+            # add output node to output_nodes
+            if fn.__name__ != "constant":
+                self.output_nodes.append(node)
+
+            # Wrap the node in a Tensor object
+            return Tensor(factory=self, node=node)
+
+        return cast(F, wrapper)
+
     @return_tensor
     def _call_backend_op(self, op_name: str, *parameters: Any) -> Any:
         """Dynamically call a backend operation.
@@ -319,6 +388,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         bias: Optional[bool] = False,
         act_dtype: npt.DTypeLike = np.float16,
         wt_dtype: npt.DTypeLike = np.float16,
+        scale_factor: bool = True,
     ) -> ctypes._Pointer:
         """Generate a linear layer.
 
@@ -341,7 +411,40 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             bias,
             self.get_backend_dtype(act_dtype),
             self.get_backend_dtype(wt_dtype),
+            scale_factor
         )
+
+    @return_tensor
+    def dq_split_linear(
+        self, input_node: ctypes._Pointer, n_splits: int,
+        outout_channels: int, input_channels: int, bias: bool = False,
+        act_dtype: npt.DTypeLike = np.float16,
+        wt_dtype: npt.DTypeLike = np.float16,
+        scale_factor: bool = True,
+        is_prefill: bool = False,
+    ) -> ctypes._Pointer:
+        """Generate a dynamically quantized, split linear layer.
+
+        Args:
+            input_node (ctypes._Pointer): layer input node
+            n_splits (int): number of parts the linear layer is split into
+            outout_channels (int): number of output channels
+            input_channels (int): number of input channels
+            bias (bool, optional): enable/disable bias. Defaults to False.
+            act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
+            wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+            scale_factor (bool, optional): enable/disable the mul scale factor. Defaults to True.
+            is_prefill (bool, optional): enable/disable the prefill linear optimization. Defaults to False.
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        func = backend_lib.dq_split_linear_prefill if is_prefill else backend_lib.dq_split_linear
+        return func(self._mm, input_node, n_splits,
+                    input_channels, outout_channels, bias,
+                    self.get_backend_dtype(act_dtype),
+                    self.get_backend_dtype(wt_dtype),
+                    scale_factor)
 
     @return_tensor
     def reshape(
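A hedged usage sketch (requires the installed wheel and an NPU runtime; the shapes are illustrative). Note the parameter really is spelled outout_channels in the source; is_prefill selects the dq_split_linear_prefill binding instead of dq_split_linear:

    import numpy as np
    from intel_npu_acceleration_library.backend import NNFactory

    factory = NNFactory()
    x = factory.parameter((1, 4096), dtype=np.float16)
    # 4 splits, 11008 output channels, 4096 input channels; decode-path variant
    y = factory.dq_split_linear(x, 4, 11008, 4096, bias=False, is_prefill=False)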
@@ -474,6 +577,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         axis = np.int64(axis)
         return backend_lib.concat(self._mm, input_node_1, input_node_2, axis)
 
+    @return_tensor_for_list_inputs
+    def sequence_concat(
+        self, input_nodes: List[ctypes._Pointer], axis: int
+    ) -> ctypes._Pointer:
+        """Generate a concatenation layer over a sequence of inputs.
+
+        Args:
+            input_nodes (List[ctypes._Pointer]): sequence of layer input nodes
+            axis (int): axis
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        if axis < 0:
+            shape_size = backend_lib.op_shape_size(input_nodes[0])
+            axis = (axis + shape_size) % shape_size
+        axis = np.int64(axis)
+
+        input_ptr = (ctypes.POINTER(ctypes.c_char) * len(input_nodes))(*input_nodes)
+        return backend_lib.multi_concat(self._mm, input_ptr, len(input_nodes), axis)
+
     @return_tensor
     def reduce_max(
         self,
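Because sequence_concat is decorated with return_tensor_for_list_inputs rather than return_tensor, it accepts a plain Python list of tensors directly and still returns a single Tensor. A hedged sketch, with the same caveats as the example above:

    import numpy as np
    from intel_npu_acceleration_library.backend import NNFactory

    factory = NNFactory()
    a = factory.parameter((1, 16), dtype=np.float16)
    b = factory.parameter((1, 16), dtype=np.float16)
    c = factory.parameter((1, 16), dtype=np.float16)
    out = factory.sequence_concat([a, b, c], axis=-1)  # negative axis is normalized above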
@@ -777,6 +901,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             auto_pad, # auto_pad
         )
 
+    @return_tensor
+    def scaled_dot_product_attention(
+        self, query: ctypes._Pointer, key: ctypes._Pointer,
+        value: ctypes._Pointer, attn_mask: ctypes._Pointer,
+        is_causal: bool
+    ) -> ctypes._Pointer:
+        """Construct a ScaledDotProductAttention operation.
+        Args:
+            query (ctypes._Pointer): query
+            key (ctypes._Pointer): key
+            value (ctypes._Pointer): value
+            attn_mask (ctypes._Pointer): attention mask
+            is_causal (bool): causal/not causal
+        Returns:
+            ctypes._Pointer: output node
+        """
+        return backend_lib.scaled_dot_product_attention(self._mm,
+                                                        query, key,
+                                                        value, attn_mask,
+                                                        is_causal)
+
     def get_tensor_shape(self, node):
         """Get tensor shape.
 
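The signature mirrors torch.nn.functional.scaled_dot_product_attention, which this library already wraps in functional/scaled_dot_product_attention.py. A hedged sketch with illustrative shapes (batch, heads, sequence, head_dim):

    import numpy as np
    from intel_npu_acceleration_library.backend import NNFactory

    factory = NNFactory()
    q = factory.parameter((1, 8, 128, 64), dtype=np.float16)
    k = factory.parameter((1, 8, 128, 64), dtype=np.float16)
    v = factory.parameter((1, 8, 128, 64), dtype=np.float16)
    mask = factory.parameter((1, 1, 128, 128), dtype=np.float16)
    attn = factory.scaled_dot_product_attention(q, k, v, mask, is_causal=False)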
@@ -826,7 +971,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         else:
             raise RuntimeError("Unsupported dtype")
 
-    def compile(self):
+    def compile(self, npu_dpu_groups=4):
         """Finalize and compile a model."""
         self.out = []
         self.torch_out = []
@@ -834,7 +979,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             backend_lib.result(self._mm, node)
 
         # Compile the model
-        backend_lib.compile(self._mm)
+        backend_lib.compile(self._mm, npu_dpu_groups)
 
         for idx, node in enumerate(self.output_nodes):
             output_shape = self.get_tensor_shape(node)
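The new knob is passed straight through to the native compiler; the default of 4 presumably reflects the DPU-group count of current Intel NPUs, though the diff does not say so. Hedged usage, continuing the factory sketches above:

    # After building the graph on `factory`:
    factory.compile(npu_dpu_groups=4)  # explicit default; the meaning of the knob is inferred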