bigdl-core-npu 2.6.0b2__cp311-cp311-win_amd64.whl → 2.6.0b20241101__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/METADATA +1 -1
- {bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/RECORD +8 -8
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/bindings.py +14 -1
- intel_npu_acceleration_library/backend/factory.py +32 -7
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- {bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/WHEEL +0 -0
- {bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/top_level.txt +0 -0

{bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/RECORD

```diff
@@ -1,5 +1,5 @@
 intel_npu_acceleration_library/__init__.py,sha256=ZKTIhGMDjF7P6pF-yX8KWcSXbeHWRk24AO_orsa18f8,536
-intel_npu_acceleration_library/_version.py,sha256=
+intel_npu_acceleration_library/_version.py,sha256=jcHRT5PZ4LFo80UDBbT1tq0P4zoXqHJw1NiKteCpiQw,112
 intel_npu_acceleration_library/compiler.py,sha256=3IdgqjamSC8MLexDBJypIeZRiWIcTFnvQSU1LPXUr7Y,6225
 intel_npu_acceleration_library/device.py,sha256=TbG4cJ197qo7PJQ5zz9zfxbuXB5OTWJlKNaKL4TAlms,7395
 intel_npu_acceleration_library/dtypes.py,sha256=1CV4FIuvlmLsTCS1nCCEwq4EzZmD3thj1_92v5vajpw,3539
@@ -8,10 +8,10 @@ intel_npu_acceleration_library/optimizations.py,sha256=9NY8QoDFbs2LY12jbx6As8g2v
 intel_npu_acceleration_library/quantization.py,sha256=6N_04h1KX6TNbw-ceANV0Pmk4_lQ2Y9C7Pwn5x-zQzo,5566
 intel_npu_acceleration_library/backend/__init__.py,sha256=2NP6Ypr1dGUNXmLGW5GD9xrh0U9KJgqxTd_c7su1RUY,857
 intel_npu_acceleration_library/backend/base.py,sha256=hbHqxSOfWH5BaA5PY6_zaf1Zdg5NrQK6WOfe-hr279k,8605
-intel_npu_acceleration_library/backend/bindings.py,sha256=
+intel_npu_acceleration_library/backend/bindings.py,sha256=cla6JRX7pqUDuRmsXN6K9cAKklHz_mb6butatR2Eu9I,8901
 intel_npu_acceleration_library/backend/compression.py,sha256=Avz_zm2s_ELy5peVQ8zFGn8njBfh9nEGR16mflotBic,630
 intel_npu_acceleration_library/backend/convolution.py,sha256=cN3k78X3Y4Cbf7er-MFq0sJ4OwIvquj8PajpdEDmCo4,2018
-intel_npu_acceleration_library/backend/factory.py,sha256=
+intel_npu_acceleration_library/backend/factory.py,sha256=n63KE8X9eOuv2m2MiQFASjzgnkIM9deGtDC-qSHRMMw,38847
 intel_npu_acceleration_library/backend/linear.py,sha256=RiLUh5FOSxRWHB5kYx7mOPOOrS_vxIeBJ5t3yC6wOiQ,1908
 intel_npu_acceleration_library/backend/matmul.py,sha256=mfGi73-mIbUcXp4kyvCGW0Y9kb4Xp1ppbGNpdJFohuA,1819
 intel_npu_acceleration_library/backend/mlp.py,sha256=BuKVwSI726v3nHQQvtMBbXyWxRTq-WoLZtTxeSeWaaY,2330
@@ -187,7 +187,7 @@ intel_npu_acceleration_library/external/openvino/torch/__init__.py,sha256=RXLzsf
 intel_npu_acceleration_library/functional/__init__.py,sha256=WWKwKOh6Sgovv7mKctA872TbLP98Pg5m5-MREvUmlAA,204
 intel_npu_acceleration_library/functional/scaled_dot_product_attention.py,sha256=yGUcg4tDQOLuUnP1g74cl-ec8TRr2SuAMcNLlN6qLvE,1620
 intel_npu_acceleration_library/lib/Release/cache.json,sha256=CyrSqZUWo0Ec4_7ydOiuKIC0Gm8AybrGdozUqUuHxBw,8840377
-intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=
+intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=iJCSCrePLKeLoyc9ZOCWiRIQbah1HVJWLooN_z4PEgQ,304640
 intel_npu_acceleration_library/lib/Release/openvino.dll,sha256=_ifEwHwM-7LuKMhAnlqNuJ2GxsLXbG47easxl5E4shU,12624904
 intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll,sha256=hXFvu4oLvfNhCODn5eNYOmkxBb0LEKYXHA0sZLccOXc,195080
 intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll,sha256=nh_iDxejjHlkes-KT0IwBzEd4Ec0L3bXQFCl0Dqerf8,472072
@@ -217,7 +217,7 @@ intel_npu_acceleration_library/nn/functional.py,sha256=UfAKBc0u6RtyaMo14ldH2GpEn
 intel_npu_acceleration_library/nn/linear.py,sha256=Q06SoGQeLaI86nA_ky2GnFC6H2Fw1zyMDILKnpYC2eo,5739
 intel_npu_acceleration_library/nn/llm.py,sha256=P6dz36Yf6BHtzWcftaghC6QaMI_WeRfQwrCbO7fD6hk,15002
 intel_npu_acceleration_library/nn/module.py,sha256=klVK4A0O-7fLzEIhGhE6_eVgvyVK_NakAqpDq08Ju1Y,12637
-bigdl_core_npu-2.6.
-bigdl_core_npu-2.6.
-bigdl_core_npu-2.6.
-bigdl_core_npu-2.6.
+bigdl_core_npu-2.6.0b20241101.dist-info/METADATA,sha256=oOcjv-wWArv5l6x58K0TH2hhhvYI11BjUXyMYRFRcGc,1543
+bigdl_core_npu-2.6.0b20241101.dist-info/WHEEL,sha256=WutsMqxRjo8PALJe8NWxuOYrO2lUIIHDIxhZ8tjc8BY,101
+bigdl_core_npu-2.6.0b20241101.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
+bigdl_core_npu-2.6.0b20241101.dist-info/RECORD,,
```

intel_npu_acceleration_library/backend/bindings.py

```diff
@@ -143,7 +143,7 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.slice.restype = handler
 
-    lib.compile.argtypes = [handler]
+    lib.compile.argtypes = [handler, ctypes.c_int]
     lib.compile.restype = handler
 
     lib.get_output_tensor_shape_size.argtypes = [handler, ctypes.c_int]
@@ -237,6 +237,19 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.dq_split_linear.restype = handler
 
+    lib.dq_split_linear_prefill.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear_prefill.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
```
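
These are standard `ctypes` prototype registrations: `lib.compile` is widened to take an extra `ctypes.c_int` (the DPU-group count passed in by `factory.py` below), and a new `dq_split_linear_prefill` entry point is declared alongside the existing `dq_split_linear`. The snippet below is only a minimal standalone sketch of that pattern, not the library's actual loader; the `register_prototypes` name, the opaque handle type, and the commented DLL path are illustrative assumptions.

```python
import ctypes

# Opaque handle type used purely for illustration; the real bindings module
# defines its own handler type for graph/node pointers.
handler = ctypes.POINTER(ctypes.c_char)


def register_prototypes(lib: ctypes.CDLL) -> None:
    # Declaring argtypes/restype once means later calls with the wrong number
    # or type of arguments raise ctypes.ArgumentError instead of silently
    # corrupting the call.
    lib.compile.argtypes = [handler, ctypes.c_int]
    lib.compile.restype = handler


# Hypothetical usage (path is an assumption, shown only for context):
# lib = ctypes.cdll.LoadLibrary("intel_npu_acceleration_library.dll")
# register_prototypes(lib)
```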

intel_npu_acceleration_library/backend/factory.py

```diff
@@ -421,6 +421,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         act_dtype: npt.DTypeLike = np.float16,
         wt_dtype: npt.DTypeLike = np.float16,
         scale_factor: bool = True,
+        is_prefill: bool = False,
     ) -> ctypes._Pointer:
         """Generate a linear layer for dynamic quantization linear layer.
 
@@ -432,15 +433,18 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             bias (bool, optional): enable/disable bias. Defaults to False.
             act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
             wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+            scale_factor (bool, optional): enable/disable mul scale factor. Default to True,
+            is_prefill (bool, optional): enable/disable prefill linear optimization. Default to True.
 
         Returns:
             ctypes._Pointer: output node
         """
-
-
-
-
-
+        func = backend_lib.dq_split_linear_prefill if is_prefill else backend_lib.dq_split_linear
+        return func(self._mm, input_node, n_splits,
+                    input_channels, outout_channels, bias,
+                    self.get_backend_dtype(act_dtype),
+                    self.get_backend_dtype(wt_dtype),
+                    scale_factor)
 
     @return_tensor
     def reshape(
```
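
The new `is_prefill` flag simply routes the call to the `dq_split_linear_prefill` binding registered above, while `is_prefill=False` keeps the original `dq_split_linear` path. A rough usage sketch follows; the layer sizes, the `parameter()` graph-input helper, and the weight dtype are assumptions for illustration and are not taken from this diff.

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

# Hypothetical sizes, chosen only for illustration.
batch, in_ch, out_ch, n_splits = 1, 4096, 4096, 2

factory = NNFactory()
x = factory.parameter((batch, in_ch), dtype=np.float16)  # assumed input-node helper

# Build a split dynamically-quantized linear node, selecting the
# prefill-optimized native op added in this release.
y = factory.dq_split_linear(
    x, n_splits, in_ch, out_ch,
    bias=False,
    act_dtype=np.float16,
    wt_dtype=np.int8,   # assumed quantized weight dtype
    is_prefill=True,
)
```

The next hunk in the same file adds a fused scaled-dot-product-attention op to the factory.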

```diff
@@ -897,6 +901,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             auto_pad,  # auto_pad
         )
 
+    @return_tensor
+    def scaled_dot_product_attention(
+        self, query: ctypes._Pointer, key: ctypes._Pointer,
+        value: ctypes._Pointer, attn_mask: ctypes._Pointer,
+        is_causal: bool
+    ) -> ctypes._Pointer:
+        """Constructs a ScaledDotProductAttention operation.
+        Args:
+            query (ctypes._Pointer): query
+            key (ctypes._Pointer): key
+            value (ctypes._Pointer): value
+            attn_mask (ctypes._Pointer): attention mask
+            is_causal (ctypes._Pointer): causal/not causal
+        Returns:
+            ctypes._Pointer: output node
+        """
+        return backend_lib.scaled_dot_product_attention(self._mm,
+                                                        query, key,
+                                                        value, attn_mask,
+                                                        is_causal)
+
     def get_tensor_shape(self, node):
         """Get tensor shape.
 
```
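
The method is a thin wrapper that forwards the four graph nodes plus a causal flag to the native `scaled_dot_product_attention` kernel. A sketch of how it might be called, with hypothetical attention shapes and the same assumed `parameter()` helper as above:

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

# Hypothetical decode-step shapes: (batch, heads, seq_len, head_dim).
q_shape = (1, 32, 1, 128)
kv_shape = (1, 32, 1024, 128)
mask_shape = (1, 32, 1, 1024)

factory = NNFactory()
q = factory.parameter(q_shape, dtype=np.float16)
k = factory.parameter(kv_shape, dtype=np.float16)
v = factory.parameter(kv_shape, dtype=np.float16)
mask = factory.parameter(mask_shape, dtype=np.float16)

# Build the fused attention node; is_causal=False because an explicit
# additive mask node is supplied here.
attn_out = factory.scaled_dot_product_attention(q, k, v, mask, is_causal=False)
```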

```diff
@@ -946,7 +971,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         else:
             raise RuntimeError("Unsupported dtype")
 
-    def compile(self):
+    def compile(self, npu_dpu_groups=4):
         """Finalize and compile a model."""
         self.out = []
         self.torch_out = []
@@ -954,7 +979,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             backend_lib.result(self._mm, node)
 
         # Compile the model
-        backend_lib.compile(self._mm)
+        backend_lib.compile(self._mm, npu_dpu_groups)
 
         for idx, node in enumerate(self.output_nodes):
             output_shape = self.get_tensor_shape(node)
```
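
Paired with the widened `lib.compile` prototype in `bindings.py`, `compile()` now exposes the number of NPU DPU groups, defaulting to 4 so existing callers keep their current behavior. A minimal sketch of overriding it; graph construction is elided and the override value is purely illustrative:

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

factory = NNFactory()
x = factory.parameter((1, 4096), dtype=np.float16)  # assumed input-node helper
# ... build the rest of the graph here ...

# npu_dpu_groups is the new compile-time knob added in this release;
# 2 is an arbitrary example, 4 remains the default.
factory.compile(npu_dpu_groups=2)
```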

intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll

Binary file.

{bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/WHEEL

File without changes.

{bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/top_level.txt

File without changes.