bigdl-core-npu 2.6.0b2__cp311-cp311-win_amd64.whl → 2.6.0b20241103__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bigdl-core-npu
3
- Version: 2.6.0b2
3
+ Version: 2.6.0b20241103
4
4
  Summary: Intel® NPU Acceleration Library
5
5
  Home-page: https://github.com/intel/intel-npu-acceleration-library
6
6
  Author: Alessandro Palla
@@ -1,5 +1,5 @@
1
1
  intel_npu_acceleration_library/__init__.py,sha256=ZKTIhGMDjF7P6pF-yX8KWcSXbeHWRk24AO_orsa18f8,536
2
- intel_npu_acceleration_library/_version.py,sha256=o0ui4TyomjRVwLdl9zwG9b1pXUD6KLzj1zUaNOMu9V8,105
2
+ intel_npu_acceleration_library/_version.py,sha256=JfJivKuzXFm3vqyuVhzyZq1BC4oaae5rYiCmb_9n7HI,112
3
3
  intel_npu_acceleration_library/compiler.py,sha256=3IdgqjamSC8MLexDBJypIeZRiWIcTFnvQSU1LPXUr7Y,6225
4
4
  intel_npu_acceleration_library/device.py,sha256=TbG4cJ197qo7PJQ5zz9zfxbuXB5OTWJlKNaKL4TAlms,7395
5
5
  intel_npu_acceleration_library/dtypes.py,sha256=1CV4FIuvlmLsTCS1nCCEwq4EzZmD3thj1_92v5vajpw,3539
@@ -8,10 +8,10 @@ intel_npu_acceleration_library/optimizations.py,sha256=9NY8QoDFbs2LY12jbx6As8g2v
8
8
  intel_npu_acceleration_library/quantization.py,sha256=6N_04h1KX6TNbw-ceANV0Pmk4_lQ2Y9C7Pwn5x-zQzo,5566
9
9
  intel_npu_acceleration_library/backend/__init__.py,sha256=2NP6Ypr1dGUNXmLGW5GD9xrh0U9KJgqxTd_c7su1RUY,857
10
10
  intel_npu_acceleration_library/backend/base.py,sha256=hbHqxSOfWH5BaA5PY6_zaf1Zdg5NrQK6WOfe-hr279k,8605
11
- intel_npu_acceleration_library/backend/bindings.py,sha256=g802BSzQUqAS-LzgUP4UGfrxYHNG397we31xanDs3B8,8576
11
+ intel_npu_acceleration_library/backend/bindings.py,sha256=cla6JRX7pqUDuRmsXN6K9cAKklHz_mb6butatR2Eu9I,8901
12
12
  intel_npu_acceleration_library/backend/compression.py,sha256=Avz_zm2s_ELy5peVQ8zFGn8njBfh9nEGR16mflotBic,630
13
13
  intel_npu_acceleration_library/backend/convolution.py,sha256=cN3k78X3Y4Cbf7er-MFq0sJ4OwIvquj8PajpdEDmCo4,2018
14
- intel_npu_acceleration_library/backend/factory.py,sha256=WqnpZDT3do8213BzSZLr0z8_d3sVyuBhYURZH1rEDAA,37688
14
+ intel_npu_acceleration_library/backend/factory.py,sha256=n63KE8X9eOuv2m2MiQFASjzgnkIM9deGtDC-qSHRMMw,38847
15
15
  intel_npu_acceleration_library/backend/linear.py,sha256=RiLUh5FOSxRWHB5kYx7mOPOOrS_vxIeBJ5t3yC6wOiQ,1908
16
16
  intel_npu_acceleration_library/backend/matmul.py,sha256=mfGi73-mIbUcXp4kyvCGW0Y9kb4Xp1ppbGNpdJFohuA,1819
17
17
  intel_npu_acceleration_library/backend/mlp.py,sha256=BuKVwSI726v3nHQQvtMBbXyWxRTq-WoLZtTxeSeWaaY,2330
@@ -187,7 +187,7 @@ intel_npu_acceleration_library/external/openvino/torch/__init__.py,sha256=RXLzsf
187
187
  intel_npu_acceleration_library/functional/__init__.py,sha256=WWKwKOh6Sgovv7mKctA872TbLP98Pg5m5-MREvUmlAA,204
188
188
  intel_npu_acceleration_library/functional/scaled_dot_product_attention.py,sha256=yGUcg4tDQOLuUnP1g74cl-ec8TRr2SuAMcNLlN6qLvE,1620
189
189
  intel_npu_acceleration_library/lib/Release/cache.json,sha256=CyrSqZUWo0Ec4_7ydOiuKIC0Gm8AybrGdozUqUuHxBw,8840377
190
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=ZxYdf6i90mDDNCd4jgtDDfFXFqGwsopGA6u5I9nWdRo,301056
190
+ intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=DQZEP3lcwX8HIuiWbpbE3-WyMRUCKuxUFtabIHejJXY,304640
191
191
  intel_npu_acceleration_library/lib/Release/openvino.dll,sha256=_ifEwHwM-7LuKMhAnlqNuJ2GxsLXbG47easxl5E4shU,12624904
192
192
  intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll,sha256=hXFvu4oLvfNhCODn5eNYOmkxBb0LEKYXHA0sZLccOXc,195080
193
193
  intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll,sha256=nh_iDxejjHlkes-KT0IwBzEd4Ec0L3bXQFCl0Dqerf8,472072
@@ -217,7 +217,7 @@ intel_npu_acceleration_library/nn/functional.py,sha256=UfAKBc0u6RtyaMo14ldH2GpEn
217
217
  intel_npu_acceleration_library/nn/linear.py,sha256=Q06SoGQeLaI86nA_ky2GnFC6H2Fw1zyMDILKnpYC2eo,5739
218
218
  intel_npu_acceleration_library/nn/llm.py,sha256=P6dz36Yf6BHtzWcftaghC6QaMI_WeRfQwrCbO7fD6hk,15002
219
219
  intel_npu_acceleration_library/nn/module.py,sha256=klVK4A0O-7fLzEIhGhE6_eVgvyVK_NakAqpDq08Ju1Y,12637
220
- bigdl_core_npu-2.6.0b2.dist-info/METADATA,sha256=9h9wb4lWrHS8XM0QCcw9PPeZevup5swuMihy_tBBqZE,1536
221
- bigdl_core_npu-2.6.0b2.dist-info/WHEEL,sha256=WutsMqxRjo8PALJe8NWxuOYrO2lUIIHDIxhZ8tjc8BY,101
222
- bigdl_core_npu-2.6.0b2.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
223
- bigdl_core_npu-2.6.0b2.dist-info/RECORD,,
220
+ bigdl_core_npu-2.6.0b20241103.dist-info/METADATA,sha256=7rc3G9Nh-kun6Mc_hRDdMt_2HgqTTMvKVUEemmfAXX8,1543
221
+ bigdl_core_npu-2.6.0b20241103.dist-info/WHEEL,sha256=WutsMqxRjo8PALJe8NWxuOYrO2lUIIHDIxhZ8tjc8BY,101
222
+ bigdl_core_npu-2.6.0b20241103.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
223
+ bigdl_core_npu-2.6.0b20241103.dist-info/RECORD,,
@@ -3,4 +3,4 @@
3
3
  # SPDX-License-Identifier: Apache 2.0
4
4
  #
5
5
 
6
- __version__ = "2.6.0b2"
6
+ __version__ = "2.6.0b20241103"
@@ -143,7 +143,7 @@ def init_network_factory(lib: ctypes.CDLL):
143
143
  ]
144
144
  lib.slice.restype = handler
145
145
 
146
- lib.compile.argtypes = [handler]
146
+ lib.compile.argtypes = [handler, ctypes.c_int]
147
147
  lib.compile.restype = handler
148
148
 
149
149
  lib.get_output_tensor_shape_size.argtypes = [handler, ctypes.c_int]
@@ -237,6 +237,19 @@ def init_network_factory(lib: ctypes.CDLL):
237
237
  ]
238
238
  lib.dq_split_linear.restype = handler
239
239
 
240
+ lib.dq_split_linear_prefill.argtypes = [
241
+ handler,
242
+ handler,
243
+ ctypes.c_int,
244
+ ctypes.c_int,
245
+ ctypes.c_int,
246
+ ctypes.c_bool,
247
+ ctypes.c_char_p,
248
+ ctypes.c_char_p,
249
+ ctypes.c_bool,
250
+ ]
251
+ lib.dq_split_linear_prefill.restype = handler
252
+
240
253
  for op in get_supported_ops():
241
254
  fn = getattr(lib, op.name)
242
255
  fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
@@ -421,6 +421,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
421
421
  act_dtype: npt.DTypeLike = np.float16,
422
422
  wt_dtype: npt.DTypeLike = np.float16,
423
423
  scale_factor: bool = True,
424
+ is_prefill: bool = False,
424
425
  ) -> ctypes._Pointer:
425
426
  """Generate a linear layer for dynamic quantization linear layer.
426
427
 
@@ -432,15 +433,18 @@ class NNFactory(BaseNPUBackendWithPrefetch):
432
433
  bias (bool, optional): enable/disable bias. Defaults to False.
433
434
  act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
434
435
  wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
436
+ scale_factor (bool, optional): enable/disable mul scale factor. Defaults to True.
437
+ is_prefill (bool, optional): enable/disable prefill linear optimization. Defaults to False.
435
438
 
436
439
  Returns:
437
440
  ctypes._Pointer: output node
438
441
  """
439
- return backend_lib.dq_split_linear(self._mm, input_node, n_splits,
440
- input_channels, outout_channels, bias,
441
- self.get_backend_dtype(act_dtype),
442
- self.get_backend_dtype(wt_dtype),
443
- scale_factor)
442
+ func = backend_lib.dq_split_linear_prefill if is_prefill else backend_lib.dq_split_linear
443
+ return func(self._mm, input_node, n_splits,
444
+ input_channels, outout_channels, bias,
445
+ self.get_backend_dtype(act_dtype),
446
+ self.get_backend_dtype(wt_dtype),
447
+ scale_factor)
444
448
 
445
449
  @return_tensor
446
450
  def reshape(
@@ -897,6 +901,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
897
901
  auto_pad, # auto_pad
898
902
  )
899
903
 
904
+ @return_tensor
905
+ def scaled_dot_product_attention(
906
+ self, query: ctypes._Pointer, key: ctypes._Pointer,
907
+ value: ctypes._Pointer, attn_mask: ctypes._Pointer,
908
+ is_causal: bool
909
+ ) -> ctypes._Pointer:
910
+ """Constructs a ScaledDotProductAttention operation.
911
+ Args:
912
+ query (ctypes._Pointer): query
913
+ key (ctypes._Pointer): key
914
+ value (ctypes._Pointer): value
915
+ attn_mask (ctypes._Pointer): attention mask
916
+ is_causal (bool): causal/not causal
917
+ Returns:
918
+ ctypes._Pointer: output node
919
+ """
920
+ return backend_lib.scaled_dot_product_attention(self._mm,
921
+ query, key,
922
+ value, attn_mask,
923
+ is_causal)
924
+
900
925
  def get_tensor_shape(self, node):
901
926
  """Get tensor shape.
902
927
 
@@ -946,7 +971,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
946
971
  else:
947
972
  raise RuntimeError("Unsupported dtype")
948
973
 
949
- def compile(self):
974
+ def compile(self, npu_dpu_groups=4):
950
975
  """Finalize and compile a model."""
951
976
  self.out = []
952
977
  self.torch_out = []
@@ -954,7 +979,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
954
979
  backend_lib.result(self._mm, node)
955
980
 
956
981
  # Compile the model
957
- backend_lib.compile(self._mm)
982
+ backend_lib.compile(self._mm, npu_dpu_groups)
958
983
 
959
984
  for idx, node in enumerate(self.output_nodes):
960
985
  output_shape = self.get_tensor_shape(node)