bigdl-core-npu 2.6.0b2__cp311-cp311-win_amd64.whl → 2.6.0b20241101__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/METADATA +1 -1
- {bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/RECORD +8 -8
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/bindings.py +14 -1
- intel_npu_acceleration_library/backend/factory.py +32 -7
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- {bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/WHEEL +0 -0
- {bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/top_level.txt +0 -0

{bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/RECORD

```diff
@@ -1,5 +1,5 @@
 intel_npu_acceleration_library/__init__.py,sha256=ZKTIhGMDjF7P6pF-yX8KWcSXbeHWRk24AO_orsa18f8,536
-intel_npu_acceleration_library/_version.py,sha256=
+intel_npu_acceleration_library/_version.py,sha256=jcHRT5PZ4LFo80UDBbT1tq0P4zoXqHJw1NiKteCpiQw,112
 intel_npu_acceleration_library/compiler.py,sha256=3IdgqjamSC8MLexDBJypIeZRiWIcTFnvQSU1LPXUr7Y,6225
 intel_npu_acceleration_library/device.py,sha256=TbG4cJ197qo7PJQ5zz9zfxbuXB5OTWJlKNaKL4TAlms,7395
 intel_npu_acceleration_library/dtypes.py,sha256=1CV4FIuvlmLsTCS1nCCEwq4EzZmD3thj1_92v5vajpw,3539
@@ -8,10 +8,10 @@ intel_npu_acceleration_library/optimizations.py,sha256=9NY8QoDFbs2LY12jbx6As8g2v
 intel_npu_acceleration_library/quantization.py,sha256=6N_04h1KX6TNbw-ceANV0Pmk4_lQ2Y9C7Pwn5x-zQzo,5566
 intel_npu_acceleration_library/backend/__init__.py,sha256=2NP6Ypr1dGUNXmLGW5GD9xrh0U9KJgqxTd_c7su1RUY,857
 intel_npu_acceleration_library/backend/base.py,sha256=hbHqxSOfWH5BaA5PY6_zaf1Zdg5NrQK6WOfe-hr279k,8605
-intel_npu_acceleration_library/backend/bindings.py,sha256=
+intel_npu_acceleration_library/backend/bindings.py,sha256=cla6JRX7pqUDuRmsXN6K9cAKklHz_mb6butatR2Eu9I,8901
 intel_npu_acceleration_library/backend/compression.py,sha256=Avz_zm2s_ELy5peVQ8zFGn8njBfh9nEGR16mflotBic,630
 intel_npu_acceleration_library/backend/convolution.py,sha256=cN3k78X3Y4Cbf7er-MFq0sJ4OwIvquj8PajpdEDmCo4,2018
-intel_npu_acceleration_library/backend/factory.py,sha256=
+intel_npu_acceleration_library/backend/factory.py,sha256=n63KE8X9eOuv2m2MiQFASjzgnkIM9deGtDC-qSHRMMw,38847
 intel_npu_acceleration_library/backend/linear.py,sha256=RiLUh5FOSxRWHB5kYx7mOPOOrS_vxIeBJ5t3yC6wOiQ,1908
 intel_npu_acceleration_library/backend/matmul.py,sha256=mfGi73-mIbUcXp4kyvCGW0Y9kb4Xp1ppbGNpdJFohuA,1819
 intel_npu_acceleration_library/backend/mlp.py,sha256=BuKVwSI726v3nHQQvtMBbXyWxRTq-WoLZtTxeSeWaaY,2330
@@ -187,7 +187,7 @@ intel_npu_acceleration_library/external/openvino/torch/__init__.py,sha256=RXLzsf
 intel_npu_acceleration_library/functional/__init__.py,sha256=WWKwKOh6Sgovv7mKctA872TbLP98Pg5m5-MREvUmlAA,204
 intel_npu_acceleration_library/functional/scaled_dot_product_attention.py,sha256=yGUcg4tDQOLuUnP1g74cl-ec8TRr2SuAMcNLlN6qLvE,1620
 intel_npu_acceleration_library/lib/Release/cache.json,sha256=CyrSqZUWo0Ec4_7ydOiuKIC0Gm8AybrGdozUqUuHxBw,8840377
-intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=
+intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=iJCSCrePLKeLoyc9ZOCWiRIQbah1HVJWLooN_z4PEgQ,304640
 intel_npu_acceleration_library/lib/Release/openvino.dll,sha256=_ifEwHwM-7LuKMhAnlqNuJ2GxsLXbG47easxl5E4shU,12624904
 intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll,sha256=hXFvu4oLvfNhCODn5eNYOmkxBb0LEKYXHA0sZLccOXc,195080
 intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll,sha256=nh_iDxejjHlkes-KT0IwBzEd4Ec0L3bXQFCl0Dqerf8,472072
@@ -217,7 +217,7 @@ intel_npu_acceleration_library/nn/functional.py,sha256=UfAKBc0u6RtyaMo14ldH2GpEn
 intel_npu_acceleration_library/nn/linear.py,sha256=Q06SoGQeLaI86nA_ky2GnFC6H2Fw1zyMDILKnpYC2eo,5739
 intel_npu_acceleration_library/nn/llm.py,sha256=P6dz36Yf6BHtzWcftaghC6QaMI_WeRfQwrCbO7fD6hk,15002
 intel_npu_acceleration_library/nn/module.py,sha256=klVK4A0O-7fLzEIhGhE6_eVgvyVK_NakAqpDq08Ju1Y,12637
-bigdl_core_npu-2.6.
-bigdl_core_npu-2.6.
-bigdl_core_npu-2.6.
-bigdl_core_npu-2.6.
+bigdl_core_npu-2.6.0b20241101.dist-info/METADATA,sha256=oOcjv-wWArv5l6x58K0TH2hhhvYI11BjUXyMYRFRcGc,1543
+bigdl_core_npu-2.6.0b20241101.dist-info/WHEEL,sha256=WutsMqxRjo8PALJe8NWxuOYrO2lUIIHDIxhZ8tjc8BY,101
+bigdl_core_npu-2.6.0b20241101.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
+bigdl_core_npu-2.6.0b20241101.dist-info/RECORD,,
```

intel_npu_acceleration_library/backend/bindings.py

```diff
@@ -143,7 +143,7 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.slice.restype = handler
 
-    lib.compile.argtypes = [handler]
+    lib.compile.argtypes = [handler, ctypes.c_int]
     lib.compile.restype = handler
 
     lib.get_output_tensor_shape_size.argtypes = [handler, ctypes.c_int]
@@ -237,6 +237,19 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.dq_split_linear.restype = handler
 
+    lib.dq_split_linear_prefill.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear_prefill.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
```
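
These are standard `ctypes` prototype registrations: `lib.compile` is widened to take an extra `ctypes.c_int` (the DPU-group count passed in by `factory.py` below), and a new `dq_split_linear_prefill` entry point is declared alongside the existing `dq_split_linear`. The snippet below is only a minimal standalone sketch of that pattern, not the library's actual loader; the `register_prototypes` name, the opaque handle type, and the commented DLL path are illustrative assumptions.

```python
import ctypes

# Opaque handle type used purely for illustration; the real bindings module
# defines its own handler type for graph/node pointers.
handler = ctypes.POINTER(ctypes.c_char)


def register_prototypes(lib: ctypes.CDLL) -> None:
    # Declaring argtypes/restype once means later calls with the wrong number
    # or type of arguments raise ctypes.ArgumentError instead of silently
    # corrupting the call.
    lib.compile.argtypes = [handler, ctypes.c_int]
    lib.compile.restype = handler


# Hypothetical usage (path is an assumption, shown only for context):
# lib = ctypes.cdll.LoadLibrary("intel_npu_acceleration_library.dll")
# register_prototypes(lib)
```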

intel_npu_acceleration_library/backend/factory.py

```diff
@@ -421,6 +421,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         act_dtype: npt.DTypeLike = np.float16,
         wt_dtype: npt.DTypeLike = np.float16,
         scale_factor: bool = True,
+        is_prefill: bool = False,
     ) -> ctypes._Pointer:
         """Generate a linear layer for dynamic quantization linear layer.
 
@@ -432,15 +433,18 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             bias (bool, optional): enable/disable bias. Defaults to False.
             act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
             wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+            scale_factor (bool, optional): enable/disable mul scale factor. Default to True,
+            is_prefill (bool, optional): enable/disable prefill linear optimization. Default to True.
 
         Returns:
             ctypes._Pointer: output node
         """
-
-
-
-
-
+        func = backend_lib.dq_split_linear_prefill if is_prefill else backend_lib.dq_split_linear
+        return func(self._mm, input_node, n_splits,
+                    input_channels, outout_channels, bias,
+                    self.get_backend_dtype(act_dtype),
+                    self.get_backend_dtype(wt_dtype),
+                    scale_factor)
 
     @return_tensor
     def reshape(
```
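
The new `is_prefill` flag simply routes the call to the `dq_split_linear_prefill` binding registered above, while `is_prefill=False` keeps the original `dq_split_linear` path. A rough usage sketch follows; the layer sizes, the `parameter()` graph-input helper, and the weight dtype are assumptions for illustration and are not taken from this diff.

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

# Hypothetical sizes, chosen only for illustration.
batch, in_ch, out_ch, n_splits = 1, 4096, 4096, 2

factory = NNFactory()
x = factory.parameter((batch, in_ch), dtype=np.float16)  # assumed input-node helper

# Build a split dynamically-quantized linear node, selecting the
# prefill-optimized native op added in this release.
y = factory.dq_split_linear(
    x, n_splits, in_ch, out_ch,
    bias=False,
    act_dtype=np.float16,
    wt_dtype=np.int8,   # assumed quantized weight dtype
    is_prefill=True,
)
```

The next hunk in the same file adds a fused scaled-dot-product-attention op to the factory.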

```diff
@@ -897,6 +901,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             auto_pad,  # auto_pad
         )
 
+    @return_tensor
+    def scaled_dot_product_attention(
+        self, query: ctypes._Pointer, key: ctypes._Pointer,
+        value: ctypes._Pointer, attn_mask: ctypes._Pointer,
+        is_causal: bool
+    ) -> ctypes._Pointer:
+        """Constructs a ScaledDotProductAttention operation.
+        Args:
+            query (ctypes._Pointer): query
+            key (ctypes._Pointer): key
+            value (ctypes._Pointer): value
+            attn_mask (ctypes._Pointer): attention mask
+            is_causal (ctypes._Pointer): causal/not causal
+        Returns:
+            ctypes._Pointer: output node
+        """
+        return backend_lib.scaled_dot_product_attention(self._mm,
+                                                        query, key,
+                                                        value, attn_mask,
+                                                        is_causal)
+
     def get_tensor_shape(self, node):
         """Get tensor shape.
 
```
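
The method is a thin wrapper that forwards the four graph nodes plus a causal flag to the native `scaled_dot_product_attention` kernel. A sketch of how it might be called, with hypothetical attention shapes and the same assumed `parameter()` helper as above:

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

# Hypothetical decode-step shapes: (batch, heads, seq_len, head_dim).
q_shape = (1, 32, 1, 128)
kv_shape = (1, 32, 1024, 128)
mask_shape = (1, 32, 1, 1024)

factory = NNFactory()
q = factory.parameter(q_shape, dtype=np.float16)
k = factory.parameter(kv_shape, dtype=np.float16)
v = factory.parameter(kv_shape, dtype=np.float16)
mask = factory.parameter(mask_shape, dtype=np.float16)

# Build the fused attention node; is_causal=False because an explicit
# additive mask node is supplied here.
attn_out = factory.scaled_dot_product_attention(q, k, v, mask, is_causal=False)
```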

```diff
@@ -946,7 +971,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         else:
             raise RuntimeError("Unsupported dtype")
 
-    def compile(self):
+    def compile(self, npu_dpu_groups=4):
         """Finalize and compile a model."""
         self.out = []
         self.torch_out = []
@@ -954,7 +979,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             backend_lib.result(self._mm, node)
 
         # Compile the model
-        backend_lib.compile(self._mm)
+        backend_lib.compile(self._mm, npu_dpu_groups)
 
         for idx, node in enumerate(self.output_nodes):
             output_shape = self.get_tensor_shape(node)
```
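
Paired with the widened `lib.compile` prototype in `bindings.py`, `compile()` now exposes the number of NPU DPU groups, defaulting to 4 so existing callers keep their current behavior. A minimal sketch of overriding it; graph construction is elided and the override value is purely illustrative:

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

factory = NNFactory()
x = factory.parameter((1, 4096), dtype=np.float16)  # assumed input-node helper
# ... build the rest of the graph here ...

# npu_dpu_groups is the new compile-time knob added in this release;
# 2 is an arbitrary example, 4 remains the default.
factory.compile(npu_dpu_groups=2)
```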

intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll

Binary file.

{bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/WHEEL

File without changes.

{bigdl_core_npu-2.6.0b2.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/top_level.txt

File without changes.