bigdl-core-npu 2.5.0__cp310-cp310-win_amd64.whl → 2.6.0b20241101__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/METADATA +1 -1
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/RECORD +9 -9
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/WHEEL +1 -1
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/base.py +8 -1
- intel_npu_acceleration_library/backend/bindings.py +44 -1
- intel_npu_acceleration_library/backend/factory.py +147 -2
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/top_level.txt +0 -0
{bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/RECORD

@@ -1,5 +1,5 @@
 intel_npu_acceleration_library/__init__.py,sha256=ZKTIhGMDjF7P6pF-yX8KWcSXbeHWRk24AO_orsa18f8,536
-intel_npu_acceleration_library/_version.py,sha256
+intel_npu_acceleration_library/_version.py,sha256=jcHRT5PZ4LFo80UDBbT1tq0P4zoXqHJw1NiKteCpiQw,112
 intel_npu_acceleration_library/compiler.py,sha256=3IdgqjamSC8MLexDBJypIeZRiWIcTFnvQSU1LPXUr7Y,6225
 intel_npu_acceleration_library/device.py,sha256=TbG4cJ197qo7PJQ5zz9zfxbuXB5OTWJlKNaKL4TAlms,7395
 intel_npu_acceleration_library/dtypes.py,sha256=1CV4FIuvlmLsTCS1nCCEwq4EzZmD3thj1_92v5vajpw,3539
@@ -7,11 +7,11 @@ intel_npu_acceleration_library/modelling.py,sha256=vSiQOWGJ0l6wGV7zWQtZEkHpnMQIM
 intel_npu_acceleration_library/optimizations.py,sha256=9NY8QoDFbs2LY12jbx6As8g2v0oInX4YzvkjnqViA70,5469
 intel_npu_acceleration_library/quantization.py,sha256=6N_04h1KX6TNbw-ceANV0Pmk4_lQ2Y9C7Pwn5x-zQzo,5566
 intel_npu_acceleration_library/backend/__init__.py,sha256=2NP6Ypr1dGUNXmLGW5GD9xrh0U9KJgqxTd_c7su1RUY,857
-intel_npu_acceleration_library/backend/base.py,sha256=
-intel_npu_acceleration_library/backend/bindings.py,sha256=
+intel_npu_acceleration_library/backend/base.py,sha256=hbHqxSOfWH5BaA5PY6_zaf1Zdg5NrQK6WOfe-hr279k,8605
+intel_npu_acceleration_library/backend/bindings.py,sha256=cla6JRX7pqUDuRmsXN6K9cAKklHz_mb6butatR2Eu9I,8901
 intel_npu_acceleration_library/backend/compression.py,sha256=Avz_zm2s_ELy5peVQ8zFGn8njBfh9nEGR16mflotBic,630
 intel_npu_acceleration_library/backend/convolution.py,sha256=cN3k78X3Y4Cbf7er-MFq0sJ4OwIvquj8PajpdEDmCo4,2018
-intel_npu_acceleration_library/backend/factory.py,sha256=
+intel_npu_acceleration_library/backend/factory.py,sha256=n63KE8X9eOuv2m2MiQFASjzgnkIM9deGtDC-qSHRMMw,38847
 intel_npu_acceleration_library/backend/linear.py,sha256=RiLUh5FOSxRWHB5kYx7mOPOOrS_vxIeBJ5t3yC6wOiQ,1908
 intel_npu_acceleration_library/backend/matmul.py,sha256=mfGi73-mIbUcXp4kyvCGW0Y9kb4Xp1ppbGNpdJFohuA,1819
 intel_npu_acceleration_library/backend/mlp.py,sha256=BuKVwSI726v3nHQQvtMBbXyWxRTq-WoLZtTxeSeWaaY,2330
@@ -187,7 +187,7 @@ intel_npu_acceleration_library/external/openvino/torch/__init__.py,sha256=RXLzsf
 intel_npu_acceleration_library/functional/__init__.py,sha256=WWKwKOh6Sgovv7mKctA872TbLP98Pg5m5-MREvUmlAA,204
 intel_npu_acceleration_library/functional/scaled_dot_product_attention.py,sha256=yGUcg4tDQOLuUnP1g74cl-ec8TRr2SuAMcNLlN6qLvE,1620
 intel_npu_acceleration_library/lib/Release/cache.json,sha256=CyrSqZUWo0Ec4_7ydOiuKIC0Gm8AybrGdozUqUuHxBw,8840377
-intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=
+intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=wmiXqhTSSogchjl8WNQnNYwjHiMtyYOvPvATeOUQkt8,304640
 intel_npu_acceleration_library/lib/Release/openvino.dll,sha256=_ifEwHwM-7LuKMhAnlqNuJ2GxsLXbG47easxl5E4shU,12624904
 intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll,sha256=hXFvu4oLvfNhCODn5eNYOmkxBb0LEKYXHA0sZLccOXc,195080
 intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll,sha256=nh_iDxejjHlkes-KT0IwBzEd4Ec0L3bXQFCl0Dqerf8,472072
@@ -217,7 +217,7 @@ intel_npu_acceleration_library/nn/functional.py,sha256=UfAKBc0u6RtyaMo14ldH2GpEn
 intel_npu_acceleration_library/nn/linear.py,sha256=Q06SoGQeLaI86nA_ky2GnFC6H2Fw1zyMDILKnpYC2eo,5739
 intel_npu_acceleration_library/nn/llm.py,sha256=P6dz36Yf6BHtzWcftaghC6QaMI_WeRfQwrCbO7fD6hk,15002
 intel_npu_acceleration_library/nn/module.py,sha256=klVK4A0O-7fLzEIhGhE6_eVgvyVK_NakAqpDq08Ju1Y,12637
-bigdl_core_npu-2.
-bigdl_core_npu-2.
-bigdl_core_npu-2.
-bigdl_core_npu-2.
+bigdl_core_npu-2.6.0b20241101.dist-info/METADATA,sha256=oOcjv-wWArv5l6x58K0TH2hhhvYI11BjUXyMYRFRcGc,1543
+bigdl_core_npu-2.6.0b20241101.dist-info/WHEEL,sha256=09_eAv2LFHDbyhcOULd5e3WJrC_F5q7AlLDftiw-PyE,101
+bigdl_core_npu-2.6.0b20241101.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
+bigdl_core_npu-2.6.0b20241101.dist-info/RECORD,,
intel_npu_acceleration_library/backend/base.py

@@ -153,7 +153,14 @@ class BaseNPUBackendWithPrefetch(BaseNPUBackend):
                 raise ValueError(f"Invalid dtype for scale: {scale.dtype}")
             else:
                 adapted_weights, shape = adapt_weight(weight)
-
+                if weight.dtype == np.uint8:
+                    backend_lib.addInt4WeightParameter(
+                        param,
+                        adapted_weights,
+                        *shape,
+                    )
+                else:
+                    backend_lib.addFloatParameter(param, adapted_weights, *shape)
         elif isinstance(weights, np.ndarray):
             adapted_weights, shape = adapt_weight(weights)
             backend_lib.addFloatParameter(param, adapted_weights, *shape)
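The new branch routes `np.uint8` weight arrays to a dedicated native entry point; given the `addInt4WeightParameter` binding added below, `uint8` evidently serves as the carrier type for packed int4 data. A minimal standalone sketch of just that dispatch (stand-in return values, not package code):

```python
import numpy as np

def pick_weight_entry_point(weight: np.ndarray) -> str:
    """Mirror the new dispatch: uint8 payloads take the int4 path."""
    if weight.dtype == np.uint8:
        return "backend_lib.addInt4WeightParameter"  # new in 2.6.0b20241101
    return "backend_lib.addFloatParameter"           # unchanged fallback

print(pick_weight_entry_point(np.zeros((4, 2), dtype=np.uint8)))    # int4 path
print(pick_weight_entry_point(np.zeros((4, 2), dtype=np.float16)))  # float path
```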
intel_npu_acceleration_library/backend/bindings.py

@@ -143,7 +143,7 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.slice.restype = handler

-    lib.compile.argtypes = [handler]
+    lib.compile.argtypes = [handler, ctypes.c_int]
     lib.compile.restype = handler

     lib.get_output_tensor_shape_size.argtypes = [handler, ctypes.c_int]
@@ -160,6 +160,7 @@ def init_network_factory(lib: ctypes.CDLL):
         ctypes.c_bool,
         ctypes.c_char_p,
         ctypes.c_char_p,
+        ctypes.c_bool,
     ]
     lib.linear.restype = handler

@@ -214,6 +215,41 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.max_pooling.restype = handler

+
+    lib.multi_concat.argtypes = [
+        handler,
+        ctypes.POINTER(handler),
+        ctypes.c_uint64,
+        ctypes.c_int64,
+    ]
+    lib.multi_concat.restype = handler
+
+    lib.dq_split_linear.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear.restype = handler
+
+    lib.dq_split_linear_prefill.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear_prefill.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
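`lib.multi_concat` takes a pointer to a C array of graph handles plus its length. A standalone sketch (not package code) of how ctypes marshals a Python list of handles into that `POINTER(handler)` argument; the `POINTER(c_char)` alias matches what the factory code later in this diff uses when it builds the array:

```python
import ctypes

handler = ctypes.POINTER(ctypes.c_char)  # opaque node handle, as in the bindings

# Stand-ins for nodes returned by earlier graph-building calls.
nodes = [ctypes.cast(ctypes.create_string_buffer(b"x"), handler) for _ in range(3)]

# (handler * n)(*nodes) lays the handles out contiguously; ctypes accepts the
# resulting array wherever POINTER(handler) is declared.
node_array = (handler * len(nodes))(*nodes)
print(ctypes.sizeof(node_array) == len(nodes) * ctypes.sizeof(handler))  # True
```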
@@ -260,6 +296,13 @@ def init_parameters(lib: ctypes.CDLL):
         ctypes.c_int,
     ]

+    lib.addInt4WeightParameter.argtypes = [
+        handler,
+        c_u8_array,
+        ctypes.c_int,
+        ctypes.c_int,
+    ]
+

 def initialize_bindings() -> ctypes.CDLL:
     """Load the Intel® NPU Acceleration Library runtime library, and initialize all c++ <-> python bindings.
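`addInt4WeightParameter` receives its payload as a `c_u8_array` plus two dimensions, so int4 values must already be packed two per byte on the Python side. A hedged sketch of one conventional packing scheme (low nibble first; the package's actual nibble layout is not shown in this diff and may differ):

```python
import numpy as np

def pack_int4(values: np.ndarray) -> np.ndarray:
    """Pack an even-length array of ints in [-8, 7] into uint8 nibble pairs."""
    nibbles = (values.astype(np.int8) & 0x0F).astype(np.uint8)  # two's-complement nibbles
    return (nibbles[0::2] | (nibbles[1::2] << 4)).astype(np.uint8)  # low nibble first

packed = pack_int4(np.array([-8, 7, 1, -1]))
print(packed, packed.dtype)  # [120 241] uint8 -> 2 bytes carry 4 int4 values
```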
intel_npu_acceleration_library/backend/factory.py

@@ -95,6 +95,75 @@ class NNFactory(BaseNPUBackendWithPrefetch):

         return cast(F, wrapper)

+    def return_tensor_for_list_inputs(fn: F) -> F:  # type: ignore
+        """Wrap the output of a function in a Tensor object.
+        This wrapper also adds support for list-of-Tensor inputs.
+
+        Args:
+            fn (function): Function
+
+        Returns:
+            function: A function that wraps the output in a Tensor object
+        """
+
+        def wrapper(self, *args: Any, **kwargs: Any) -> Tensor:
+            """Wrap the output of a function in a Tensor object.
+
+            Args:
+                args (Any): Variable length argument list
+                kwargs (Any): Arbitrary keyword arguments
+
+            Returns:
+                Tensor: Tensor object
+            """
+            # Convert Tensor objects to their underlying node
+            # args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
+            new_args = []
+            for arg in args:
+                if isinstance(arg, Tensor):
+                    new_args.append(arg.node)
+                elif isinstance(arg, (tuple, list)):
+                    # for item in arg:
+                    for i in range(len(arg)):
+                        if isinstance(arg[i], Tensor):
+                            arg[i] = arg[i].node
+                    new_args.append(arg)
+                else:
+                    new_args.append(arg)
+            args = tuple(new_args)
+            kwargs = {
+                k: v.node if isinstance(v, Tensor) else v for k, v in kwargs.items()
+            }
+
+            # input_nodes = [arg for arg in args if isinstance(arg, ctypes._Pointer)] + [
+            #     v for v in kwargs.values() if isinstance(v, ctypes._Pointer)
+            # ]
+            input_nodes = []
+            for arg in args:
+                if isinstance(arg, ctypes._Pointer):
+                    input_nodes.append(arg)
+                elif isinstance(arg, (tuple, list)):
+                    for item in arg:
+                        if isinstance(item, ctypes._Pointer):
+                            input_nodes.append(item)
+            input_nodes += [v for v in kwargs.values() if isinstance(v, ctypes._Pointer)]
+
+            # Call the function
+            node = fn(self, *args, **kwargs)
+
+            # remove input nodes from output_nodes
+            self.output_nodes = [
+                node for node in self.output_nodes if node not in input_nodes
+            ]
+            # add output node to output_nodes
+            if fn.__name__ != "constant":
+                self.output_nodes.append(node)
+
+            # Wrap the node in a Tensor object
+            return Tensor(factory=self, node=node)
+
+        return cast(F, wrapper)
+
     @return_tensor
     def _call_backend_op(self, op_name: str, *parameters: Any) -> Any:
         """Dynamically call a backend operation.
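The decorator above differs from the existing `return_tensor` only in that it also descends one level into list/tuple arguments, both when unwrapping `Tensor`s and when collecting input nodes. A simplified, self-contained sketch of that pattern with toy types (all names hypothetical, not package code):

```python
from typing import Any, Callable

class FakeTensor:
    """Toy stand-in for the library's Tensor wrapper around a graph node."""
    def __init__(self, node: str) -> None:
        self.node = node

def unwrap_lists(fn: Callable[..., Any]) -> Callable[..., Any]:
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        def unwrap(a: Any) -> Any:
            if isinstance(a, FakeTensor):
                return a.node
            if isinstance(a, (list, tuple)):
                return [unwrap(item) for item in a]  # descend into sequences
            return a
        return fn(*[unwrap(a) for a in args],
                  **{k: unwrap(v) for k, v in kwargs.items()})
    return wrapper

@unwrap_lists
def concat(nodes, axis):
    return f"concat({nodes}, axis={axis})"

print(concat([FakeTensor("a"), FakeTensor("b")], axis=-1))
# -> concat(['a', 'b'], axis=-1)
```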
@@ -319,6 +388,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         bias: Optional[bool] = False,
         act_dtype: npt.DTypeLike = np.float16,
         wt_dtype: npt.DTypeLike = np.float16,
+        scale_factor: bool = True,
     ) -> ctypes._Pointer:
         """Generate a linear layer.

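This is the Python-side counterpart of the extra `ctypes.c_bool` appended to `lib.linear.argtypes` earlier in the diff: `linear` now forwards a `scale_factor` flag to the native op. A hedged usage sketch (shapes, the positional channel arguments, and the `parameter` input helper are assumptions; actually running this requires the bundled NPU runtime):

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

factory = NNFactory()
x = factory.parameter((1, 512), dtype=np.float16)
# scale_factor defaults to True; pass False to skip the mul-scale step.
y = factory.linear(x, 512, 512, bias=False, scale_factor=False)
```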
@@ -341,7 +411,40 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             bias,
             self.get_backend_dtype(act_dtype),
             self.get_backend_dtype(wt_dtype),
+            scale_factor
         )
+
+    @return_tensor
+    def dq_split_linear(
+        self, input_node: ctypes._Pointer, n_splits: int,
+        output_channels: int, input_channels: int, bias: bool = False,
+        act_dtype: npt.DTypeLike = np.float16,
+        wt_dtype: npt.DTypeLike = np.float16,
+        scale_factor: bool = True,
+        is_prefill: bool = False,
+    ) -> ctypes._Pointer:
+        """Generate a split linear layer with dynamic quantization.
+
+        Args:
+            input_node (ctypes._Pointer): layer input node
+            n_splits (int): number of parts the linear layer is split into
+            output_channels (int): number of output channels
+            input_channels (int): number of input channels
+            bias (bool, optional): enable/disable bias. Defaults to False.
+            act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
+            wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+            scale_factor (bool, optional): enable/disable mul scale factor. Defaults to True.
+            is_prefill (bool, optional): enable/disable prefill linear optimization. Defaults to False.
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        func = backend_lib.dq_split_linear_prefill if is_prefill else backend_lib.dq_split_linear
+        return func(self._mm, input_node, n_splits,
+                    input_channels, output_channels, bias,
+                    self.get_backend_dtype(act_dtype),
+                    self.get_backend_dtype(wt_dtype),
+                    scale_factor)

     @return_tensor
     def reshape(
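Depending on `is_prefill`, the call is routed to the `dq_split_linear_prefill` or `dq_split_linear` binding declared earlier in this diff. A hedged end-to-end sketch (illustrative sizes; `parameter` as the input-declaration helper is an assumption, and running it requires the NPU runtime):

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

factory = NNFactory()
x = factory.parameter((1, 4096), dtype=np.float16)
# Split a 4096x4096 projection into 2 parts with dynamic quantization
# (decode path; set is_prefill=True for the prefill-optimized variant).
y = factory.dq_split_linear(x, 2, 4096, 4096, bias=False, is_prefill=False)
factory.compile()
```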
@@ -474,6 +577,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             axis = np.int64(axis)
         return backend_lib.concat(self._mm, input_node_1, input_node_2, axis)

+    @return_tensor_for_list_inputs
+    def sequence_concat(
+        self, input_nodes: List[ctypes._Pointer], axis: int
+    ) -> ctypes._Pointer:
+        """Generate a concatenation layer over a sequence of inputs.
+
+        Args:
+            input_nodes (List[ctypes._Pointer]): sequence of layer input nodes
+            axis (int): axis
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        if axis < 0:
+            shape_size = backend_lib.op_shape_size(input_nodes[0])
+            axis = (axis + shape_size) % shape_size
+        axis = np.int64(axis)
+
+        input_ptr = (ctypes.POINTER(ctypes.c_char) * len(input_nodes))(*input_nodes)
+        return backend_lib.multi_concat(self._mm, input_ptr, len(input_nodes), axis)
+
     @return_tensor
     def reduce_max(
         self,
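A short hedged sketch of the variadic concat (illustrative shapes; requires the runtime). Because the method is decorated with `return_tensor_for_list_inputs`, a plain Python list of `Tensor`s is accepted and unwrapped in place:

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

factory = NNFactory()
chunks = [factory.parameter((1, 128), dtype=np.float16) for _ in range(3)]
# Negative axes are normalized against the first input's rank.
cat = factory.sequence_concat(chunks, axis=-1)  # expected shape: (1, 384)
```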
@@ -777,6 +901,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             auto_pad,  # auto_pad
         )

+    @return_tensor
+    def scaled_dot_product_attention(
+        self, query: ctypes._Pointer, key: ctypes._Pointer,
+        value: ctypes._Pointer, attn_mask: ctypes._Pointer,
+        is_causal: bool
+    ) -> ctypes._Pointer:
+        """Construct a ScaledDotProductAttention operation.
+        Args:
+            query (ctypes._Pointer): query
+            key (ctypes._Pointer): key
+            value (ctypes._Pointer): value
+            attn_mask (ctypes._Pointer): attention mask
+            is_causal (bool): causal/not causal
+        Returns:
+            ctypes._Pointer: output node
+        """
+        return backend_lib.scaled_dot_product_attention(self._mm,
+                                                        query, key,
+                                                        value, attn_mask,
+                                                        is_causal)
+
     def get_tensor_shape(self, node):
         """Get tensor shape.

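A hedged wiring sketch for the new attention node (head counts, sequence lengths, and the mask shape are invented for illustration; requires the runtime):

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

factory = NNFactory()
q = factory.parameter((1, 8, 64, 64), dtype=np.float16)
k = factory.parameter((1, 8, 64, 64), dtype=np.float16)
v = factory.parameter((1, 8, 64, 64), dtype=np.float16)
mask = factory.parameter((1, 1, 64, 64), dtype=np.float16)
out = factory.scaled_dot_product_attention(q, k, v, mask, is_causal=False)
```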
@@ -826,7 +971,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         else:
             raise RuntimeError("Unsupported dtype")

-    def compile(self):
+    def compile(self, npu_dpu_groups=4):
         """Finalize and compile a model."""
         self.out = []
         self.torch_out = []
@@ -834,7 +979,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             backend_lib.result(self._mm, node)

         # Compile the model
-        backend_lib.compile(self._mm)
+        backend_lib.compile(self._mm, npu_dpu_groups)

         for idx, node in enumerate(self.output_nodes):
             output_shape = self.get_tensor_shape(node)
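The two hunks above plumb a single knob through to the native compiler: `compile` now takes an `npu_dpu_groups` hint (default 4) that is forwarded to `backend_lib.compile`, matching the `ctypes.c_int` added to its argtypes earlier. A hedged usage sketch (toy graph; requires the runtime):

```python
import numpy as np
from intel_npu_acceleration_library.backend.factory import NNFactory

factory = NNFactory()
x = factory.parameter((1, 256), dtype=np.float16)
y = factory.linear(x, 256, 256, bias=False)
factory.compile(npu_dpu_groups=2)  # default is 4; forwarded to backend_lib.compile
```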
intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll

Binary file (contents not shown).

{bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b20241101.dist-info}/top_level.txt

File without changes.