bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0b2__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b2.dist-info}/METADATA +1 -1
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b2.dist-info}/RECORD +9 -9
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b2.dist-info}/WHEEL +1 -1
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/base.py +8 -1
- intel_npu_acceleration_library/backend/bindings.py +30 -0
- intel_npu_acceleration_library/backend/factory.py +120 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0b2.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
intel_npu_acceleration_library/__init__.py,sha256=ZKTIhGMDjF7P6pF-yX8KWcSXbeHWRk24AO_orsa18f8,536
|
2
|
-
intel_npu_acceleration_library/_version.py,sha256
|
2
|
+
intel_npu_acceleration_library/_version.py,sha256=o0ui4TyomjRVwLdl9zwG9b1pXUD6KLzj1zUaNOMu9V8,105
|
3
3
|
intel_npu_acceleration_library/compiler.py,sha256=3IdgqjamSC8MLexDBJypIeZRiWIcTFnvQSU1LPXUr7Y,6225
|
4
4
|
intel_npu_acceleration_library/device.py,sha256=TbG4cJ197qo7PJQ5zz9zfxbuXB5OTWJlKNaKL4TAlms,7395
|
5
5
|
intel_npu_acceleration_library/dtypes.py,sha256=1CV4FIuvlmLsTCS1nCCEwq4EzZmD3thj1_92v5vajpw,3539
|
@@ -7,11 +7,11 @@ intel_npu_acceleration_library/modelling.py,sha256=vSiQOWGJ0l6wGV7zWQtZEkHpnMQIM
|
|
7
7
|
intel_npu_acceleration_library/optimizations.py,sha256=9NY8QoDFbs2LY12jbx6As8g2v0oInX4YzvkjnqViA70,5469
|
8
8
|
intel_npu_acceleration_library/quantization.py,sha256=6N_04h1KX6TNbw-ceANV0Pmk4_lQ2Y9C7Pwn5x-zQzo,5566
|
9
9
|
intel_npu_acceleration_library/backend/__init__.py,sha256=2NP6Ypr1dGUNXmLGW5GD9xrh0U9KJgqxTd_c7su1RUY,857
|
10
|
-
intel_npu_acceleration_library/backend/base.py,sha256=
|
11
|
-
intel_npu_acceleration_library/backend/bindings.py,sha256=
|
10
|
+
intel_npu_acceleration_library/backend/base.py,sha256=hbHqxSOfWH5BaA5PY6_zaf1Zdg5NrQK6WOfe-hr279k,8605
|
11
|
+
intel_npu_acceleration_library/backend/bindings.py,sha256=g802BSzQUqAS-LzgUP4UGfrxYHNG397we31xanDs3B8,8576
|
12
12
|
intel_npu_acceleration_library/backend/compression.py,sha256=Avz_zm2s_ELy5peVQ8zFGn8njBfh9nEGR16mflotBic,630
|
13
13
|
intel_npu_acceleration_library/backend/convolution.py,sha256=cN3k78X3Y4Cbf7er-MFq0sJ4OwIvquj8PajpdEDmCo4,2018
|
14
|
-
intel_npu_acceleration_library/backend/factory.py,sha256=
|
14
|
+
intel_npu_acceleration_library/backend/factory.py,sha256=WqnpZDT3do8213BzSZLr0z8_d3sVyuBhYURZH1rEDAA,37688
|
15
15
|
intel_npu_acceleration_library/backend/linear.py,sha256=RiLUh5FOSxRWHB5kYx7mOPOOrS_vxIeBJ5t3yC6wOiQ,1908
|
16
16
|
intel_npu_acceleration_library/backend/matmul.py,sha256=mfGi73-mIbUcXp4kyvCGW0Y9kb4Xp1ppbGNpdJFohuA,1819
|
17
17
|
intel_npu_acceleration_library/backend/mlp.py,sha256=BuKVwSI726v3nHQQvtMBbXyWxRTq-WoLZtTxeSeWaaY,2330
|
@@ -187,7 +187,7 @@ intel_npu_acceleration_library/external/openvino/torch/__init__.py,sha256=RXLzsf
|
|
187
187
|
intel_npu_acceleration_library/functional/__init__.py,sha256=WWKwKOh6Sgovv7mKctA872TbLP98Pg5m5-MREvUmlAA,204
|
188
188
|
intel_npu_acceleration_library/functional/scaled_dot_product_attention.py,sha256=yGUcg4tDQOLuUnP1g74cl-ec8TRr2SuAMcNLlN6qLvE,1620
|
189
189
|
intel_npu_acceleration_library/lib/Release/cache.json,sha256=CyrSqZUWo0Ec4_7ydOiuKIC0Gm8AybrGdozUqUuHxBw,8840377
|
190
|
-
intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=
|
190
|
+
intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=ZxYdf6i90mDDNCd4jgtDDfFXFqGwsopGA6u5I9nWdRo,301056
|
191
191
|
intel_npu_acceleration_library/lib/Release/openvino.dll,sha256=_ifEwHwM-7LuKMhAnlqNuJ2GxsLXbG47easxl5E4shU,12624904
|
192
192
|
intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll,sha256=hXFvu4oLvfNhCODn5eNYOmkxBb0LEKYXHA0sZLccOXc,195080
|
193
193
|
intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll,sha256=nh_iDxejjHlkes-KT0IwBzEd4Ec0L3bXQFCl0Dqerf8,472072
|
@@ -217,7 +217,7 @@ intel_npu_acceleration_library/nn/functional.py,sha256=UfAKBc0u6RtyaMo14ldH2GpEn
|
|
217
217
|
intel_npu_acceleration_library/nn/linear.py,sha256=Q06SoGQeLaI86nA_ky2GnFC6H2Fw1zyMDILKnpYC2eo,5739
|
218
218
|
intel_npu_acceleration_library/nn/llm.py,sha256=P6dz36Yf6BHtzWcftaghC6QaMI_WeRfQwrCbO7fD6hk,15002
|
219
219
|
intel_npu_acceleration_library/nn/module.py,sha256=klVK4A0O-7fLzEIhGhE6_eVgvyVK_NakAqpDq08Ju1Y,12637
|
220
|
-
bigdl_core_npu-2.
|
221
|
-
bigdl_core_npu-2.
|
222
|
-
bigdl_core_npu-2.
|
223
|
-
bigdl_core_npu-2.
|
220
|
+
bigdl_core_npu-2.6.0b2.dist-info/METADATA,sha256=9h9wb4lWrHS8XM0QCcw9PPeZevup5swuMihy_tBBqZE,1536
|
221
|
+
bigdl_core_npu-2.6.0b2.dist-info/WHEEL,sha256=WutsMqxRjo8PALJe8NWxuOYrO2lUIIHDIxhZ8tjc8BY,101
|
222
|
+
bigdl_core_npu-2.6.0b2.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
|
223
|
+
bigdl_core_npu-2.6.0b2.dist-info/RECORD,,
|
@@ -153,7 +153,14 @@ class BaseNPUBackendWithPrefetch(BaseNPUBackend):
|
|
153
153
|
raise ValueError(f"Invalid dtype for scale: {scale.dtype}")
|
154
154
|
else:
|
155
155
|
adapted_weights, shape = adapt_weight(weight)
|
156
|
-
|
156
|
+
if weight.dtype == np.uint8:
|
157
|
+
backend_lib.addInt4WeightParameter(
|
158
|
+
param,
|
159
|
+
adapted_weights,
|
160
|
+
*shape,
|
161
|
+
)
|
162
|
+
else:
|
163
|
+
backend_lib.addFloatParameter(param, adapted_weights, *shape)
|
157
164
|
elif isinstance(weights, np.ndarray):
|
158
165
|
adapted_weights, shape = adapt_weight(weights)
|
159
166
|
backend_lib.addFloatParameter(param, adapted_weights, *shape)
|
@@ -160,6 +160,7 @@ def init_network_factory(lib: ctypes.CDLL):
|
|
160
160
|
ctypes.c_bool,
|
161
161
|
ctypes.c_char_p,
|
162
162
|
ctypes.c_char_p,
|
163
|
+
ctypes.c_bool,
|
163
164
|
]
|
164
165
|
lib.linear.restype = handler
|
165
166
|
|
@@ -214,6 +215,28 @@ def init_network_factory(lib: ctypes.CDLL):
|
|
214
215
|
]
|
215
216
|
lib.max_pooling.restype = handler
|
216
217
|
|
218
|
+
|
219
|
+
lib.multi_concat.argtypes = [
|
220
|
+
handler,
|
221
|
+
ctypes.POINTER(handler),
|
222
|
+
ctypes.c_uint64,
|
223
|
+
ctypes.c_int64,
|
224
|
+
]
|
225
|
+
lib.multi_concat.restype = handler
|
226
|
+
|
227
|
+
lib.dq_split_linear.argtypes = [
|
228
|
+
handler,
|
229
|
+
handler,
|
230
|
+
ctypes.c_int,
|
231
|
+
ctypes.c_int,
|
232
|
+
ctypes.c_int,
|
233
|
+
ctypes.c_bool,
|
234
|
+
ctypes.c_char_p,
|
235
|
+
ctypes.c_char_p,
|
236
|
+
ctypes.c_bool,
|
237
|
+
]
|
238
|
+
lib.dq_split_linear.restype = handler
|
239
|
+
|
217
240
|
for op in get_supported_ops():
|
218
241
|
fn = getattr(lib, op.name)
|
219
242
|
fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
|
@@ -260,6 +283,13 @@ def init_parameters(lib: ctypes.CDLL):
|
|
260
283
|
ctypes.c_int,
|
261
284
|
]
|
262
285
|
|
286
|
+
lib.addInt4WeightParameter.argtypes = [
|
287
|
+
handler,
|
288
|
+
c_u8_array,
|
289
|
+
ctypes.c_int,
|
290
|
+
ctypes.c_int,
|
291
|
+
]
|
292
|
+
|
263
293
|
|
264
294
|
def initialize_bindings() -> ctypes.CDLL:
|
265
295
|
"""Load the Intel® NPU Acceleration Library runtime library, and initialize all c++ <-> python bindings.
|
@@ -95,6 +95,75 @@ class NNFactory(BaseNPUBackendWithPrefetch):
|
|
95
95
|
|
96
96
|
return cast(F, wrapper)
|
97
97
|
|
98
|
+
def return_tensor_for_list_inputs(fn: F) -> F:  # type: ignore
    """Wrap a node-building method so it accepts and returns Tensor objects.

    In addition to unwrapping Tensor arguments passed directly, this wrapper
    also unwraps Tensor objects found inside positional arguments that are
    lists or tuples.

    Args:
        fn (function): Function that builds a backend node

    Returns:
        function: A function that wraps the output in a Tensor object
    """

    def unwrap(value: Any) -> Any:
        """Return the underlying node of a Tensor, or the value unchanged."""
        return value.node if isinstance(value, Tensor) else value

    def wrapper(self, *args: Any, **kwargs: Any) -> Tensor:
        """Unwrap Tensor inputs, call the wrapped function and wrap its result.

        Args:
            args (Any): Variable length argument list
            kwargs (Any): Arbitrary keyword arguments

        Returns:
            Tensor: Tensor object
        """
        # Convert Tensor objects to their underlying node. Sequences are
        # rebuilt rather than modified in place: the caller's list keeps its
        # Tensor objects, and tuples (which are immutable) are supported.
        converted_args = []
        for arg in args:
            if isinstance(arg, (tuple, list)):
                items = [unwrap(item) for item in arg]
                converted_args.append(
                    tuple(items) if isinstance(arg, tuple) else items
                )
            else:
                converted_args.append(unwrap(arg))
        args = tuple(converted_args)
        kwargs = {key: unwrap(value) for key, value in kwargs.items()}

        # Collect every node handed in as an input, including nodes nested
        # one level deep inside list/tuple positional arguments.
        input_nodes = []
        for arg in args:
            if isinstance(arg, ctypes._Pointer):
                input_nodes.append(arg)
            elif isinstance(arg, (tuple, list)):
                input_nodes.extend(
                    item for item in arg if isinstance(item, ctypes._Pointer)
                )
        input_nodes.extend(
            value for value in kwargs.values() if isinstance(value, ctypes._Pointer)
        )

        # Call the function
        node = fn(self, *args, **kwargs)

        # Nodes consumed as inputs are dropped from output_nodes
        self.output_nodes = [
            candidate
            for candidate in self.output_nodes
            if candidate not in input_nodes
        ]
        # The new node is recorded as an output unless fn is named "constant"
        if fn.__name__ != "constant":
            self.output_nodes.append(node)

        # Wrap the node in a Tensor object
        return Tensor(factory=self, node=node)

    return cast(F, wrapper)
|
166
|
+
|
98
167
|
@return_tensor
|
99
168
|
def _call_backend_op(self, op_name: str, *parameters: Any) -> Any:
|
100
169
|
"""Dynamically call a backend operation.
|
@@ -319,6 +388,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
|
|
319
388
|
bias: Optional[bool] = False,
|
320
389
|
act_dtype: npt.DTypeLike = np.float16,
|
321
390
|
wt_dtype: npt.DTypeLike = np.float16,
|
391
|
+
scale_factor: bool = True,
|
322
392
|
) -> ctypes._Pointer:
|
323
393
|
"""Generate a linear layer.
|
324
394
|
|
@@ -341,7 +411,36 @@ class NNFactory(BaseNPUBackendWithPrefetch):
|
|
341
411
|
bias,
|
342
412
|
self.get_backend_dtype(act_dtype),
|
343
413
|
self.get_backend_dtype(wt_dtype),
|
414
|
+
scale_factor
|
344
415
|
)
|
416
|
+
|
417
|
+
@return_tensor
def dq_split_linear(
    self,
    input_node: ctypes._Pointer,
    n_splits: int,
    outout_channels: int,
    input_channels: int,
    bias: bool = False,
    act_dtype: npt.DTypeLike = np.float16,
    wt_dtype: npt.DTypeLike = np.float16,
    scale_factor: bool = True,
) -> ctypes._Pointer:
    """Generate a split linear layer for dynamic quantization.

    Args:
        input_node (ctypes._Pointer): layer input node
        n_splits (int): number of parts the linear layer is split into
        outout_channels (int): number of output channels. The misspelled
            name is part of the public signature and is kept so existing
            keyword callers continue to work.
        input_channels (int): number of input channels
        bias (bool, optional): enable/disable bias. Defaults to False.
        act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
        wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
        scale_factor (bool, optional): flag forwarded unchanged to the backend;
            its exact effect is not visible from this method. Defaults to True.

    Returns:
        ctypes._Pointer: output node
    """
    # Translate the numpy dtypes into the backend's dtype representation.
    activation_type = self.get_backend_dtype(act_dtype)
    weight_type = self.get_backend_dtype(wt_dtype)

    # NOTE(review): input_channels is passed before outout_channels here;
    # presumably this matches the native dq_split_linear signature - confirm.
    return backend_lib.dq_split_linear(
        self._mm,
        input_node,
        n_splits,
        input_channels,
        outout_channels,
        bias,
        activation_type,
        weight_type,
        scale_factor,
    )
|
345
444
|
|
346
445
|
@return_tensor
|
347
446
|
def reshape(
|
@@ -474,6 +573,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
|
|
474
573
|
axis = np.int64(axis)
|
475
574
|
return backend_lib.concat(self._mm, input_node_1, input_node_2, axis)
|
476
575
|
|
576
|
+
@return_tensor_for_list_inputs
def sequence_concat(
    self, input_nodes: List[ctypes._Pointer], axis: int
) -> ctypes._Pointer:
    """Generate a concatenation layer over a sequence of input nodes.

    Args:
        input_nodes (List[ctypes._Pointer]): sequence of layer input nodes
        axis (int): axis along which to concatenate. A negative value is
            normalized using the size reported by the backend for the
            first input node.

    Raises:
        ValueError: if input_nodes is empty

    Returns:
        ctypes._Pointer: output node
    """
    # Fail early with a clear message instead of an IndexError below or a
    # zero-length handle array reaching the native library.
    if not input_nodes:
        raise ValueError("sequence_concat requires at least one input node")

    if axis < 0:
        # Map a negative axis onto the equivalent non-negative index.
        shape_size = backend_lib.op_shape_size(input_nodes[0])
        axis = (axis + shape_size) % shape_size
    axis = np.int64(axis)

    # Pack the node handles into a contiguous ctypes array for the native call.
    node_count = len(input_nodes)
    handle_array_type = ctypes.POINTER(ctypes.c_char) * node_count
    input_ptr = handle_array_type(*input_nodes)
    return backend_lib.multi_concat(self._mm, input_ptr, node_count, axis)
|
596
|
+
|
477
597
|
@return_tensor
|
478
598
|
def reduce_max(
|
479
599
|
self,
|
Binary file
|
File without changes
|