bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0b2__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bigdl-core-npu
-Version: 2.5.0
+Version: 2.6.0b2
 Summary: Intel® NPU Acceleration Library
 Home-page: https://github.com/intel/intel-npu-acceleration-library
 Author: Alessandro Palla
@@ -1,5 +1,5 @@
 intel_npu_acceleration_library/__init__.py,sha256=ZKTIhGMDjF7P6pF-yX8KWcSXbeHWRk24AO_orsa18f8,536
-intel_npu_acceleration_library/_version.py,sha256=-yyXJHoPI8Uu4p1coZDeAWH6XHHdLed8GM4ogYbrieE,103
+intel_npu_acceleration_library/_version.py,sha256=o0ui4TyomjRVwLdl9zwG9b1pXUD6KLzj1zUaNOMu9V8,105
 intel_npu_acceleration_library/compiler.py,sha256=3IdgqjamSC8MLexDBJypIeZRiWIcTFnvQSU1LPXUr7Y,6225
 intel_npu_acceleration_library/device.py,sha256=TbG4cJ197qo7PJQ5zz9zfxbuXB5OTWJlKNaKL4TAlms,7395
 intel_npu_acceleration_library/dtypes.py,sha256=1CV4FIuvlmLsTCS1nCCEwq4EzZmD3thj1_92v5vajpw,3539
@@ -7,11 +7,11 @@ intel_npu_acceleration_library/modelling.py,sha256=vSiQOWGJ0l6wGV7zWQtZEkHpnMQIM
 intel_npu_acceleration_library/optimizations.py,sha256=9NY8QoDFbs2LY12jbx6As8g2v0oInX4YzvkjnqViA70,5469
 intel_npu_acceleration_library/quantization.py,sha256=6N_04h1KX6TNbw-ceANV0Pmk4_lQ2Y9C7Pwn5x-zQzo,5566
 intel_npu_acceleration_library/backend/__init__.py,sha256=2NP6Ypr1dGUNXmLGW5GD9xrh0U9KJgqxTd_c7su1RUY,857
-intel_npu_acceleration_library/backend/base.py,sha256=7L1SE-8HKSB5efP8ACQ5tKa89NBkQlf2IxXrSUxGvjs,8317
-intel_npu_acceleration_library/backend/bindings.py,sha256=zoF6etBvQWwAsQmA-woyivZAmZk1RfJaWNn0QShaPjs,7925
+intel_npu_acceleration_library/backend/base.py,sha256=hbHqxSOfWH5BaA5PY6_zaf1Zdg5NrQK6WOfe-hr279k,8605
+intel_npu_acceleration_library/backend/bindings.py,sha256=g802BSzQUqAS-LzgUP4UGfrxYHNG397we31xanDs3B8,8576
 intel_npu_acceleration_library/backend/compression.py,sha256=Avz_zm2s_ELy5peVQ8zFGn8njBfh9nEGR16mflotBic,630
 intel_npu_acceleration_library/backend/convolution.py,sha256=cN3k78X3Y4Cbf7er-MFq0sJ4OwIvquj8PajpdEDmCo4,2018
-intel_npu_acceleration_library/backend/factory.py,sha256=9RyDBzJJYKiFOd0IxMZl5dr6K_pDvfehhrGsE7xTTAw,32773
+intel_npu_acceleration_library/backend/factory.py,sha256=WqnpZDT3do8213BzSZLr0z8_d3sVyuBhYURZH1rEDAA,37688
 intel_npu_acceleration_library/backend/linear.py,sha256=RiLUh5FOSxRWHB5kYx7mOPOOrS_vxIeBJ5t3yC6wOiQ,1908
 intel_npu_acceleration_library/backend/matmul.py,sha256=mfGi73-mIbUcXp4kyvCGW0Y9kb4Xp1ppbGNpdJFohuA,1819
 intel_npu_acceleration_library/backend/mlp.py,sha256=BuKVwSI726v3nHQQvtMBbXyWxRTq-WoLZtTxeSeWaaY,2330
@@ -187,7 +187,7 @@ intel_npu_acceleration_library/external/openvino/torch/__init__.py,sha256=RXLzsf
 intel_npu_acceleration_library/functional/__init__.py,sha256=WWKwKOh6Sgovv7mKctA872TbLP98Pg5m5-MREvUmlAA,204
 intel_npu_acceleration_library/functional/scaled_dot_product_attention.py,sha256=yGUcg4tDQOLuUnP1g74cl-ec8TRr2SuAMcNLlN6qLvE,1620
 intel_npu_acceleration_library/lib/Release/cache.json,sha256=CyrSqZUWo0Ec4_7ydOiuKIC0Gm8AybrGdozUqUuHxBw,8840377
-intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=zkSTv2wpkfCfOIkCPXZ3O7RfTTo8xIF7d7Yu_W3wJxk,281600
+intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll,sha256=ZxYdf6i90mDDNCd4jgtDDfFXFqGwsopGA6u5I9nWdRo,301056
 intel_npu_acceleration_library/lib/Release/openvino.dll,sha256=_ifEwHwM-7LuKMhAnlqNuJ2GxsLXbG47easxl5E4shU,12624904
 intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll,sha256=hXFvu4oLvfNhCODn5eNYOmkxBb0LEKYXHA0sZLccOXc,195080
 intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll,sha256=nh_iDxejjHlkes-KT0IwBzEd4Ec0L3bXQFCl0Dqerf8,472072
@@ -217,7 +217,7 @@ intel_npu_acceleration_library/nn/functional.py,sha256=UfAKBc0u6RtyaMo14ldH2GpEn
 intel_npu_acceleration_library/nn/linear.py,sha256=Q06SoGQeLaI86nA_ky2GnFC6H2Fw1zyMDILKnpYC2eo,5739
 intel_npu_acceleration_library/nn/llm.py,sha256=P6dz36Yf6BHtzWcftaghC6QaMI_WeRfQwrCbO7fD6hk,15002
 intel_npu_acceleration_library/nn/module.py,sha256=klVK4A0O-7fLzEIhGhE6_eVgvyVK_NakAqpDq08Ju1Y,12637
-bigdl_core_npu-2.5.0.dist-info/METADATA,sha256=NhXfzEaj8jWORFpNU4y5qcnnZVv8sjOMWfnhKcER2cE,1534
-bigdl_core_npu-2.5.0.dist-info/WHEEL,sha256=SAw8ns6kJWJnXnbRE00TtcpGWx44Z3WvjJsDRxwIxr8,101
-bigdl_core_npu-2.5.0.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
-bigdl_core_npu-2.5.0.dist-info/RECORD,,
+bigdl_core_npu-2.6.0b2.dist-info/METADATA,sha256=9h9wb4lWrHS8XM0QCcw9PPeZevup5swuMihy_tBBqZE,1536
+bigdl_core_npu-2.6.0b2.dist-info/WHEEL,sha256=WutsMqxRjo8PALJe8NWxuOYrO2lUIIHDIxhZ8tjc8BY,101
+bigdl_core_npu-2.6.0b2.dist-info/top_level.txt,sha256=CH3qQoleRBC1eThu8mCEMxYNKdzJuXCtmeCXRKskt7A,31
+bigdl_core_npu-2.6.0b2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.1.0)
+Generator: setuptools (75.3.0)
 Root-Is-Purelib: false
 Tag: cp311-cp311-win_amd64
 
@@ -3,4 +3,4 @@
 # SPDX-License-Identifier: Apache 2.0
 #
 
-__version__ = "2.5.0"
+__version__ = "2.6.0b2"
@@ -153,7 +153,14 @@ class BaseNPUBackendWithPrefetch(BaseNPUBackend):
                         raise ValueError(f"Invalid dtype for scale: {scale.dtype}")
                 else:
                     adapted_weights, shape = adapt_weight(weight)
-                    backend_lib.addFloatParameter(param, adapted_weights, *shape)
+                    if weight.dtype == np.uint8:
+                        backend_lib.addInt4WeightParameter(
+                            param,
+                            adapted_weights,
+                            *shape,
+                        )
+                    else:
+                        backend_lib.addFloatParameter(param, adapted_weights, *shape)
         elif isinstance(weights, np.ndarray):
             adapted_weights, shape = adapt_weight(weights)
             backend_lib.addFloatParameter(param, adapted_weights, *shape)
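The new branch routes `uint8` weight buffers to `addInt4WeightParameter`: int4 weights have no native numpy dtype, so they travel as packed bytes. A minimal sketch of that packing, not code from the package (the low-nibble-first layout is an assumption for illustration):

```python
# Illustrative only: pack signed int4 values into a uint8 buffer of half the
# length, the kind of payload the uint8 branch above would receive.
import numpy as np

def pack_int4(values: np.ndarray) -> np.ndarray:
    """Pack an even-length array of ints in [-8, 7] into uint8 bytes."""
    assert values.size % 2 == 0
    nibbles = (values.astype(np.int8) & 0x0F).astype(np.uint8)  # two's-complement nibbles
    return nibbles[0::2] | (nibbles[1::2] << np.uint8(4))       # low nibble = even index

weights = np.array([-8, 7, 1, -1], dtype=np.int8)
print(pack_int4(weights))  # [120 241] — dtype uint8, half the element count
```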
@@ -160,6 +160,7 @@ def init_network_factory(lib: ctypes.CDLL):
         ctypes.c_bool,
         ctypes.c_char_p,
         ctypes.c_char_p,
+        ctypes.c_bool,
     ]
     lib.linear.restype = handler
 
@@ -214,6 +215,28 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.max_pooling.restype = handler
 
+
+    lib.multi_concat.argtypes = [
+        handler,
+        ctypes.POINTER(handler),
+        ctypes.c_uint64,
+        ctypes.c_int64,
+    ]
+    lib.multi_concat.restype = handler
+
+    lib.dq_split_linear.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
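Both new entry points follow the file's existing convention: declare `argtypes` and `restype` for each C symbol before calling it, so ctypes marshals arguments and return values correctly. The same pattern in miniature, against libc rather than the NPU DLL:

```python
# The argtypes/restype pattern on its own, using a C function available
# everywhere; the NPU DLL's symbols are declared the same way above.
import ctypes
import ctypes.util

libc = ctypes.CDLL(ctypes.util.find_library("c") or "msvcrt")  # msvcrt on Windows
libc.strlen.argtypes = [ctypes.c_char_p]  # declare the C signature up front
libc.strlen.restype = ctypes.c_size_t     # so the result isn't misinterpreted

print(libc.strlen(b"bigdl"))  # 5
```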
@@ -260,6 +283,13 @@ def init_parameters(lib: ctypes.CDLL):
         ctypes.c_int,
     ]
 
+    lib.addInt4WeightParameter.argtypes = [
+        handler,
+        c_u8_array,
+        ctypes.c_int,
+        ctypes.c_int,
+    ]
+
 
 def initialize_bindings() -> ctypes.CDLL:
     """Load the Intel® NPU Acceleration Library runtime library, and initialize all c++ <-> python bindings.
@@ -95,6 +95,75 @@ class NNFactory(BaseNPUBackendWithPrefetch):
 
         return cast(F, wrapper)
 
+    def return_tensor_for_list_inputs(fn: F) -> F:  # type: ignore
+        """Wrap the output of a function in a Tensor object.
+        This wrapper also adds support for list-of-Tensor inputs.
+
+        Args:
+            fn (function): Function
+
+        Returns:
+            function: A function that wraps the output in a Tensor object
+        """
+
+        def wrapper(self, *args: Any, **kwargs: Any) -> Tensor:
+            """Wrap the output of a function in a Tensor object.
+
+            Args:
+                args (Any): Variable length argument list
+                kwargs (Any): Arbitrary keyword arguments
+
+            Returns:
+                Tensor: Tensor object
+            """
+            # Convert Tensor objects to their underlying node
+            # args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
+            new_args = []
+            for arg in args:
+                if isinstance(arg, Tensor):
+                    new_args.append(arg.node)
+                elif isinstance(arg, (tuple, list)):
+                    # for item in arg:
+                    for i in range(len(arg)):
+                        if isinstance(arg[i], Tensor):
+                            arg[i] = arg[i].node
+                    new_args.append(arg)
+                else:
+                    new_args.append(arg)
+            args = tuple(new_args)
+            kwargs = {
+                k: v.node if isinstance(v, Tensor) else v for k, v in kwargs.items()
+            }
+
+            # input_nodes = [arg for arg in args if isinstance(arg, ctypes._Pointer)] + [
+            #     v for v in kwargs.values() if isinstance(v, ctypes._Pointer)
+            # ]
+            input_nodes = []
+            for arg in args:
+                if isinstance(arg, ctypes._Pointer):
+                    input_nodes.append(arg)
+                elif isinstance(arg, (tuple, list)):
+                    for item in arg:
+                        if isinstance(item, ctypes._Pointer):
+                            input_nodes.append(item)
+            input_nodes += [v for v in kwargs.values() if isinstance(v, ctypes._Pointer)]
+
+            # Call the function
+            node = fn(self, *args, **kwargs)
+
+            # remove input nodes from output_nodes
+            self.output_nodes = [
+                node for node in self.output_nodes if node not in input_nodes
+            ]
+            # add output node to output_nodes
+            if fn.__name__ != "constant":
+                self.output_nodes.append(node)
+
+            # Wrap the node in a Tensor object
+            return Tensor(factory=self, node=node)
+
+        return cast(F, wrapper)
+
     @return_tensor
     def _call_backend_op(self, op_name: str, *parameters: Any) -> Any:
         """Dynamically call a backend operation.
@@ -319,6 +388,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         bias: Optional[bool] = False,
         act_dtype: npt.DTypeLike = np.float16,
         wt_dtype: npt.DTypeLike = np.float16,
+        scale_factor: bool = True,
     ) -> ctypes._Pointer:
         """Generate a linear layer.
 
@@ -341,7 +411,36 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             bias,
             self.get_backend_dtype(act_dtype),
             self.get_backend_dtype(wt_dtype),
+            scale_factor,
         )
+
+    @return_tensor
+    def dq_split_linear(
+        self, input_node: ctypes._Pointer, n_splits: int,
+        output_channels: int, input_channels: int, bias: bool = False,
+        act_dtype: npt.DTypeLike = np.float16,
+        wt_dtype: npt.DTypeLike = np.float16,
+        scale_factor: bool = True,
+    ) -> ctypes._Pointer:
+        """Generate a dynamically quantized linear layer split into n parts.
+
+        Args:
+            input_node (ctypes._Pointer): layer input node
+            n_splits (int): number of parts the linear layer is split into
+            output_channels (int): number of output channels
+            input_channels (int): number of input channels
+            bias (bool, optional): enable/disable bias. Defaults to False.
+            act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
+            wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        return backend_lib.dq_split_linear(self._mm, input_node, n_splits,
+                                           input_channels, output_channels, bias,
+                                           self.get_backend_dtype(act_dtype),
+                                           self.get_backend_dtype(wt_dtype),
+                                           scale_factor)
 
     @return_tensor
     def reshape(
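A hedged usage sketch of the extended `linear` and the new `dq_split_linear`. The `NNFactory` setup follows the library's public examples, and the channel sizes are made-up illustrations, not values from this diff:

```python
# Hypothetical caller; parameter() follows the library's documented factory
# API, and all shapes here are invented for illustration.
import numpy as np
from intel_npu_acceleration_library.backend import NNFactory

factory = NNFactory()
x = factory.parameter((1, 4096), dtype=np.float16)

# linear() now forwards an extra scale_factor flag to the C++ backend
y = factory.linear(x, 11008, 4096, bias=False, scale_factor=True)

# dq_split_linear() builds the same layer split into n parts so each part
# can be dynamically quantized independently
z = factory.dq_split_linear(x, n_splits=2, output_channels=11008,
                            input_channels=4096, wt_dtype=np.int8)
```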
@@ -474,6 +573,27 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             axis = np.int64(axis)
         return backend_lib.concat(self._mm, input_node_1, input_node_2, axis)
 
+    @return_tensor_for_list_inputs
+    def sequence_concat(
+        self, input_nodes: List[ctypes._Pointer], axis: int
+    ) -> ctypes._Pointer:
+        """Generate a concatenation layer over a sequence of inputs.
+
+        Args:
+            input_nodes (List[ctypes._Pointer]): sequence of layer input nodes
+            axis (int): axis
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        if axis < 0:
+            shape_size = backend_lib.op_shape_size(input_nodes[0])
+            axis = (axis + shape_size) % shape_size
+        axis = np.int64(axis)
+
+        input_ptr = (ctypes.POINTER(ctypes.c_char) * len(input_nodes))(*input_nodes)
+        return backend_lib.multi_concat(self._mm, input_ptr, len(input_nodes), axis)
+
     @return_tensor
     def reduce_max(
         self,
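`sequence_concat` marshals its Python list into a contiguous C array of node pointers before calling the new `multi_concat` binding. The `(ctypes.POINTER(ctypes.c_char) * n)(*nodes)` idiom, isolated from the NPU context with plain byte buffers standing in for graph nodes:

```python
# The array-of-pointers idiom on its own: allocate a C array of char* and
# fill it from a Python list, matching multi_concat's POINTER(handler) arg.
import ctypes

bufs = [ctypes.create_string_buffer(b"a"), ctypes.create_string_buffer(b"b")]
ptrs = [ctypes.cast(b, ctypes.POINTER(ctypes.c_char)) for b in bufs]

arr = (ctypes.POINTER(ctypes.c_char) * len(ptrs))(*ptrs)  # char* arr[2]
print(arr[0][0], arr[1][0])  # b'a' b'b'
```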