bigdl-core-npu 2.6.0b20241112__cp311-cp311-win_amd64.whl → 2.6.0b20241118__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/METADATA +1 -1
- {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/RECORD +96 -86
- {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/WHEEL +1 -1
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/bindings.py +38 -3
- intel_npu_acceleration_library/backend/factory.py +77 -48
- intel_npu_acceleration_library/backend/ops.py +2 -1
- intel_npu_acceleration_library/backend/tensor.py +73 -3
- intel_npu_acceleration_library/device.py +2 -2
- intel_npu_acceleration_library/dtypes.py +34 -1
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +283 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +129 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +8 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +1 -1
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +3 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +62 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +60 -43
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +67 -1
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +70 -60
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
- intel_npu_acceleration_library/external/openvino/utils.py +17 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_jax_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/nn/module.py +17 -17
- {bigdl_core_npu-2.6.0b20241112.dist-info → bigdl_core_npu-2.6.0b20241118.dist-info}/top_level.txt +0 -0
intel_npu_acceleration_library/backend/factory.py

@@ -7,7 +7,7 @@ from intel_npu_acceleration_library.backend.base import BaseNPUBackendWithPrefetch
 from intel_npu_acceleration_library.backend.ops import get_supported_ops
 from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
 from intel_npu_acceleration_library.backend.tensor import Tensor
-from intel_npu_acceleration_library.dtypes import int4, bfloat16
+from intel_npu_acceleration_library.dtypes import int4, bfloat16, get_backend_dtype
 from typing import Optional, Tuple, Any, Union, Sequence, TypeVar, Callable, cast, List
 from functools import partial
 import numpy.typing as npt
@@ -71,17 +71,24 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                 Tensor: Tensor object
             """
             # Convert Tensor objects to their underlying node
-            args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
             kwargs = {
                 k: v.node if isinstance(v, Tensor) else v for k, v in kwargs.items()
             }

+            if fn.__qualname__ == 'NNFactory.reshape':
+                output_idx = args[0].output_idx
+                kwargs["output_idx"] = output_idx
+            args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
+
+
             input_nodes = [arg for arg in args if isinstance(arg, ctypes._Pointer)] + [
                 v for v in kwargs.values() if isinstance(v, ctypes._Pointer)
             ]
             # Call the function
             node = fn(self, *args, **kwargs)

+            output_len = backend_lib.op_output_size(node)
+
             # remove input nodes from output_nodes
             self.output_nodes = [
                 node for node in self.output_nodes if node not in input_nodes
@@ -91,7 +98,13 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                 self.output_nodes.append(node)

             # Wrap the node in a Tensor object
-            return Tensor(factory=self, node=node)
+            if output_len == 1:
+                return Tensor(factory=self, node=node, output_idx=0)
+            else:
+                output_tensor_list = []
+                for i in range(output_len):
+                    output_tensor_list.append(Tensor(factory=self, node=node, output_idx=i))
+                return output_tensor_list

         return cast(F, wrapper)

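Note: after this change every Tensor records which output of its node it views (output_idx), and ops whose node reports more than one output via op_output_size come back as a list of Tensor objects. A minimal sketch of the new contract (assumes an NPU-enabled install of this wheel):

    from intel_npu_acceleration_library.backend import NNFactory
    import numpy as np

    factory = NNFactory()
    x = factory.parameter((1, 96), dtype=np.float16)  # single-output op -> one Tensor
    assert x.output_idx == 0
    # multi-output ops (e.g. the variadic_split added later in this diff)
    # instead return [Tensor(output_idx=0), Tensor(output_idx=1), ...]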
@@ -184,34 +197,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Args:
             dtype: numpy dtype

-        Raises:
-            RuntimeError: Unsupported datatype
-
         Returns:
             ctypes.c_char_p: string representation of the dtype
         """
-        if dtype in [np.int8, torch.int8]:
-            str_dtype = "int8"
-        elif dtype == np.uint8 or dtype == int4:
-            # u8 represents packed i4 dtypes
-            str_dtype = "int4"
-        elif dtype in [np.int16, torch.int16]:
-            str_dtype = "int16"
-        elif dtype in [np.int32, torch.int32]:
-            str_dtype = "int32"
-        elif dtype in [np.int64, torch.int64]:
-            str_dtype = "int64"
-        elif dtype in [np.float16, torch.float16]:
-            str_dtype = "float16"
-        elif dtype in [np.float32, torch.float32]:
-            str_dtype = "float32"
-        elif dtype in [np.float64, torch.float64]:
-            str_dtype = "float64"
-        elif dtype in [bfloat16, torch.bfloat16]:
-            str_dtype = "bfloat16"
-        else:
-            raise RuntimeError(f"DType is not supported {dtype}")
-        return ctypes.c_char_p(str_dtype.encode())
+        return get_backend_dtype(dtype)

     @return_tensor
     def parameter(
@@ -422,6 +411,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         wt_dtype: npt.DTypeLike = np.float16,
         scale_factor: bool = True,
         is_prefill: bool = False,
+        use_dq: bool = True,
     ) -> ctypes._Pointer:
         """Generate a linear layer for dynamic quantization linear layer.

@@ -439,7 +429,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             ctypes._Pointer: output node
         """
-        func = backend_lib.dq_split_linear_prefill if is_prefill else backend_lib.dq_split_linear
+        if is_prefill:
+            func = backend_lib.dq_split_linear_prefill if use_dq else backend_lib.gw_linear_prefill
+        else:
+            func = backend_lib.dq_split_linear
         return func(self._mm, input_node, n_splits,
                     input_channels, outout_channels, bias,
                     self.get_backend_dtype(act_dtype),
@@ -448,7 +441,9 @@ class NNFactory(BaseNPUBackendWithPrefetch):

     @return_tensor
     def reshape(
-        self, input_node: ctypes._Pointer, shape: Sequence[int]
+        self, input_node: ctypes._Pointer, shape: Sequence[int],
+        special_zero: bool = True,
+        output_idx: int = 0
     ) -> ctypes._Pointer:
         """Generate a reshape layer.

@@ -460,7 +455,8 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         shape_node = self.constant(shape).node  # type: ignore
-        return backend_lib.reshape(self._mm, input_node, shape_node)
+        return backend_lib.reshape(self._mm, input_node, shape_node,
+                                   special_zero, output_idx)

     @return_tensor
     def broadcast(
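Note: special_zero follows OpenVINO Reshape semantics: when True, a 0 in the target shape copies the corresponding dimension from the input instead of requesting an empty axis. A hypothetical call against the factory sketched earlier:

    # assuming x has shape (4, 32, 16): dim 0 is kept, the rest is flattened
    y = factory.reshape(x, [0, -1])  # -> shape (4, 512)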
@@ -612,7 +608,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_node_1)
+            shape_size = backend_lib.op_shape_size(input_node_1, 0)
             axis = (axis + shape_size) % shape_size
         axis = np.int64(axis)
         return backend_lib.concat(self._mm, input_node_1, input_node_2, axis)
@@ -631,7 +627,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_nodes[0])
+            shape_size = backend_lib.op_shape_size(input_nodes[0], 0)
             axis = (axis + shape_size) % shape_size
         axis = np.int64(axis)

@@ -656,7 +652,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_max(self._mm, input_node, axis_node, keep_dims)
@@ -679,7 +675,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_mean(self._mm, input_node, axis_node, keep_dims)
@@ -702,7 +698,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_min(self._mm, input_node, axis_node, keep_dims)
@@ -725,7 +721,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_prod(self._mm, input_node, axis_node, keep_dims)
@@ -748,7 +744,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_sum(self._mm, input_node, axis_node, keep_dims)
@@ -768,7 +764,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             axis = (axis + shape_size) % shape_size
         axis_node = self.constant(axis).node  # type: ignore
         return backend_lib.normL2(self._mm, input_node, axis_node, eps)
@@ -791,14 +787,14 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             ctypes._Pointer: output node
         """
-        input_shape_size = backend_lib.op_shape_size(input_node)
+        input_shape_size = backend_lib.op_shape_size(input_node, 0)
         input_shape = [
-            backend_lib.op_shape(input_node, i) for i in range(input_shape_size)
+            backend_lib.op_shape(input_node, i, 0) for i in range(input_shape_size)
         ]
         if isinstance(exponent, ctypes._Pointer):
-            exponent_shape_size = backend_lib.op_shape_size(input_node)
+            exponent_shape_size = backend_lib.op_shape_size(input_node, 0)
             exponent_shape = [
-                backend_lib.op_shape(exponent, i) for i in range(exponent_shape_size)
+                backend_lib.op_shape(exponent, i, 0) for i in range(exponent_shape_size)
             ]
         else:
             exponent_shape = list(exponent.shape)
@@ -807,6 +803,39 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         #     raise ValueError("Input tensor shapes are not equal")

         return backend_lib.power(self._mm, input_node, exponent)
+
+    @return_tensor
+    def variadic_split(
+        self,
+        input: ctypes._Pointer,
+        axis: int,
+        split_lengths: Sequence[int],
+    ) -> ctypes._Pointer:
+        """Generate an average pooling layer.
+
+        Args:
+            input (ctypes._Pointer): layer input node
+            axis (int): split axis
+            split_lengths (Sequence[int]): A list containing the sizes of each output tensor
+                along the split "axis". Size of "split_lengths" should be equal to the number of
+                outputs. The sum of split_lengths must match data.shape[axis]
+
+        Raises:
+            NotImplementedError: divisor_override is not supported
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+
+        split_lens_ptr = np.array(split_lengths, dtype=np.uint32)
+
+        return backend_lib.variadic_split(
+            self._mm,
+            input,
+            axis,
+            split_lens_ptr,
+            split_lens_ptr.size,
+        )

     @return_tensor
     def avg_pooling(
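Note: despite the docstring copied from avg_pooling, variadic_split slices input along axis into chunks whose lengths are given by split_lengths and must sum to the size of that axis. A hedged usage sketch with the factory above:

    # split a (1, 96) tensor into a (1, 32) and a (1, 64) piece along axis 1
    lo, hi = factory.variadic_split(x, 1, [32, 64])
    assert lo.shape == [1, 32] and hi.shape == [1, 64]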
@@ -962,7 +991,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                                                         value, attn_mask,
                                                         is_causal)

-    def get_tensor_shape(self, node):
+    def get_tensor_shape(self, node, output_idx=0):
        """Get tensor shape.

         Args:
@@ -971,10 +1000,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             tuple[int]: tensor shape
         """
-        size = backend_lib.op_shape_size(node)
-        return tuple([backend_lib.op_shape(node, idx) for idx in range(size)])
+        size = backend_lib.op_shape_size(node, output_idx)
+        return tuple([backend_lib.op_shape(node, idx, output_idx) for idx in range(size)])

-    def get_tensor_dtype(self, node):
+    def get_tensor_dtype(self, node, output_idx=0):
         """Get tensor dtype.

         Args:
@@ -986,7 +1015,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             str: tensor dtype
         """
-        dtype_int = backend_lib.op_dtype(node)
+        dtype_int = backend_lib.op_dtype(node, output_idx)

         if dtype_int == 2:
             return np.bool
intel_npu_acceleration_library/backend/ops.py

@@ -98,7 +98,7 @@ def get_supported_ops() -> List[SupportedOp]:
             inputs=3,
             parameters=[ctypes.c_int],
         ),
-        SupportedOp(name="reshape", inputs=2),
+        SupportedOp(name="reshape", inputs=2, parameters=[ctypes.c_bool, ctypes.c_int]),
         SupportedOp(name="transpose", inputs=2),
         SupportedOp(name="squeeze", inputs=1),
         SupportedOp(name="unsqueeze", inputs=2),
@@ -137,5 +137,6 @@ def get_supported_ops() -> List[SupportedOp]:
         SupportedOp(name="power", inputs=2),
         SupportedOp(name="broadcast", inputs=2),
         SupportedOp(name="log_softmax", inputs=1, parameters=[ctypes.c_int64]),
+        SupportedOp(name="rotate_half", inputs=1),
     ]
     return supported_ops
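Note: rotate_half is the helper used by rotary position embeddings; the NPU op is assumed to follow the usual convention, shown here as a PyTorch reference:

    import torch

    def rotate_half(x: torch.Tensor) -> torch.Tensor:
        # split the last dimension in half and rotate: (x1, x2) -> (-x2, x1)
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)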
intel_npu_acceleration_library/backend/tensor.py

@@ -16,14 +16,83 @@ from intel_npu_acceleration_library.dtypes import (
     int32,
     int64,
     NPUDtype,
+    get_backend_dtype,
 )
 from dataclasses import dataclass
 import functools
+from math import prod
 import numpy as np
 import ctypes
 import torch


+class RemoteTensor(torch.Tensor):
+    """
+    Represent a remote tensor object.
+    Attrs:
+        _remote_tensor (ctypes._Pointer): The pointer to the underlying remote tensor.
+    Methods:
+        from_torch(x: torch.Tensor): Create a remote tensor from a torch tensor.
+    """
+
+    _remote_tensor = None
+
+    @staticmethod
+    def __new__(cls, x: Any, remote_tensor: ctypes._Pointer, *args: Any, **kwargs: Any):
+        """
+        Create a new remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+            args (Any): additional arguments
+            kwargs (Any): additional keyword arguments
+        Returns:
+            RemoteTensor: a RemoteTensor object
+        """
+        return super().__new__(cls, x, *args, **kwargs)
+
+    def __init__(self, x: Any, remote_tensor: ctypes._Pointer):
+        """
+        Initialize the remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+        """
+        self._remote_tensor = remote_tensor
+
+    # def __del__(self):
+    #     if self._remote_tensor and backend_lib:
+    #         backend_lib.del_remote_tensor(self._remote_tensor)
+
+    @staticmethod
+    def from_torch(x: torch.Tensor) -> "RemoteTensor":
+        """
+        Create a remote tensor from a torch tensor.
+        Args:
+            x (torch.Tensor): The torch tensor.
+        Returns:
+            RemoteTensor: The remote tensor.
+        """
+        shape_arr = np.array(x.shape, dtype=np.uint32)
+        dtype_str = get_backend_dtype(x.dtype)
+        p = ctypes.cast(x.data_ptr(), ctypes.c_void_p)
+
+        rt = backend_lib.to_npu(shape_arr.size, shape_arr, dtype_str, p)
+
+        pointer = ctypes.cast(
+            backend_lib.remote_tensor_data(rt),
+            ctypes.POINTER(ctypes.c_uint8),
+        )
+
+        arr = (pointer._type_ * prod(x.shape) * x.element_size()).from_address(
+            ctypes.addressof(pointer.contents)
+        )
+
+        pt_tensor = torch.frombuffer(arr, dtype=x.dtype).view(*x.shape)
+
+        return RemoteTensor(pt_tensor, rt)
+
+
 @dataclass
 class Tensor:
     """
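Note: RemoteTensor keeps a device-side buffer alive (via the native to_npu / remote_tensor_data entry points above) while still behaving as a torch.Tensor view of that memory. A hedged usage sketch (requires an NPU and this wheel's bundled native library):

    import torch
    from intel_npu_acceleration_library.backend.tensor import RemoteTensor

    cpu_t = torch.randn(2, 128, dtype=torch.float16)
    npu_t = RemoteTensor.from_torch(cpu_t)  # allocates an NPU-backed copy
    # npu_t can be used like cpu_t, but its storage lives in the remote buffer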
@@ -88,6 +157,7 @@ class Tensor:

     factory: "NNFactory"  # type: ignore # noqa: F821
     node: ctypes._Pointer
+    output_idx: int

     @property
     def shape(self) -> Sequence[int]:
@@ -97,8 +167,8 @@ class Tensor:
         Returns:
             Sequence[int]: The shape of the tensor.
         """
-        shape_size = backend_lib.op_shape_size(self.node)
-        return [backend_lib.op_shape(self.node, i) for i in range(shape_size)]
+        shape_size = backend_lib.op_shape_size(self.node, self.output_idx)
+        return [backend_lib.op_shape(self.node, i, self.output_idx) for i in range(shape_size)]

     @property
     def dtype(self) -> NPUDtype:
@@ -108,7 +178,7 @@ class Tensor:
         Returns:
             type: The data type of the tensor.
         """
-        dtype_int = backend_lib.op_dtype(self.node)
+        dtype_int = backend_lib.op_dtype(self.node, self.output_idx)

         if dtype_int == 2:
             return np.bool
intel_npu_acceleration_library/device.py

@@ -4,6 +4,7 @@
 #

 from intel_npu_acceleration_library.nn.module import convert_to_npu_module
+from intel_npu_acceleration_library.backend.tensor import RemoteTensor
 from torch.overrides import TorchFunctionMode
 from functools import lru_cache
 from typing import Any, MutableMapping
@@ -165,8 +166,7 @@ def to(super_fn: Any, self: Any, *args: Any, **kwargs: Any):
     """
     npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)
     if npu_device:
-
-        pass
+        return super_fn(RemoteTensor.from_torch(self), *args, **kwargs)
     return super_fn(self, *args, **kwargs)


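Note: together with the RemoteTensor addition in tensor.py, tensor.to("npu") stops being a no-op. A sketch of the intended flow (assumes the library's "npu" device patches are active in the running process):

    import torch
    import intel_npu_acceleration_library  # assumed to install the device hooks

    t = torch.randn(4, 4, dtype=torch.float16).to("npu")
    # t is now a RemoteTensor backed by NPU memory rather than the CPU tensor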
intel_npu_acceleration_library/dtypes.py

@@ -7,7 +7,7 @@ from dataclasses import dataclass
 from typing import Union
 import numpy as np
 import torch
-
+import ctypes

 @dataclass(frozen=True)
 class NPUDtype:
@@ -81,6 +81,39 @@ class NPUDtype:
         return self.name


+def get_backend_dtype(dtype) -> ctypes.c_char_p:
+    """Get the string representation of the dtype.
+    Args:
+        dtype: numpy dtype
+    Raises:
+        RuntimeError: Unsupported datatype
+    Returns:
+        ctypes.c_char_p: string representation of the dtype
+    """
+    if dtype in [np.int8, torch.int8]:
+        str_dtype = "int8"
+    elif dtype in [np.uint8, int4, torch.uint8]:
+        # u8 represents packed i4 dtypes
+        str_dtype = "int4"
+    elif dtype in [np.int16, torch.int16]:
+        str_dtype = "int16"
+    elif dtype in [np.int32, torch.int32]:
+        str_dtype = "int32"
+    elif dtype in [np.int64, torch.int64]:
+        str_dtype = "int64"
+    elif dtype in [np.float16, torch.float16]:
+        str_dtype = "float16"
+    elif dtype in [np.float32, torch.float32]:
+        str_dtype = "float32"
+    elif dtype in [np.float64, torch.float64]:
+        str_dtype = "float64"
+    elif dtype in [bfloat16, torch.bfloat16]:
+        str_dtype = "bfloat16"
+    else:
+        raise RuntimeError(f"DType is not supported {dtype}")
+    return ctypes.c_char_p(str_dtype.encode())
+
+
 float16 = NPUDtype(
     "fp16",
     16,
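Note: get_backend_dtype now lives in dtypes.py so the factory, the Tensor machinery, and the new RemoteTensor can share it; it maps numpy/torch dtypes to the C string the native library expects:

    import numpy as np
    from intel_npu_acceleration_library.dtypes import get_backend_dtype

    assert get_backend_dtype(np.float16).value == b"float16"
    assert get_backend_dtype(np.uint8).value == b"int4"  # u8 carries packed int4 pairs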
intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py

@@ -18,3 +18,4 @@ from openvino._pyopenvino._offline_transformations import compress_model_transformation
 from openvino._pyopenvino._offline_transformations import compress_quantize_weights_transformation
 from openvino._pyopenvino._offline_transformations import convert_sequence_to_tensor_iterator_transformation
 from openvino._pyopenvino._offline_transformations import paged_attention_transformation
+from openvino._pyopenvino._offline_transformations import stateful_to_stateless_transformation
Binary files (the five intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38/cp39/cp310/cp311/cp312-win_amd64.pyd modules): contents changed, no text diff shown.
intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py (new file)

@@ -0,0 +1,15 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Package: openvino
+Low level wrappers for the FrontEnd C++ API.
+"""
+
+# flake8: noqa
+
+try:
+    from openvino.frontend.jax.py_jax_frontend import _FrontEndJaxDecoder as Decoder
+except ImportError as err:
+    raise ImportError("OpenVINO JAX frontend is not available, please make sure the frontend is built."
+                      "{}".format(err))