bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/__init__.py +0 -0
- bigdl-core-npu/common.lib +0 -0
- bigdl-core-npu/ggml.dll +0 -0
- bigdl-core-npu/ggml.lib +0 -0
- bigdl-core-npu/include/llamacpp/arg.h +77 -0
- bigdl-core-npu/include/llamacpp/common.h +563 -0
- bigdl-core-npu/include/llamacpp/ggml-alloc.h +76 -0
- bigdl-core-npu/include/llamacpp/ggml-backend.h +241 -0
- bigdl-core-npu/include/llamacpp/ggml.h +2679 -0
- bigdl-core-npu/include/llamacpp/llama.h +1234 -0
- bigdl-core-npu/include/llamacpp/log.h +92 -0
- bigdl-core-npu/include/npu/npu_common.h +119 -0
- bigdl-core-npu/include/npu/npu_llm.h +77 -0
- bigdl-core-npu/llama-cli-npu.exe +0 -0
- bigdl-core-npu/llama.dll +0 -0
- bigdl-core-npu/llama.lib +0 -0
- bigdl-core-npu/llm-cli.exe +0 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- bigdl-core-npu/npu_llm.lib +0 -0
- bigdl-core-npu/zlib1.dll +0 -0
- bigdl_core_npu-2.6.0.data/scripts/init-llama-cpp.bat +29 -0
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/METADATA +12 -3
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/RECORD +146 -96
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/WHEEL +1 -1
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/top_level.txt +1 -0
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/base.py +39 -4
- intel_npu_acceleration_library/backend/bindings.py +109 -5
- intel_npu_acceleration_library/backend/factory.py +264 -47
- intel_npu_acceleration_library/backend/ops.py +2 -1
- intel_npu_acceleration_library/backend/qlinear.py +8 -4
- intel_npu_acceleration_library/backend/runtime.py +7 -2
- intel_npu_acceleration_library/backend/tensor.py +73 -3
- intel_npu_acceleration_library/bigdl-core-npu/cache.json +113732 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_c.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/device.py +2 -2
- intel_npu_acceleration_library/dtypes.py +34 -1
- intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +37 -19
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +47 -6
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +95 -63
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +193 -2
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +69 -43
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +88 -2
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +69 -60
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
- intel_npu_acceleration_library/external/openvino/utils.py +17 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/nn/module.py +17 -17
intel_npu_acceleration_library/backend/runtime.py

@@ -27,6 +27,7 @@ def run_matmul(
     x: torch.Tensor,
     weights: torch.Tensor,
     scale: Optional[torch.Tensor] = None,
+    zero: Optional[torch.Tensor] = None,
     op_id: Optional[str] = None,
 ) -> torch.Tensor:
     """Run a matmul operation. Depending on the datatype of the weights it runs a float or quantized operation.
@@ -35,6 +36,7 @@ def run_matmul(
         x (torch.Tensor): Activation tensor. Its dtype must be torch.float16
         weights (torch.Tensor): Weights tensor. Its dtype can be torch.float16 or torch.int8
         scale (Optional[torch.Tensor], optional): Quantization scale. If weights.dtype == torch.int8 then it must be set. Defaults to None.
+        zero (Optional[torch.Tensor], optional): Quantization zero point for asym_int4. If weights.dtype == torch.uint8 and asym_int4 is used then it must be set. Defaults to None.
         op_id (Optional[str], optional): Operation ID. Defaults to None.

     Raises:
@@ -68,12 +70,15 @@ def run_matmul(
         op_class = QLinear if op_id is not None else QMatMul
         op_class_name = op_class.__name__
         np_dtype = np.int8 if weights.dtype == torch.int8 else np.uint8
-        create_op = partial(op_class, dtype=np_dtype)
+        create_op = partial(op_class, dtype=np_dtype, asym=(zero is not None))
         if scale is None:
             raise RuntimeError(
                 f"Quantized matmul (weights dtype == {weights.dtype}) requires scale (scale = {scale})"
             )
-
+        if zero is None:
+            op_args = [weights.numpy(), scale.numpy()]
+        else:
+            op_args = [weights.numpy(), scale.numpy(), zero.numpy()]
     else:
         raise RuntimeError(f"Unsupported dtype for weights {weights.dtype}")
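The new `zero` argument threads a zero-point tensor through to the quantized matmul op, so asymmetric (asym_int4) weights are handled alongside symmetric int8 ones. As a rough illustration of what the extra tensor is for, here is a minimal sketch of symmetric vs. asymmetric dequantization, assuming the usual affine-quantization convention (the library's packed int4 kernel layout is not shown in this diff, and all values below are made up):

```python
import numpy as np

def dequantize_symmetric(q: np.ndarray, scale: np.ndarray) -> np.ndarray:
    # Symmetric int8: real value = q * scale, zero point implicitly 0.
    return q.astype(np.float32) * scale

def dequantize_asymmetric(q: np.ndarray, scale: np.ndarray, zero: np.ndarray) -> np.ndarray:
    # Asymmetric uint4/uint8: real value = (q - zero) * scale.
    return (q.astype(np.float32) - zero) * scale

# Toy quantized weight with per-row scale and zero point (illustrative only).
q = np.array([[0, 3, 7, 15], [1, 4, 8, 12]], dtype=np.uint8)
scale = np.array([[0.10], [0.05]], dtype=np.float32)
zero = np.array([[8.0], [8.0]], dtype=np.float32)

print(dequantize_symmetric(np.array([[-3, 5]], dtype=np.int8), np.float32(0.1)))
print(dequantize_asymmetric(q, scale, zero))
```

When `zero is None` the call falls back to the symmetric two-argument form, which is why `op_args` only grows to three entries in the asymmetric case.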
intel_npu_acceleration_library/backend/tensor.py

@@ -16,14 +16,83 @@ from intel_npu_acceleration_library.dtypes import (
     int32,
     int64,
     NPUDtype,
+    get_backend_dtype,
 )
 from dataclasses import dataclass
 import functools
+from math import prod
 import numpy as np
 import ctypes
 import torch


+class RemoteTensor(torch.Tensor):
+    """
+    Represent a remote tensor object.
+    Attrs:
+        _remote_tensor (ctypes._Pointer): The pointer to the underlying remote tensor.
+    Methods:
+        from_torch(x: torch.Tensor): Create a remote tensor from a torch tensor.
+    """
+
+    _remote_tensor = None
+
+    @staticmethod
+    def __new__(cls, x: Any, remote_tensor: ctypes._Pointer, *args: Any, **kwargs: Any):
+        """
+        Create a new remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+            args (Any): additional arguments
+            kwargs (Any): additional keyword arguments
+        Returns:
+            RemoteTensor: a RemoteTensor object
+        """
+        return super().__new__(cls, x, *args, **kwargs)
+
+    def __init__(self, x: Any, remote_tensor: ctypes._Pointer):
+        """
+        Initialize the remote tensor object.
+        Args:
+            x (Any): tensor input
+            remote_tensor (ctypes._Pointer): remote tensor pointer
+        """
+        self._remote_tensor = remote_tensor
+
+    # def __del__(self):
+    #     if self._remote_tensor and backend_lib:
+    #         backend_lib.del_remote_tensor(self._remote_tensor)
+
+    @staticmethod
+    def from_torch(x: torch.Tensor) -> "RemoteTensor":
+        """
+        Create a remote tensor from a torch tensor.
+        Args:
+            x (torch.Tensor): The torch tensor.
+        Returns:
+            RemoteTensor: The remote tensor.
+        """
+        shape_arr = np.array(x.shape, dtype=np.uint32)
+        dtype_str = get_backend_dtype(x.dtype)
+        p = ctypes.cast(x.data_ptr(), ctypes.c_void_p)
+
+        rt = backend_lib.to_npu(shape_arr.size, shape_arr, dtype_str, p)
+
+        pointer = ctypes.cast(
+            backend_lib.remote_tensor_data(rt),
+            ctypes.POINTER(ctypes.c_uint8),
+        )
+
+        arr = (pointer._type_ * prod(x.shape) * x.element_size()).from_address(
+            ctypes.addressof(pointer.contents)
+        )
+
+        pt_tensor = torch.frombuffer(arr, dtype=x.dtype).view(*x.shape)
+
+        return RemoteTensor(pt_tensor, rt)
+
+
 @dataclass
 class Tensor:
     """
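`RemoteTensor.from_torch` copies a host tensor into an NPU-side allocation via `backend_lib.to_npu`, then re-exposes the device-visible buffer to PyTorch by wrapping the raw pointer returned by `backend_lib.remote_tensor_data` with `ctypes` and `torch.frombuffer`. The pointer-wrapping step can be sketched in isolation; in this hypothetical example a plain `ctypes` allocation stands in for the remote buffer, since the real one only exists inside the compiled backend:

```python
import ctypes
from math import prod

import torch

shape = (2, 3)
dtype = torch.float16
nbytes = prod(shape) * (torch.finfo(dtype).bits // 8)

# Stand-in for the NPU-side allocation; the real code obtains this pointer
# from backend_lib.remote_tensor_data(rt).
raw = (ctypes.c_uint8 * nbytes)()
base = ctypes.cast(raw, ctypes.POINTER(ctypes.c_uint8))

# Re-materialize the byte range at that address as a ctypes array...
arr = (base._type_ * nbytes).from_address(ctypes.addressof(base.contents))

# ...and view it as a torch tensor with the original shape and dtype.
t = torch.frombuffer(arr, dtype=dtype).view(*shape)
t.fill_(1.0)  # writes land directly in the underlying buffer, no copy
print(t)
```

The resulting tensor aliases the buffer rather than copying it, which is what lets the wrapper hand NPU-visible memory back to user code as an ordinary `torch.Tensor`.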
@@ -88,6 +157,7 @@ class Tensor:

     factory: "NNFactory"  # type: ignore  # noqa: F821
     node: ctypes._Pointer
+    output_idx: int

     @property
     def shape(self) -> Sequence[int]:
@@ -97,8 +167,8 @@
         Returns:
             Sequence[int]: The shape of the tensor.
         """
-        shape_size = backend_lib.op_shape_size(self.node)
-        return [backend_lib.op_shape(self.node, i) for i in range(shape_size)]
+        shape_size = backend_lib.op_shape_size(self.node, self.output_idx)
+        return [backend_lib.op_shape(self.node, i, self.output_idx) for i in range(shape_size)]

     @property
     def dtype(self) -> NPUDtype:
@@ -108,7 +178,7 @@
         Returns:
             type: The data type of the tensor.
         """
-        dtype_int = backend_lib.op_dtype(self.node)
+        dtype_int = backend_lib.op_dtype(self.node, self.output_idx)

         if dtype_int == 2:
             return np.bool
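The remaining `tensor.py` changes add the `output_idx` field so that shape and dtype queries name which output of a graph node a `Tensor` wrapper refers to, letting nodes with several outputs be wrapped separately. A minimal sketch of that per-output indexing pattern, with a hypothetical in-Python stand-in for the compiled `backend_lib` calls:

```python
from dataclasses import dataclass
from typing import Dict, Sequence, Tuple

class FakeBackend:
    """Hypothetical stand-in for backend_lib: shape queries take (node, dim, output_idx)."""

    def __init__(self, shapes_per_output: Dict[int, Tuple[int, ...]]):
        self._shapes = shapes_per_output

    def op_shape_size(self, node: object, output_idx: int) -> int:
        return len(self._shapes[output_idx])

    def op_shape(self, node: object, dim: int, output_idx: int) -> int:
        return self._shapes[output_idx][dim]

@dataclass
class TensorView:
    backend: FakeBackend
    node: object
    output_idx: int

    @property
    def shape(self) -> Sequence[int]:
        size = self.backend.op_shape_size(self.node, self.output_idx)
        return [self.backend.op_shape(self.node, d, self.output_idx) for d in range(size)]

# A node with two outputs, e.g. a split producing [1, 128] and [1, 64].
backend = FakeBackend({0: (1, 128), 1: (1, 64)})
print(TensorView(backend, node=None, output_idx=0).shape)  # [1, 128]
print(TensorView(backend, node=None, output_idx=1).shape)  # [1, 64]
```

This mirrors the updated `shape` property above, except that the backend here is a plain Python object rather than the compiled library.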