bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/__init__.py +0 -0
- bigdl-core-npu/common.lib +0 -0
- bigdl-core-npu/ggml.dll +0 -0
- bigdl-core-npu/ggml.lib +0 -0
- bigdl-core-npu/include/llamacpp/arg.h +77 -0
- bigdl-core-npu/include/llamacpp/common.h +563 -0
- bigdl-core-npu/include/llamacpp/ggml-alloc.h +76 -0
- bigdl-core-npu/include/llamacpp/ggml-backend.h +241 -0
- bigdl-core-npu/include/llamacpp/ggml.h +2679 -0
- bigdl-core-npu/include/llamacpp/llama.h +1234 -0
- bigdl-core-npu/include/llamacpp/log.h +92 -0
- bigdl-core-npu/include/npu/npu_common.h +119 -0
- bigdl-core-npu/include/npu/npu_llm.h +77 -0
- bigdl-core-npu/llama-cli-npu.exe +0 -0
- bigdl-core-npu/llama.dll +0 -0
- bigdl-core-npu/llama.lib +0 -0
- bigdl-core-npu/llm-cli.exe +0 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- bigdl-core-npu/npu_llm.lib +0 -0
- bigdl-core-npu/zlib1.dll +0 -0
- bigdl_core_npu-2.6.0.data/scripts/init-llama-cpp.bat +29 -0
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/METADATA +12 -3
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/RECORD +146 -96
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/WHEEL +1 -1
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/top_level.txt +1 -0
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/base.py +39 -4
- intel_npu_acceleration_library/backend/bindings.py +109 -5
- intel_npu_acceleration_library/backend/factory.py +264 -47
- intel_npu_acceleration_library/backend/ops.py +2 -1
- intel_npu_acceleration_library/backend/qlinear.py +8 -4
- intel_npu_acceleration_library/backend/runtime.py +7 -2
- intel_npu_acceleration_library/backend/tensor.py +73 -3
- intel_npu_acceleration_library/bigdl-core-npu/cache.json +113732 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_c.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/device.py +2 -2
- intel_npu_acceleration_library/dtypes.py +34 -1
- intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +37 -19
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +47 -6
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +95 -63
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +193 -2
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +69 -43
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +88 -2
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +69 -60
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
- intel_npu_acceleration_library/external/openvino/utils.py +17 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/nn/module.py +17 -17
Selected source diffs follow, grouped by file.

intel_npu_acceleration_library/backend/bindings.py:

@@ -67,8 +67,9 @@ def init_common(lib: ctypes.CDLL):
     Args:
         lib (ctypes.CDLL): Intel® NPU Acceleration Library runtime library
     """
-    lib.saveModel.argtypes = [handler, ctypes.c_char_p]
+    lib.saveModel.argtypes = [handler, ctypes.c_char_p, ctypes.c_bool]
     lib.saveCompiledModel.argtypes = [handler, ctypes.c_char_p]
+    lib.serializeModel.argtypes = [handler, ctypes.c_char_p, ctypes.c_char_p]
 
     # Set input activations
     lib.set_activation.argtypes = [handler, ctypes.c_void_p, ctypes.c_int]
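Note: callers of `saveModel` must now pass a third boolean argument, and the new `serializeModel` binding takes two output paths. A minimal sketch, assuming `lib` comes from `initialize_bindings()` and `model` is an existing model handle; what the boolean controls is not documented in this diff:

```python
import ctypes

# Hedged sketch: the bool is passed positionally per the new argtypes;
# its exact semantics are an assumption, not stated in this diff.
lib.saveModel(model, ctypes.c_char_p(b"model.xml"), ctypes.c_bool(True))

# serializeModel takes two paths, plausibly an OpenVINO-style XML/BIN pair.
lib.serializeModel(model, ctypes.c_char_p(b"model.xml"), ctypes.c_char_p(b"model.bin"))
```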
@@ -91,6 +92,16 @@ def init_common(lib: ctypes.CDLL):
 
     lib.compressToI4.argtypes = [c_i8_array, c_u8_array, ctypes.c_int]
 
+    # Remote tensors
+    lib.to_npu.argtypes = [ctypes.c_int, c_u32_array, ctypes.c_char_p, ctypes.c_void_p]
+    lib.to_npu.restype = handler
+
+    lib.remote_tensor_data.argtypes = [handler]
+    lib.remote_tensor_data.restype = ctypes.c_void_p
+
+    lib.del_remote_tensor.argtypes = [handler]
+
+
 
 def init_network_factory(lib: ctypes.CDLL):
     """Initialize Netowrk factory bindings.
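The remote-tensor bindings imply an allocate / read-pointer / free lifecycle. A hedged sketch of how the Python side could drive them; the meaning of each `to_npu` argument is inferred from the argtypes only and is not documented in this diff:

```python
import ctypes
import numpy as np

data = np.zeros((1, 128), dtype=np.float16)
shape = np.array(data.shape, dtype=np.uint32)

# Assumed argument order, per argtypes: int, u32 shape array, dtype string, host pointer.
remote = lib.to_npu(len(shape), shape, ctypes.c_char_p(b"float16"),
                    data.ctypes.data_as(ctypes.c_void_p))

ptr = lib.remote_tensor_data(remote)   # c_void_p to the remote tensor's data
lib.del_remote_tensor(remote)          # release the remote tensor
```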
@@ -106,15 +117,18 @@ def init_network_factory(lib: ctypes.CDLL):
 
     lib.setNNFactoryWeights.argtypes = [handler, ctypes.c_int, handler, ctypes.c_bool]
 
-    lib.op_shape_size.argtypes = [handler]
+    lib.op_shape_size.argtypes = [handler, ctypes.c_int]
     lib.op_shape_size.restype = ctypes.c_int
 
-    lib.op_shape.argtypes = [handler, ctypes.c_int]
+    lib.op_shape.argtypes = [handler, ctypes.c_int, ctypes.c_int]
     lib.op_shape.restype = ctypes.c_int
 
-    lib.op_dtype.argtypes = [handler]
+    lib.op_dtype.argtypes = [handler, ctypes.c_int]
     lib.op_dtype.restype = ctypes.c_int
 
+    lib.op_output_size.argtypes = [handler]
+    lib.op_output_size.restype = ctypes.c_int
+
     lib.parameter.argtypes = [handler, ctypes.c_int, c_u32_array, ctypes.c_char_p]
     lib.parameter.restype = handler
 
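Every shape/dtype query now carries an output index, and the new `op_output_size` reports how many outputs a node has; this supports the multi-output ops added below. A sketch of the query pattern (it mirrors `get_tensor_shape` in factory.py further down):

```python
# Sketch: enumerate the shape of each output of a (possibly multi-output) node.
n_outputs = lib.op_output_size(node)
for out_idx in range(n_outputs):
    rank = lib.op_shape_size(node, out_idx)
    dims = [lib.op_shape(node, i, out_idx) for i in range(rank)]
    print(out_idx, dims)
```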
@@ -143,7 +157,16 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.slice.restype = handler
 
-    lib.compile.argtypes = [handler]
+    lib.simple_slice.argtypes = [
+        handler,
+        handler,
+        handler,
+        handler,
+        handler
+    ]
+    lib.simple_slice.restype = handler
+
+    lib.compile.argtypes = [handler, ctypes.c_int]
     lib.compile.restype = handler
 
     lib.get_output_tensor_shape_size.argtypes = [handler, ctypes.c_int]
@@ -160,6 +183,8 @@ def init_network_factory(lib: ctypes.CDLL):
         ctypes.c_bool,
         ctypes.c_char_p,
         ctypes.c_char_p,
+        ctypes.c_bool,
+        ctypes.c_bool,
     ]
     lib.linear.restype = handler
 
@@ -214,6 +239,65 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.max_pooling.restype = handler
 
+
+    lib.multi_concat.argtypes = [
+        handler,
+        ctypes.POINTER(handler),
+        ctypes.c_uint64,
+        ctypes.c_int64,
+    ]
+    lib.multi_concat.restype = handler
+
+    lib.variadic_split.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+    ]
+    lib.variadic_split.restype = handler
+
+    lib.dq_split_linear.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear.restype = handler
+
+    lib.dq_split_linear_prefill.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+        ctypes.c_bool,
+    ]
+    lib.dq_split_linear_prefill.restype = handler
+
+    lib.gw_linear_prefill.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+        ctypes.c_bool,
+    ]
+    lib.gw_linear_prefill.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
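`multi_concat` is the first binding here that takes a C array of node handles (`ctypes.POINTER(handler)`) plus an element count. The factory-side `sequence_concat` further below marshals it exactly this way; shown here as a standalone sketch, assuming `mm`, `node_a`, and `node_b` are pre-existing handles:

```python
import ctypes
import numpy as np

# Sketch: pack Python-held node handles into a C array for multi_concat.
# handler is ctypes.POINTER(ctypes.c_char), matching the factory code below.
nodes = [node_a, node_b]
node_array = (ctypes.POINTER(ctypes.c_char) * len(nodes))(*nodes)
out = lib.multi_concat(mm, node_array, ctypes.c_uint64(len(nodes)), np.int64(1))
```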
@@ -252,6 +336,19 @@ def init_parameters(lib: ctypes.CDLL):
         ctypes.c_int,
     ]
 
+    lib.addAsymInt4Parameter.argtypes = [
+        handler,
+        c_u8_array,
+        c_fp16_array,
+        c_fp16_array,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+        ctypes.c_int,
+    ]
+
     lib.addIntParameterConversion.argtypes = [
         handler,
         c_i8_array,
@@ -260,6 +357,13 @@ def init_parameters(lib: ctypes.CDLL):
         ctypes.c_int,
     ]
 
+    lib.addInt4WeightParameter.argtypes = [
+        handler,
+        c_u8_array,
+        ctypes.c_int,
+        ctypes.c_int,
+    ]
+
 
 def initialize_bindings() -> ctypes.CDLL:
     """Load the Intel® NPU Acceleration Library runtime library, and initialize all c++ <-> python bindings.
intel_npu_acceleration_library/backend/factory.py:

@@ -7,7 +7,7 @@ from intel_npu_acceleration_library.backend.base import BaseNPUBackendWithPrefet
 from intel_npu_acceleration_library.backend.ops import get_supported_ops
 from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
 from intel_npu_acceleration_library.backend.tensor import Tensor
-from intel_npu_acceleration_library.dtypes import int4, bfloat16
+from intel_npu_acceleration_library.dtypes import int4, bfloat16, get_backend_dtype
 from typing import Optional, Tuple, Any, Union, Sequence, TypeVar, Callable, cast, List
 from functools import partial
 import numpy.typing as npt
@@ -71,17 +71,99 @@ class NNFactory(BaseNPUBackendWithPrefetch):
                 Tensor: Tensor object
             """
             # Convert Tensor objects to their underlying node
-            args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
             kwargs = {
                 k: v.node if isinstance(v, Tensor) else v for k, v in kwargs.items()
             }
 
+            if fn.__qualname__ == 'NNFactory.reshape':
+                output_idx = args[0].output_idx
+                kwargs["output_idx"] = output_idx
+            args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
+
+
             input_nodes = [arg for arg in args if isinstance(arg, ctypes._Pointer)] + [
                 v for v in kwargs.values() if isinstance(v, ctypes._Pointer)
             ]
             # Call the function
             node = fn(self, *args, **kwargs)
 
+            output_len = backend_lib.op_output_size(node)
+
+            # remove input nodes from output_nodes
+            self.output_nodes = [
+                node for node in self.output_nodes if node not in input_nodes
+            ]
+            # add output node to output_nodes
+            if fn.__name__ != "constant":
+                self.output_nodes.append(node)
+
+            # Wrap the node in a Tensor object
+            if output_len == 1:
+                return Tensor(factory=self, node=node, output_idx=0)
+            else:
+                output_tensor_list = []
+                for i in range(output_len):
+                    output_tensor_list.append(Tensor(factory=self, node=node, output_idx=i))
+                return output_tensor_list
+
+        return cast(F, wrapper)
+
+    def return_tensor_for_list_inputs(fn: F) -> F:  # type: ignore
+        """Wrap the output of a function in a Tensor object.
+        This new wrapper add support for List Tensor input.
+
+        Args:
+            fn (function): Function
+
+        Returns:
+            function: A function that wraps the output in a Tensor object
+        """
+
+        def wrapper(self, *args: Any, **kwargs: Any) -> Tensor:
+            """Wrap the output of a function in a Tensor object.
+
+            Args:
+                args (Any): Variable length argument list
+                kwargs (Any): Arbitrary keyword arguments
+
+            Returns:
+                Tensor: Tensor object
+            """
+            # Convert Tensor objects to their underlying node
+            # args = tuple(arg.node if isinstance(arg, Tensor) else arg for arg in args)
+            new_args = []
+            for arg in args:
+                if isinstance(arg, Tensor):
+                    new_args.append(arg.node)
+                elif isinstance(arg, (tuple, list)):
+                    # for item in arg:
+                    for i in range(len(arg)):
+                        if isinstance(arg[i], Tensor):
+                            arg[i] = arg[i].node
+                    new_args.append(arg)
+                else:
+                    new_args.append(arg)
+            args = tuple(new_args)
+            kwargs = {
+                k: v.node if isinstance(v, Tensor) else v for k, v in kwargs.items()
+            }
+
+            # input_nodes = [arg for arg in args if isinstance(arg, ctypes._Pointer)] + [
+            #     v for v in kwargs.values() if isinstance(v, ctypes._Pointer)
+            # ]
+            input_nodes = []
+            for arg in args:
+                if isinstance(arg, ctypes._Pointer):
+                    input_nodes.append(arg)
+                elif isinstance(arg, (tuple, list)):
+                    for item in arg:
+                        if isinstance(item, ctypes._Pointer):
+                            input_nodes.append(item)
+            input_nodes += [v for v in kwargs.values() if isinstance(v, ctypes._Pointer)]
+
+            # Call the function
+            node = fn(self, *args, **kwargs)
+
             # remove input nodes from output_nodes
             self.output_nodes = [
                 node for node in self.output_nodes if node not in input_nodes
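The practical effect of the reworked wrapper: ops whose node reports more than one output (via `op_output_size`) now come back as a list of `Tensor` objects that share the node but differ in `output_idx`, and `reshape` forwards the incoming tensor's `output_idx` to the binding. A sketch of what calling code sees, assuming an `NNFactory` instance `nn`:

```python
# Sketch: single-output ops return one Tensor; multi-output ops return a list.
x = nn.parameter((1, 6))                  # Tensor, output_idx == 0
halves = nn.variadic_split(x, 1, [3, 3])  # list of Tensors if the op reports 2 outputs
y = nn.reshape(halves[1], (3, 1))         # wrapper passes halves[1].output_idx through
```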
@@ -115,34 +197,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Args:
             dtype: numpy dtype
 
-        Raises:
-            RuntimeError: Unsupported datatype
-
         Returns:
             ctypes.c_char_p: string representation of the dtype
         """
-        if dtype in [np.int8, torch.int8]:
-            str_dtype = "int8"
-        elif dtype == np.uint8 or dtype == int4:
-            # u8 represents packed i4 dtypes
-            str_dtype = "int4"
-        elif dtype in [np.int16, torch.int16]:
-            str_dtype = "int16"
-        elif dtype in [np.int32, torch.int32]:
-            str_dtype = "int32"
-        elif dtype in [np.int64, torch.int64]:
-            str_dtype = "int64"
-        elif dtype in [np.float16, torch.float16]:
-            str_dtype = "float16"
-        elif dtype in [np.float32, torch.float32]:
-            str_dtype = "float32"
-        elif dtype in [np.float64, torch.float64]:
-            str_dtype = "float64"
-        elif dtype in [bfloat16, torch.bfloat16]:
-            str_dtype = "bfloat16"
-        else:
-            raise RuntimeError(f"DType is not supported {dtype}")
-        return ctypes.c_char_p(str_dtype.encode())
+        return get_backend_dtype(dtype)
 
     @return_tensor
     def parameter(
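The per-factory dtype table is gone; the mapping now lives in `intel_npu_acceleration_library.dtypes.get_backend_dtype` (imported at the top of this file), so every module shares one table. Usage is unchanged for callers; a minimal sketch:

```python
import numpy as np
from intel_npu_acceleration_library.dtypes import get_backend_dtype

# Per the docstring above, the result is a ctypes.c_char_p naming the dtype,
# e.g. b"float16"; unsupported dtypes presumably still raise.
c_name = get_backend_dtype(np.float16)
```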
@@ -319,6 +377,8 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         bias: Optional[bool] = False,
         act_dtype: npt.DTypeLike = np.float16,
         wt_dtype: npt.DTypeLike = np.float16,
+        scale_factor: bool = True,
+        asym: bool = False,
     ) -> ctypes._Pointer:
         """Generate a linear layer.
 
@@ -341,11 +401,52 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             bias,
             self.get_backend_dtype(act_dtype),
             self.get_backend_dtype(wt_dtype),
+            scale_factor,
+            asym
         )
 
+    @return_tensor
+    def dq_split_linear(
+        self, input_node: ctypes._Pointer, n_splits: int,
+        outout_channels: int, input_channels: int, bias: bool = False,
+        act_dtype: npt.DTypeLike = np.float16,
+        wt_dtype: npt.DTypeLike = np.float16,
+        scale_factor: bool = True,
+        is_prefill: bool = False,
+        use_dq: bool = True,
+        asym: bool = False,
+    ) -> ctypes._Pointer:
+        """Generate a linear layer for dynamic quantization linear layer.
+
+        Args:
+            input_node (ctypes._Pointer): layer input node
+            n_splits (int): number of parts the linear layer is split into
+            output_channels (int): number of output channels
+            input_channels (int): number of input channels
+            bias (bool, optional): enable/disable bias. Defaults to False.
+            act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
+            wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+            scale_factor (bool, optional): enable/disable mul scale factor. Default to True,
+            is_prefill (bool, optional): enable/disable prefill linear optimization. Default to False.
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        if is_prefill:
+            func = backend_lib.dq_split_linear_prefill if use_dq else backend_lib.gw_linear_prefill
+        else:
+            func = backend_lib.dq_split_linear
+        return func(self._mm, input_node, n_splits,
+                    input_channels, outout_channels, bias,
+                    self.get_backend_dtype(act_dtype),
+                    self.get_backend_dtype(wt_dtype),
+                    scale_factor, asym)
 
     @return_tensor
     def reshape(
-        self, input_node: ctypes._Pointer, shape: Sequence[int]
+        self, input_node: ctypes._Pointer, shape: Sequence[int],
+        special_zero: bool = True,
+        output_idx: int = 0
     ) -> ctypes._Pointer:
         """Generate a reshape layer.
 
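A hedged usage sketch of `dq_split_linear`, assuming an `NNFactory` instance `nn`; note the native call passes `n_splits, input_channels, outout_channels` in that order (the parameter name is spelled `outout_channels` in the source), and `is_prefill=True` with `use_dq=False` selects the `gw_linear_prefill` kernel:

```python
from intel_npu_acceleration_library.dtypes import int4

x = nn.parameter((1, 4096))
# Decode-time layer split into 2 parts with int4 weights (illustrative sizes).
y = nn.dq_split_linear(x, 2, 11008, 4096, bias=False,
                       wt_dtype=int4, is_prefill=False)
```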
@@ -357,7 +458,8 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         shape_node = self.constant(shape).node  # type: ignore
-        return backend_lib.reshape(self._mm, input_node, shape_node)
+        return backend_lib.reshape(self._mm, input_node, shape_node,
+                                   special_zero, output_idx)
 
     @return_tensor
     def broadcast(
@@ -453,6 +555,46 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             end_mask_ptr.size,
             end_mask_ptr,
         )
+
+    @return_tensor
+    def simple_slice(
+        self,
+        input_node: ctypes._Pointer,
+        begin: Sequence[int],
+        end: Sequence[int],
+        step: Optional[Sequence[int]] = None,
+    ) -> ctypes._Pointer:
+        """Generate an unsqueeze layer.
+
+        Args:
+            input_node (ctypes._Pointer): layer input node
+            begin (Sequence[int]): begin
+            end (Sequence[int]): end
+            stride (Optional[Sequence[int]]): stride
+
+        Raises:
+            ValueError: begin and end must have the same length
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        if len(begin) != len(end):
+            raise ValueError("begin and end must have the same length")
+
+        if step is None:
+            step = [1] * len(begin)
+
+        begin = self.constant(begin).node  # type: ignore
+        end = self.constant(end).node  # type: ignore
+        step = self.constant(step).node  # type: ignore
+
+        return backend_lib.simple_slice(
+            self._mm,
+            input_node,
+            begin,
+            end,
+            step
+        )
 
     @return_tensor
     def concat(
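`simple_slice` wraps the new five-handle `backend_lib.simple_slice` binding (input plus begin/end/step constants). A minimal sketch, assuming `nn` is an `NNFactory` instance:

```python
# Sketch: rows 0..2 (exclusive) and columns 2..6 of a (4, 8) tensor; step defaults to 1s.
x = nn.parameter((4, 8))
y = nn.simple_slice(x, begin=[0, 2], end=[2, 6])
```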
@@ -469,11 +611,32 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_node_1)
+            shape_size = backend_lib.op_shape_size(input_node_1, 0)
             axis = (axis + shape_size) % shape_size
         axis = np.int64(axis)
         return backend_lib.concat(self._mm, input_node_1, input_node_2, axis)
 
+    @return_tensor_for_list_inputs
+    def sequence_concat(
+        self, input_nodes: List[ctypes._Pointer], axis: int
+    ) -> ctypes._Pointer:
+        """Generate a concatenation layer.
+
+        Args:
+            input_nodes (List[ctypes._Pointer]): sequence of layer input node
+            axis (int): axis
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        if axis < 0:
+            shape_size = backend_lib.op_shape_size(input_nodes[0], 0)
+            axis = (axis + shape_size) % shape_size
+        axis = np.int64(axis)
+
+        input_ptr = (ctypes.POINTER(ctypes.c_char) * len(input_nodes))(*input_nodes)
+        return backend_lib.multi_concat(self._mm, input_ptr, len(input_nodes), axis)
+
     @return_tensor
     def reduce_max(
         self,
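`sequence_concat` uses the new `@return_tensor_for_list_inputs` decorator above, so a plain Python list of `Tensor`s is unwrapped to node handles before the `multi_concat` call. Sketch:

```python
# Sketch: one n-ary concat node instead of a chain of binary concat nodes.
parts = [nn.parameter((1, 16)) for _ in range(3)]
out = nn.sequence_concat(parts, axis=-1)  # negative axis normalized as in concat
```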
@@ -492,7 +655,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_max(self._mm, input_node, axis_node, keep_dims)
@@ -515,7 +678,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_mean(self._mm, input_node, axis_node, keep_dims)
@@ -538,7 +701,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_min(self._mm, input_node, axis_node, keep_dims)
@@ -561,7 +724,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_prod(self._mm, input_node, axis_node, keep_dims)
@@ -584,7 +747,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if reduction_axes is None:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             reduction_axes = list(range(shape_size - 1, -1, -1))
         axis_node = self.constant(reduction_axes).node  # type: ignore
         return backend_lib.reduce_sum(self._mm, input_node, axis_node, keep_dims)
@@ -604,7 +767,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             ctypes._Pointer: output node
         """
         if axis < 0:
-            shape_size = backend_lib.op_shape_size(input_node)
+            shape_size = backend_lib.op_shape_size(input_node, 0)
             axis = (axis + shape_size) % shape_size
         axis_node = self.constant(axis).node  # type: ignore
         return backend_lib.normL2(self._mm, input_node, axis_node, eps)
@@ -627,14 +790,14 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             ctypes._Pointer: output node
         """
-        input_shape_size = backend_lib.op_shape_size(input_node)
+        input_shape_size = backend_lib.op_shape_size(input_node, 0)
         input_shape = [
-            backend_lib.op_shape(input_node, i) for i in range(input_shape_size)
+            backend_lib.op_shape(input_node, i, 0) for i in range(input_shape_size)
         ]
         if isinstance(exponent, ctypes._Pointer):
-            exponent_shape_size = backend_lib.op_shape_size(input_node)
+            exponent_shape_size = backend_lib.op_shape_size(input_node, 0)
             exponent_shape = [
-                backend_lib.op_shape(exponent, i) for i in range(exponent_shape_size)
+                backend_lib.op_shape(exponent, i, 0) for i in range(exponent_shape_size)
             ]
         else:
             exponent_shape = list(exponent.shape)
@@ -643,6 +806,39 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             # raise ValueError("Input tensor shapes are not equal")
 
         return backend_lib.power(self._mm, input_node, exponent)
+
+    @return_tensor
+    def variadic_split(
+        self,
+        input: ctypes._Pointer,
+        axis: int,
+        split_lengths: Sequence[int],
+    ) -> ctypes._Pointer:
+        """Generate an average pooling layer.
+
+        Args:
+            input (ctypes._Pointer): layer input node
+            axis (int): split axis
+            split_lengths (Sequence[int]): A list containing the sizes of each output tensor
+                along the split "axis". Size of "split_lengths" should be equal to the number of
+                outputs. The sum of split_lengths must match data.shape[axis]
+
+        Raises:
+            NotImplementedError: divisor_override is not supported
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+
+        split_lens_ptr = np.array(split_lengths, dtype=np.uint32)
+
+        return backend_lib.variadic_split(
+            self._mm,
+            input,
+            axis,
+            split_lens_ptr,
+            split_lens_ptr.size,
+        )
 
     @return_tensor
     def avg_pooling(
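Per the docstring, `split_lengths` must sum to the size of the split axis; combined with the multi-output `Tensor` handling added earlier, the result can be unpacked directly if the node reports one output per chunk. Sketch:

```python
# Sketch: split a fused QKV projection of width 96 into three 32-wide tensors.
qkv = nn.parameter((1, 96))
q, k, v = nn.variadic_split(qkv, 1, [32, 32, 32])
```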
@@ -777,7 +973,28 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             auto_pad,  # auto_pad
         )
 
-    def get_tensor_shape(self, node):
+    @return_tensor
+    def scaled_dot_product_attention(
+        self, query: ctypes._Pointer, key: ctypes._Pointer,
+        value: ctypes._Pointer, attn_mask: ctypes._Pointer,
+        is_causal: bool
+    ) -> ctypes._Pointer:
+        """Constructs a ScaledDotProductAttention operation.
+        Args:
+            query (ctypes._Pointer): query
+            key (ctypes._Pointer): key
+            value (ctypes._Pointer): value
+            attn_mask (ctypes._Pointer): attention mask
+            is_causal (ctypes._Pointer): causal/not causal
+        Returns:
+            ctypes._Pointer: output node
+        """
+        return backend_lib.scaled_dot_product_attention(self._mm,
+                                                        query, key,
+                                                        value, attn_mask,
+                                                        is_causal)
+
+    def get_tensor_shape(self, node, output_idx=0):
         """Get tensor shape.
 
         Args:
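A hedged sketch of the new `scaled_dot_product_attention` method; the node presumably maps onto OpenVINO's ScaledDotProductAttention op, so mask and causal semantics would follow that op (an assumption, not stated in this diff):

```python
# Sketch with illustrative (batch, heads, seq, head_dim) shapes.
q = nn.parameter((1, 8, 128, 64))
k = nn.parameter((1, 8, 128, 64))
v = nn.parameter((1, 8, 128, 64))
mask = nn.parameter((1, 1, 128, 128))
attn = nn.scaled_dot_product_attention(q, k, v, mask, False)
```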
@@ -786,10 +1003,10 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             tuple[int]: tensor shape
         """
-        size = backend_lib.op_shape_size(node)
-        return tuple([backend_lib.op_shape(node, idx) for idx in range(size)])
+        size = backend_lib.op_shape_size(node, output_idx)
+        return tuple([backend_lib.op_shape(node, idx, output_idx) for idx in range(size)])
 
-    def get_tensor_dtype(self, node):
+    def get_tensor_dtype(self, node, output_idx=0):
         """Get tensor dtype.
 
         Args:
@@ -801,7 +1018,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         Returns:
             str: tensor dtype
         """
-        dtype_int = backend_lib.op_dtype(node)
+        dtype_int = backend_lib.op_dtype(node, output_idx)
 
         if dtype_int == 2:
             return np.bool
@@ -826,7 +1043,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
         else:
             raise RuntimeError("Unsupported dtype")
 
-    def compile(self):
+    def compile(self, npu_dpu_groups=4):
         """Finalize and compile a model."""
         self.out = []
         self.torch_out = []
@@ -834,7 +1051,7 @@ class NNFactory(BaseNPUBackendWithPrefetch):
             backend_lib.result(self._mm, node)
 
         # Compile the model
-        backend_lib.compile(self._mm)
+        backend_lib.compile(self._mm, npu_dpu_groups)
 
         for idx, node in enumerate(self.output_nodes):
             output_shape = self.get_tensor_shape(node)
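`compile` now forwards an `npu_dpu_groups` count (default 4) into the native `compile` binding, matching its new `ctypes.c_int` argument; what the grouping controls on the NPU is not documented in this diff. Sketch:

```python
nn.compile()                  # unchanged call sites get npu_dpu_groups=4
nn.compile(npu_dpu_groups=2)  # explicitly request a different DPU grouping
```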
intel_npu_acceleration_library/backend/ops.py:

@@ -98,7 +98,7 @@ def get_supported_ops() -> List[SupportedOp]:
             inputs=3,
             parameters=[ctypes.c_int],
         ),
-        SupportedOp(name="reshape", inputs=2),
+        SupportedOp(name="reshape", inputs=2, parameters=[ctypes.c_bool, ctypes.c_int]),
         SupportedOp(name="transpose", inputs=2),
         SupportedOp(name="squeeze", inputs=1),
         SupportedOp(name="unsqueeze", inputs=2),
@@ -137,5 +137,6 @@ def get_supported_ops() -> List[SupportedOp]:
         SupportedOp(name="power", inputs=2),
         SupportedOp(name="broadcast", inputs=2),
         SupportedOp(name="log_softmax", inputs=1, parameters=[ctypes.c_int64]),
+        SupportedOp(name="rotate_half", inputs=1),
     ]
     return supported_ops
intel_npu_acceleration_library/backend/qlinear.py:

@@ -18,6 +18,7 @@ class QLinear(NNFactory):
         profile: bool = False,
         device: str = "NPU",
         dtype: np.dtype = np.int8,
+        asym: bool = False
     ):
         """Initialize the QLinear class.
 
@@ -33,13 +34,14 @@ class QLinear(NNFactory):
         super().__init__(profile, device)
         self.inC, self.outC = inC, outC
         self.batch = batch
+        self.asym = asym
 
         input = self.parameter((self.batch, self.inC))
-        _ = self.linear(input, outC, inC, bias=False, wt_dtype=dtype)
+        _ = self.linear(input, outC, inC, bias=False, wt_dtype=dtype, asym=asym)
         self.compile()
 
     def run(
-        self, X: np.ndarray, W: np.ndarray, scale: np.ndarray, op_id: str
+        self, X: np.ndarray, W: np.ndarray, scale: np.ndarray, zero: np.ndarray = None, op_id: str = None
     ) -> np.ndarray:
         """Run the layer: $X * (W * S)^T$ .
 
@@ -67,5 +69,7 @@ class QLinear(NNFactory):
             raise RuntimeError(
                 f"Scale shape {W.shape} different from expected one {(self.outC, 1)}"
             )
-
-        return super().run(X, (W, scale), op_id=op_id)
+        if not self.asym:
+            return super().run(X, (W, scale), op_id=op_id)
+        else:
+            return super().run(X, (W, scale, zero), op_id=op_id)
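End-to-end, the asymmetric path threads `asym` from the constructor into `linear(...)` and makes `run` accept a zero-point array that is forwarded alongside the scale. A hedged sketch (the scale shape follows the check visible in this file; weight/zero-point dtypes are assumptions):

```python
import numpy as np
from intel_npu_acceleration_library.backend.qlinear import QLinear

layer = QLinear(inC=512, outC=512, batch=1, asym=True)

X = np.random.rand(1, 512).astype(np.float16)
W = np.random.randint(-8, 8, (512, 512)).astype(np.int8)  # assumed quantized weights
scale = np.random.rand(512, 1).astype(np.float16)         # expected shape (outC, 1)
zero = np.zeros((512, 1), dtype=np.float16)                # zero-point, asym path only

out = layer.run(X, W, scale, zero=zero, op_id="layer0")
```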