bigdl-core-npu 2.6.0b20250114__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. bigdl-core-npu/__init__.py +0 -0
  2. bigdl-core-npu/include/common.h +96 -0
  3. bigdl-core-npu/include/npu_llm.h +74 -0
  4. bigdl-core-npu/npu_llm.dll +0 -0
  5. bigdl-core-npu/npu_llm.lib +0 -0
  6. bigdl_core_npu-2.6.0b20250114.dist-info/METADATA +44 -0
  7. bigdl_core_npu-2.6.0b20250114.dist-info/RECORD +234 -0
  8. bigdl_core_npu-2.6.0b20250114.dist-info/WHEEL +5 -0
  9. bigdl_core_npu-2.6.0b20250114.dist-info/top_level.txt +2 -0
  10. intel_npu_acceleration_library/__init__.py +24 -0
  11. intel_npu_acceleration_library/_version.py +6 -0
  12. intel_npu_acceleration_library/backend/__init__.py +37 -0
  13. intel_npu_acceleration_library/backend/base.py +250 -0
  14. intel_npu_acceleration_library/backend/bindings.py +383 -0
  15. intel_npu_acceleration_library/backend/compression.py +24 -0
  16. intel_npu_acceleration_library/backend/convolution.py +58 -0
  17. intel_npu_acceleration_library/backend/factory.py +1161 -0
  18. intel_npu_acceleration_library/backend/linear.py +60 -0
  19. intel_npu_acceleration_library/backend/matmul.py +59 -0
  20. intel_npu_acceleration_library/backend/mlp.py +58 -0
  21. intel_npu_acceleration_library/backend/ops.py +142 -0
  22. intel_npu_acceleration_library/backend/qlinear.py +75 -0
  23. intel_npu_acceleration_library/backend/qmatmul.py +66 -0
  24. intel_npu_acceleration_library/backend/runtime.py +215 -0
  25. intel_npu_acceleration_library/backend/sdpa.py +107 -0
  26. intel_npu_acceleration_library/backend/tensor.py +1120 -0
  27. intel_npu_acceleration_library/backend/utils.py +70 -0
  28. intel_npu_acceleration_library/compiler.py +194 -0
  29. intel_npu_acceleration_library/device.py +230 -0
  30. intel_npu_acceleration_library/dtypes.py +155 -0
  31. intel_npu_acceleration_library/external/openvino/__init__.py +72 -0
  32. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +21 -0
  33. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  34. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  35. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  36. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  42. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
  43. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  44. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
  45. intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
  46. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  47. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  48. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  49. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  50. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  51. intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
  52. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  53. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  54. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  55. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  56. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  57. intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
  58. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +370 -0
  59. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +180 -0
  60. intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
  61. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +118 -0
  62. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  63. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  64. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  65. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  66. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +131 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +290 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +126 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +568 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +258 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
  79. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  80. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  81. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  82. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  83. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  84. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +481 -0
  85. intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
  86. intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
  87. intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
  88. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +28 -0
  89. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
  90. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
  91. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +5 -0
  92. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
  93. intel_npu_acceleration_library/external/openvino/properties/__init__.py +22 -0
  94. intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
  95. intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
  96. intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
  97. intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
  98. intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
  99. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
  100. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
  101. intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
  102. intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
  103. intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
  105. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +19 -0
  107. intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
  108. intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3068 -0
  110. intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
  111. intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
  114. intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
  115. intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
  116. intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
  117. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +398 -0
  118. intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
  119. intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
  120. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +17 -0
  121. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +276 -0
  122. intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
  123. intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
  124. intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
  125. intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
  126. intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
  127. intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
  128. intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
  129. intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
  130. intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
  131. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +215 -0
  132. intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
  133. intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
  134. intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
  135. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +787 -0
  136. intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
  137. intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
  138. intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
  139. intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
  140. intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
  141. intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
  142. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +40 -0
  143. intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
  144. intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
  145. intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
  146. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
  147. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +447 -0
  148. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
  149. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +156 -0
  150. intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
  151. intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
  152. intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
  153. intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
  154. intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
  155. intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
  156. intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
  157. intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
  158. intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
  159. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
  160. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
  161. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
  162. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
  163. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
  164. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
  165. intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
  166. intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
  167. intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
  168. intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
  169. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
  170. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +550 -0
  171. intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
  172. intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
  173. intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
  174. intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
  175. intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
  176. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +40 -0
  177. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
  178. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
  179. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
  180. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
  181. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  182. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
  183. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
  184. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
  185. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
  186. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +298 -0
  187. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
  188. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +214 -0
  189. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
  190. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
  191. intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
  192. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
  193. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
  194. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
  195. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +196 -0
  196. intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
  197. intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
  198. intel_npu_acceleration_library/external/openvino/utils.py +115 -0
  199. intel_npu_acceleration_library/functional/__init__.py +8 -0
  200. intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
  201. intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
  202. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  203. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  204. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  205. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  206. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  207. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  208. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  209. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  210. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  211. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  212. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  213. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  214. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  215. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  216. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  217. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  218. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  219. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  220. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  221. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  222. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  223. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  224. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  225. intel_npu_acceleration_library/modelling.py +150 -0
  226. intel_npu_acceleration_library/nn/__init__.py +20 -0
  227. intel_npu_acceleration_library/nn/autograd.py +68 -0
  228. intel_npu_acceleration_library/nn/conv.py +257 -0
  229. intel_npu_acceleration_library/nn/functional.py +1207 -0
  230. intel_npu_acceleration_library/nn/linear.py +162 -0
  231. intel_npu_acceleration_library/nn/llm.py +417 -0
  232. intel_npu_acceleration_library/nn/module.py +393 -0
  233. intel_npu_acceleration_library/optimizations.py +157 -0
  234. intel_npu_acceleration_library/quantization.py +174 -0
@@ -0,0 +1,250 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+ from typing import Optional, List, Union, Any, Dict, Tuple, Iterable
6
+ from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
7
+ import numpy as np
8
+ import intel_npu_acceleration_library
9
+ import ctypes
10
+ import os
11
+
12
+
13
def adapt_weight(w: np.ndarray) -> Tuple[np.ndarray, Tuple[int, ...]]:
    """Adapt the weights to run on the NPU.

    Two-dimensional arrays are passed through untouched. Arrays of any other
    rank are reshaped into a single row of shape ``(1, N)``.

    Args:
        w (np.ndarray): weights array

    Returns:
        Tuple[np.ndarray, Tuple[int, ...]]: The adapted array and its shape
    """
    if len(w.shape) == 2:
        return w, w.shape
    # Every other rank (1-D, 3-D, ...) is collapsed into one row.
    w_adapted = w.reshape((1, -1))
    return w_adapted, w_adapted.shape
30
+
31
+
32
class BaseNPUBackend:
    """A base class that represents an abstract Matrix-Matrix operation on the NPU."""

    def __init__(self, profile: Optional[bool] = False) -> None:
        """Initialize class profiling.

        Args:
            profile (Optional[bool], optional): Enable/Disable NPU profiling. Defaults to False.
        """
        if profile:
            # Profiling is switched on through process-wide environment variables.
            os.environ["NPU_PRINT_PROFILING"] = "JSON"
            os.environ["NPU_PROFILING_OUTPUT_FILE"] = "profiling.json"
            os.environ["NPU_PROFILING_VERBOSITY"] = "HIGH"
        # Handle passed to every backend_lib call below; starts out empty
        # (presumably assigned by subclasses -- confirm against callers).
        self._mm: Any = None

    def __del__(self):
        """Deallocate and free the class from the library."""
        # Nothing to free if __init__ never ran or no handle was ever assigned;
        # avoids handing None to the native destructor.
        if getattr(self, "_mm", None) is None:
            return
        # The module/library globals are re-checked because __del__ may run
        # while they are no longer usable.
        if intel_npu_acceleration_library and hasattr(
            backend_lib, "destroyNNFactory"
        ):
            backend_lib.destroyNNFactory(self._mm)
            # Drop the handle so a second finalization cannot free it twice.
            self._mm = None

    def save(self, path: str, compress_to_fp16: bool = True):
        """Save the Openvino model.

        Args:
            path (str): the model save path
            compress_to_fp16 (bool): whether to compress floating point weights to FP16 (default: True).
        """
        backend_lib.saveModel(
            self._mm, ctypes.c_char_p(path.encode()), compress_to_fp16
        )

    def saveCompiledModel(self, path: str):
        """Save the compiled model.

        Args:
            path (str): the compiled model save path
        """
        backend_lib.saveCompiledModel(self._mm, ctypes.c_char_p(path.encode()))

    def serialize(self, xml_path: str, bin_path: str):
        """Serialize the Openvino model.

        Args:
            xml_path (str): the model save xml path
            bin_path (str): the model save bin path
        """
        backend_lib.serializeModel(
            self._mm,
            ctypes.c_char_p(xml_path.encode()),
            ctypes.c_char_p(bin_path.encode()),
        )
81
+
82
+
83
class BaseNPUBackendWithPrefetch(BaseNPUBackend):
    """A base class that represents an abstract Matrix-Matrix operation on the NPU.

    Linear type classes employ an algorithm to optimize weights prefetching
    """

    def __init__(self, profile: bool):
        """Initialize class.

        Args:
            profile (bool): Enable/Disable NPU profiling.
        """
        super().__init__(profile)
        # Operation hashes in registration order; prefetchWeights walks this list.
        self.wt_order: List[str] = []
        # Operation hash -> Parameters handle created by create_parameters.
        self.wt_map: Dict[str, ctypes._Pointer] = {}
        # Hash of the weights most recently loaded from wt_map, if any.
        self.loaded: Optional[str] = None

    def load_wt_fn(self, offset, module, parameters, verify_size=False):
        """Load asynchronously the parameter into the NPU.

        Args:
            offset: offset forwarded to the library's setNNFactoryWeights call
            module: the NPU backend module
            parameters: the weights parameter class
            verify_size: flag forwarded to setNNFactoryWeights. Defaults to False.
        """
        backend_lib.setNNFactoryWeights(module, offset, parameters, verify_size)

    def _add_quantized_parameter(self, param, weight) -> None:
        """Append one quantized weight group to a Parameters object.

        Args:
            param: the Parameters handle being filled
            weight: a (data, scale) or (data, scale, zero) sequence

        Raises:
            ValueError: the sequence does not have 2 or 3 elements, or the scale dtype is invalid
            RuntimeError: Quantized weights needs to be in int8 or uint8 format
        """
        if len(weight) == 2:
            # int8: data and scale
            data, scale = weight
            zero = None
        elif len(weight) == 3:
            # for asym int4
            data, scale, zero = weight
        else:
            # Fail loudly instead of hitting an unbound local further down.
            raise ValueError(
                "Quantized weights must be (data, scale) or (data, scale, zero), "
                f"got {len(weight)} elements"
            )

        if data.dtype not in [np.int8, np.uint8]:
            raise RuntimeError("Quantized weights needs to be in int8 or uint8 format")

        adapted_weights, shape = adapt_weight(data)
        adapted_weights_scale, shape_scale = adapt_weight(scale)

        if scale.dtype == np.float16:
            # Mixed precision matmul
            if data.dtype == np.int8:
                backend_lib.addIntParameter(
                    param,
                    adapted_weights,
                    adapted_weights_scale,
                    *shape,
                    *shape_scale,
                )
            elif data.dtype == np.uint8 and zero is not None:
                # asym_int4
                adapted_weights_zero, shape_zero = adapt_weight(zero)
                backend_lib.addAsymInt4Parameter(
                    param,
                    adapted_weights,
                    adapted_weights_scale,
                    adapted_weights_zero,
                    *shape,
                    *shape_scale,
                    *shape_zero,
                )
            else:
                # sym_int4
                backend_lib.addInt4Parameter(
                    param,
                    adapted_weights,
                    adapted_weights_scale,
                    *shape,
                    *shape_scale,
                )
        elif scale.dtype == np.float32:
            # FP16 matmul with CPU conversion
            backend_lib.addIntParameterConversion(
                param,
                adapted_weights,
                adapted_weights_scale,
                *shape,
            )
        else:
            raise ValueError(f"Invalid dtype for scale: {scale.dtype}")

    def create_parameters(
        self, weights: Iterable[Union[np.ndarray, Tuple[np.ndarray, ...]]]
    ) -> ctypes._Pointer:
        """Create an operation parameter from a list of weights.

        A bare ``np.ndarray`` is added as a single float parameter. A list or
        tuple is walked element by element: nested lists/tuples are treated as
        quantized groups, ``uint8`` arrays as int4 weights and every other
        array as a float parameter. Any other input yields an empty
        Parameters object.

        Args:
            weights (Iterable[Union[np.ndarray, Tuple[np.ndarray, ...]]]): Operation parameters

        Raises:
            RuntimeError: Quantized weights needs to be in int8 or uint8 format
            ValueError: Invalid dtype for scale, or a quantized group that does
                not have 2 or 3 elements

        Returns:
            ctypes._Pointer: an instance to the Parameters object
        """
        param = backend_lib.createParameters()

        if isinstance(weights, np.ndarray):
            adapted_weights, shape = adapt_weight(weights)
            backend_lib.addFloatParameter(param, adapted_weights, *shape)
            return param

        if not isinstance(weights, (list, tuple)):
            return param

        for weight in weights:
            if isinstance(weight, (list, tuple)):
                self._add_quantized_parameter(param, weight)
                continue
            adapted_weights, shape = adapt_weight(weight)
            if weight.dtype == np.uint8:
                backend_lib.addInt4WeightParameter(
                    param,
                    adapted_weights,
                    *shape,
                )
            else:
                backend_lib.addFloatParameter(param, adapted_weights, *shape)
        return param

    def add_to_map(
        self, wt_hash: str, weights: Iterable[Union[np.ndarray, Tuple[np.ndarray, ...]]]
    ):
        """Add an operation parameters to the operation hash:parameter map.

        Args:
            wt_hash (str): operation hash
            weights (Iterable[Union[np.ndarray, Tuple[np.ndarray, ...]]]): Operation parameters
        """
        self.wt_map[wt_hash] = self.create_parameters(weights)
        self.wt_order.append(wt_hash)

    def setWeights(
        self,
        offset: int,
        wt_hash: Optional[str],
        *args: Union[np.ndarray, Tuple[np.ndarray, ...]],
        verify_size: bool = False
    ) -> bool:
        """Set the operation weights in the NPU.

        Args:
            offset (int): offset forwarded to load_wt_fn
            wt_hash (str): operation hash. If set to None force the load of the weights
            args (Union[np.ndarray, Tuple[np.ndarray, ...]]): Variable length weights list. Can be a np array or a tuple of weight, scale in case of quantized tensors
            verify_size (bool): flag forwarded to load_wt_fn. Defaults to False.

        Returns:
            bool: Return True if the op parameters are already in the op map
        """
        if wt_hash is None:
            # NOTE(review): these parameters are never stored in wt_map, so
            # __del__ does not destroy them -- confirm the library owns them.
            self.load_wt_fn(
                offset, self._mm, self.create_parameters(args), verify_size=verify_size
            )
            return False

        in_wt_map = wt_hash in self.wt_map
        if wt_hash != self.loaded:
            if not in_wt_map:
                self.add_to_map(wt_hash, args)
            self.load_wt_fn(
                offset, self._mm, self.wt_map[wt_hash], verify_size=verify_size
            )
            self.loaded = wt_hash
        return in_wt_map

    def prefetchWeights(self, offset, verify_size: bool = False):
        """Prefetch next operation weights.

        The "next" operation is the one registered after the currently loaded
        hash, wrapping around to the first one at the end of the list.

        Args:
            offset: offset forwarded to load_wt_fn
            verify_size (bool): flag forwarded to load_wt_fn. Defaults to False.

        Raises:
            ValueError: the currently loaded hash is not in the registration list
                (for example when nothing has been loaded through the map yet)
            ZeroDivisionError: the loaded hash compares equal to ``[]``'s element
                lookup only in a non-empty list; an empty list fails in
                ``index`` first, so this is listed for completeness only
        """
        next_wt_idx = (self.wt_order.index(self.loaded) + 1) % len(self.wt_order)
        wt_hash = self.wt_order[next_wt_idx]
        # With a single registered operation the next one is already loaded.
        if wt_hash != self.loaded:
            self.load_wt_fn(
                offset, self._mm, self.wt_map[wt_hash], verify_size=verify_size
            )
            self.loaded = wt_hash

    def __del__(self):
        """Deallocate and free the class from the library."""
        super().__del__()
        # wt_map is missing when __init__ failed before assigning it.
        wt_map = getattr(self, "wt_map", None)
        if not wt_map:
            return
        if intel_npu_acceleration_library and hasattr(
            backend_lib, "destroyParameters"
        ):
            for par in wt_map.values():
                backend_lib.destroyParameters(par)
            # Forget the freed handles so they cannot be destroyed twice.
            wt_map.clear()
@@ -0,0 +1,383 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.backend.ops import get_supported_ops
7
+ import numpy as np
8
+ import warnings
9
+ import ctypes
10
+ import sys
11
+ import os
12
+
13
# Pointer type used in the bindings below as the argument/return type for
# objects owned by the native library.
handler = ctypes.POINTER(ctypes.c_char)
# numpy-backed argument types: an array passed through one of these must have
# the given dtype and number of dimensions and be C-contiguous.
c_fp16_array = np.ctypeslib.ndpointer(dtype=np.float16, ndim=2, flags="C_CONTIGUOUS")
c_fp32_array = np.ctypeslib.ndpointer(dtype=np.float32, ndim=2, flags="C_CONTIGUOUS")
c_i8_array = np.ctypeslib.ndpointer(dtype=np.int8, ndim=2, flags="C_CONTIGUOUS")
c_u8_array = np.ctypeslib.ndpointer(dtype=np.uint8, ndim=2, flags="C_CONTIGUOUS")
# One-dimensional uint32 array (unlike the 2D types above).
c_u32_array = np.ctypeslib.ndpointer(dtype=np.uint32, ndim=1, flags="C_CONTIGUOUS")
19
+
20
+
21
def load_library() -> ctypes.CDLL:
    """Load the Intel® NPU Acceleration Library runtime library.

    Before loading, the ``external`` directory next to this package is put at
    the front of ``sys.path``, and a warning is emitted when ``openvino`` has
    already been imported. On Windows the ``OPENVINO_LIB_PATHS`` environment
    variable is overwritten and the DLL directory is registered; on Linux the
    library directory is appended to ``sys.path`` and ``libopenvino.so`` is
    loaded explicitly first.

    Raises:
        RuntimeError: an error is raised if the platform is not supported. Currently supported platforms are Windows and Linux

    Returns:
        ctypes.CDLL: The loaded dynamic library
    """
    backend_dir = os.path.dirname(os.path.abspath(__file__))

    openvino_already_imported = "openvino" in sys.modules
    if openvino_already_imported:
        warnings.warn(
            "OpenVINO library is already loaded. It might interfere with NPU acceleration library if it uses an old version.",
            stacklevel=2,
        )

    sys.path.insert(0, os.path.join(backend_dir, "..", "external"))

    platform = sys.platform
    if platform not in ("win32", "linux"):
        raise RuntimeError(
            f"Platform {sys.platform} is not supported for intel-npu-acceleration-library library"
        )

    if platform == "win32":
        release_dir = os.path.join(backend_dir, "..", "lib", "Release")
        os.environ["OPENVINO_LIB_PATHS"] = release_dir
        os.add_dll_directory(os.path.abspath(release_dir))
        # Load DLL into memory.
        return ctypes.WinDLL(
            os.path.join(release_dir, "intel_npu_acceleration_library.dll")
        )

    shared_dir = os.path.join(backend_dir, "..", "lib")
    sys.path.append(shared_dir)
    # In Linux it is required to explicitly load openvino lib
    _ = ctypes.CDLL(os.path.join(shared_dir, "libopenvino.so"))
    return ctypes.CDLL(
        os.path.join(shared_dir, "libintel_npu_acceleration_library.so")
    )
62
+
63
+
64
def init_common(lib: ctypes.CDLL):
    """Initialize common runtime bindings.

    Assigns ``argtypes`` and/or ``restype`` on the shared entry points of the
    library. An attribute that is absent from an entry is left untouched.

    Args:
        lib (ctypes.CDLL): Intel® NPU Acceleration Library runtime library
    """
    # symbol -> prototype attributes, applied in declaration order
    prototypes = {
        "saveModel": {"argtypes": [handler, ctypes.c_char_p, ctypes.c_bool]},
        "saveCompiledModel": {"argtypes": [handler, ctypes.c_char_p]},
        "serializeModel": {"argtypes": [handler, ctypes.c_char_p, ctypes.c_char_p]},
        # Set input activations
        "set_activation": {"argtypes": [handler, ctypes.c_void_p, ctypes.c_int]},
        # Set output activations
        "set_output": {"argtypes": [handler, ctypes.c_void_p, ctypes.c_int]},
        # Run entry points, both returning a float
        "run": {"argtypes": [handler], "restype": ctypes.c_float},
        "run_decoders": {
            "argtypes": [
                ctypes.POINTER(handler),
                ctypes.POINTER(ctypes.c_void_p),
                ctypes.c_int,
                ctypes.c_int,
            ],
            "restype": ctypes.c_float,
        },
        # Common destructor
        "destroyNNFactory": {"argtypes": [handler]},
        "isNPUAvailable": {"restype": ctypes.c_bool},
        "getNPUDriverVersion": {"restype": ctypes.c_int32},
        "compressToI4": {"argtypes": [c_i8_array, c_u8_array, ctypes.c_int]},
        # Remote tensors
        "to_npu": {
            "argtypes": [ctypes.c_int, c_u32_array, ctypes.c_char_p, ctypes.c_void_p],
            "restype": handler,
        },
        "remote_tensor_data": {"argtypes": [handler], "restype": ctypes.c_void_p},
        "del_remote_tensor": {"argtypes": [handler]},
    }

    for symbol, attributes in prototypes.items():
        native_fn = getattr(lib, symbol)
        for attribute, value in attributes.items():
            setattr(native_fn, attribute, value)
103
+
104
+
105
+
106
def init_network_factory(lib: ctypes.CDLL):
    """Initialize Network factory bindings.

    Assigns the ctypes prototypes of the explicitly listed entry points, then
    of every operation reported by ``get_supported_ops()``.

    Args:
        lib (ctypes.CDLL): Intel® NPU Acceleration Library runtime library
    """
    c_int = ctypes.c_int
    c_bool = ctypes.c_bool
    c_char_p = ctypes.c_char_p

    # Marker for entries whose restype is left untouched
    keep_restype = object()

    # (c_int, uint32 array) pair that recurs in several prototypes
    # presumably a length followed by its values - TODO confirm
    int_and_u32 = [c_int, c_u32_array]

    # Argument list shared by the two dq_split_linear entry points
    split_linear_args = [
        handler,
        handler,
        c_int,
        c_int,
        c_int,
        c_bool,
        c_char_p,
        c_char_p,
        c_bool,
        c_bool,
    ]

    # (symbol, argtypes, restype), applied in declaration order
    prototypes = [
        ("createNNFactory", [c_char_p, c_bool], handler),
        ("setNNFactoryWeights", [handler, c_int, handler, c_bool], keep_restype),
        ("op_shape_size", [handler, c_int], c_int),
        ("op_shape", [handler, c_int, c_int], c_int),
        ("op_dtype", [handler, c_int], c_int),
        ("op_output_size", [handler], c_int),
        ("parameter", [handler, c_int, c_u32_array, c_char_p], handler),
        ("to", [handler, handler, c_char_p], handler),
        (
            "constant",
            [handler, c_int, c_u32_array, c_char_p, ctypes.c_void_p],
            handler,
        ),
        ("slice", [handler] * 5 + int_and_u32 * 2, handler),
        ("simple_slice", [handler] * 5, handler),
        ("compile", [handler, c_int], handler),
        ("get_output_tensor_shape_size", [handler, c_int], c_int),
        ("get_output_tensor_shape", [handler, c_int, c_int], c_int),
        (
            "linear",
            [
                handler,
                handler,
                c_int,
                c_int,
                c_bool,
                c_char_p,
                c_char_p,
                c_bool,
                c_bool,
            ],
            handler,
        ),
        (
            "convolution",
            [handler] * 4 + int_and_u32 * 4 + [c_int, c_char_p],
            handler,
        ),
        (
            "avg_pooling",
            [handler] * 2 + int_and_u32 * 4 + [c_bool, c_int, c_int],
            handler,
        ),
        (
            "max_pooling",
            [handler] * 2 + int_and_u32 * 4 + [c_int, c_int],
            handler,
        ),
        (
            "multi_concat",
            [handler, ctypes.POINTER(handler), ctypes.c_uint64, ctypes.c_int64],
            handler,
        ),
        ("variadic_split", [handler, handler, c_int, c_u32_array, c_int], handler),
        ("dq_split_linear", list(split_linear_args), handler),
        ("dq_split_linear_prefill", list(split_linear_args), handler),
        (
            "gw_linear_prefill",
            [
                handler,
                handler,
                c_int,
                c_int,
                c_int,
                c_bool,
                c_char_p,
                c_char_p,
                c_bool,
            ],
            handler,
        ),
    ]

    for symbol, argtypes, restype in prototypes:
        native_fn = getattr(lib, symbol)
        native_fn.argtypes = argtypes
        if restype is not keep_restype:
            native_fn.restype = restype

    # Generic operations: (inputs + 1) handles followed by the op parameters
    for op in get_supported_ops():
        op_fn = getattr(lib, op.name)
        op_fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
        op_fn.restype = handler
305
+
306
+
307
def init_parameters(lib: ctypes.CDLL):
    """Initialize Network factory parameters.

    Assigns the ctypes prototypes of the functions that create, fill and
    destroy a parameters object.

    Args:
        lib (ctypes.CDLL): Intel® NPU Acceleration Library runtime library
    """
    c_int = ctypes.c_int

    # The constructor is the only entry here that also gets a restype
    create_fn = lib.createParameters
    create_fn.argtypes = []
    create_fn.restype = handler

    # symbol -> argtypes, applied in declaration order
    argtypes_by_symbol = {
        "destroyParameters": [handler],
        "addFloatParameter": [handler, c_fp16_array, c_int, c_int],
        "addIntParameter": [handler, c_i8_array, c_fp16_array] + [c_int] * 4,
        "addInt4Parameter": [handler, c_u8_array, c_fp16_array] + [c_int] * 4,
        "addAsymInt4Parameter": (
            [handler, c_u8_array, c_fp16_array, c_fp16_array] + [c_int] * 6
        ),
        "addIntParameterConversion": [handler, c_i8_array, c_fp32_array, c_int, c_int],
        "addInt4WeightParameter": [handler, c_u8_array, c_int, c_int],
    }
    for symbol, argtypes in argtypes_by_symbol.items():
        getattr(lib, symbol).argtypes = argtypes
366
+
367
+
368
def initialize_bindings() -> ctypes.CDLL:
    """Load the Intel® NPU Acceleration Library runtime library, and initialize all c++ <-> python bindings.

    Returns:
        ctypes.CDLL: the loaded library, after the common, network factory and parameters bindings have been initialized
    """
    runtime = load_library()

    # Same order as the binding groups are defined in this module
    for configure in (init_common, init_network_factory, init_parameters):
        configure(runtime)

    return runtime
381
+
382
+
383
# Executed at import time: loads the runtime library and initializes its
# bindings, exposing the result as the module-level ``lib`` object.
lib = initialize_bindings()
@@ -0,0 +1,24 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
7
+ import numpy as np
8
+
9
+
10
def compress_to_i4(weights: np.ndarray) -> np.ndarray:
    """Compress a int8 array to int4.

    The output buffer is allocated here, zero-filled, and filled in place by
    the native ``compressToI4`` routine.

    Args:
        weights (np.ndarray): input array

    Returns:
        np.ndarray: compressed array, of dtype uint8 and shape (rows, columns // 2)
    """
    n_rows = weights.shape[0]
    packed_cols = weights.shape[1] // 2
    compressed_weights = np.zeros((n_rows, packed_cols), dtype=np.uint8)

    # Total number of input elements, passed to the native routine
    n_elements = np.prod(weights.shape)
    backend_lib.compressToI4(weights, compressed_weights, n_elements)
    return compressed_weights
@@ -0,0 +1,58 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.backend.factory import NNFactory
7
+ from typing import Sequence, Union
8
+ import numpy as np
9
+
10
+
11
class Convolution(NNFactory):
    """Convolution class, building and compiling a single convolution operation."""

    def __init__(
        self,
        input_shape: Sequence[int],
        weights_shape: Sequence[int],
        bias: bool = False,
        strides: Union[int, Sequence[int]] = 1,
        padding: Union[int, Sequence[int]] = 0,
        dilation: Union[int, Sequence[int]] = 1,
        groups: int = 1,
        profile: bool = False,
        device: str = "NPU",
    ):
        """Initialize the Convolution class.

        Args:
            input_shape (Sequence[int]): input shape
            weights_shape (Sequence[int]): weights shape
            bias (bool): Enable/Disable bias. Defaults to False.
            strides (Union[int, Sequence[int]], optional): Strides. Defaults to 1.
            padding (Union[int, Sequence[int]], optional): Padding. Defaults to 0.
            dilation (Union[int, Sequence[int]], optional): Dilation. Defaults to 1.
            groups (int, optional): Groups. Defaults to 1.
            profile (bool, optional): Enable/Disable profiling. Defaults to False.
            device (str): Target device, default to "NPU".
        """
        super().__init__(profile, device)
        input_node = self.parameter(input_shape)
        weights_node = self.parameter(weights_shape)

        # ``bias`` is a boolean switch, so test its truthiness. The previous
        # ``bias is not None`` check was always true for a bool and created a
        # bias parameter even with ``bias=False``. Passing None still disables it.
        if bias:
            bias_node = self.parameter((1, weights_shape[0], 1, 1))
        else:
            bias_node = None

        _ = self.convolution(
            input_node,
            weights_node,
            bias=bias_node,
            strides=strides,
            padding=padding,
            dilation=dilation,
            groups=groups,
            act_dtype=np.float16,
        )

        self.compile()