bigdl-core-npu 2.6.0b20250114__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. bigdl-core-npu/__init__.py +0 -0
  2. bigdl-core-npu/include/common.h +96 -0
  3. bigdl-core-npu/include/npu_llm.h +74 -0
  4. bigdl-core-npu/npu_llm.dll +0 -0
  5. bigdl-core-npu/npu_llm.lib +0 -0
  6. bigdl_core_npu-2.6.0b20250114.dist-info/METADATA +44 -0
  7. bigdl_core_npu-2.6.0b20250114.dist-info/RECORD +234 -0
  8. bigdl_core_npu-2.6.0b20250114.dist-info/WHEEL +5 -0
  9. bigdl_core_npu-2.6.0b20250114.dist-info/top_level.txt +2 -0
  10. intel_npu_acceleration_library/__init__.py +24 -0
  11. intel_npu_acceleration_library/_version.py +6 -0
  12. intel_npu_acceleration_library/backend/__init__.py +37 -0
  13. intel_npu_acceleration_library/backend/base.py +250 -0
  14. intel_npu_acceleration_library/backend/bindings.py +383 -0
  15. intel_npu_acceleration_library/backend/compression.py +24 -0
  16. intel_npu_acceleration_library/backend/convolution.py +58 -0
  17. intel_npu_acceleration_library/backend/factory.py +1161 -0
  18. intel_npu_acceleration_library/backend/linear.py +60 -0
  19. intel_npu_acceleration_library/backend/matmul.py +59 -0
  20. intel_npu_acceleration_library/backend/mlp.py +58 -0
  21. intel_npu_acceleration_library/backend/ops.py +142 -0
  22. intel_npu_acceleration_library/backend/qlinear.py +75 -0
  23. intel_npu_acceleration_library/backend/qmatmul.py +66 -0
  24. intel_npu_acceleration_library/backend/runtime.py +215 -0
  25. intel_npu_acceleration_library/backend/sdpa.py +107 -0
  26. intel_npu_acceleration_library/backend/tensor.py +1120 -0
  27. intel_npu_acceleration_library/backend/utils.py +70 -0
  28. intel_npu_acceleration_library/compiler.py +194 -0
  29. intel_npu_acceleration_library/device.py +230 -0
  30. intel_npu_acceleration_library/dtypes.py +155 -0
  31. intel_npu_acceleration_library/external/openvino/__init__.py +72 -0
  32. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +21 -0
  33. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  34. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  35. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  36. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  42. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
  43. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  44. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
  45. intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
  46. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  47. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  48. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  49. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  50. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  51. intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
  52. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  53. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  54. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  55. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  56. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  57. intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
  58. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +370 -0
  59. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +180 -0
  60. intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
  61. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +118 -0
  62. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  63. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  64. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  65. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  66. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +131 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +290 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +126 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +568 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +258 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
  79. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  80. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  81. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  82. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  83. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  84. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +481 -0
  85. intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
  86. intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
  87. intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
  88. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +28 -0
  89. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
  90. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
  91. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +5 -0
  92. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
  93. intel_npu_acceleration_library/external/openvino/properties/__init__.py +22 -0
  94. intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
  95. intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
  96. intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
  97. intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
  98. intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
  99. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
  100. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
  101. intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
  102. intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
  103. intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
  105. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +19 -0
  107. intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
  108. intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3068 -0
  110. intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
  111. intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
  114. intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
  115. intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
  116. intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
  117. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +398 -0
  118. intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
  119. intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
  120. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +17 -0
  121. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +276 -0
  122. intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
  123. intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
  124. intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
  125. intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
  126. intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
  127. intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
  128. intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
  129. intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
  130. intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
  131. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +215 -0
  132. intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
  133. intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
  134. intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
  135. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +787 -0
  136. intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
  137. intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
  138. intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
  139. intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
  140. intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
  141. intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
  142. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +40 -0
  143. intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
  144. intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
  145. intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
  146. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
  147. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +447 -0
  148. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
  149. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +156 -0
  150. intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
  151. intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
  152. intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
  153. intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
  154. intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
  155. intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
  156. intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
  157. intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
  158. intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
  159. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
  160. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
  161. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
  162. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
  163. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
  164. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
  165. intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
  166. intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
  167. intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
  168. intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
  169. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
  170. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +550 -0
  171. intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
  172. intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
  173. intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
  174. intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
  175. intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
  176. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +40 -0
  177. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
  178. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
  179. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
  180. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
  181. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  182. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
  183. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
  184. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
  185. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
  186. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +298 -0
  187. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
  188. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +214 -0
  189. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
  190. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
  191. intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
  192. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
  193. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
  194. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
  195. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +196 -0
  196. intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
  197. intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
  198. intel_npu_acceleration_library/external/openvino/utils.py +115 -0
  199. intel_npu_acceleration_library/functional/__init__.py +8 -0
  200. intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
  201. intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
  202. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  203. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  204. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  205. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  206. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  207. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  208. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  209. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  210. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  211. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  212. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  213. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  214. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  215. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  216. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  217. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  218. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  219. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  220. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  221. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  222. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  223. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  224. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  225. intel_npu_acceleration_library/modelling.py +150 -0
  226. intel_npu_acceleration_library/nn/__init__.py +20 -0
  227. intel_npu_acceleration_library/nn/autograd.py +68 -0
  228. intel_npu_acceleration_library/nn/conv.py +257 -0
  229. intel_npu_acceleration_library/nn/functional.py +1207 -0
  230. intel_npu_acceleration_library/nn/linear.py +162 -0
  231. intel_npu_acceleration_library/nn/llm.py +417 -0
  232. intel_npu_acceleration_library/nn/module.py +393 -0
  233. intel_npu_acceleration_library/optimizations.py +157 -0
  234. intel_npu_acceleration_library/quantization.py +174 -0
@@ -0,0 +1,60 @@
1
#
# Copyright © 2024 Intel Corporation
# SPDX-License-Identifier: Apache 2.0
#

from intel_npu_acceleration_library.backend.factory import NNFactory
import numpy as np


class Linear(NNFactory):
    """Linear class, computing a matrix matrix multiplication with weights prefetching."""

    def __init__(
        self,
        inC: int,
        outC: int,
        batch: int,
        profile: bool = False,
        device: str = "NPU",
    ):
        """Initialize the Linear class.

        Args:
            inC (int): input channels
            outC (int): output channels
            batch (int): batch
            profile (bool): Enable/Disable profiling. Defaults to False.
            device (str): Target device, default to "NPU".
        """
        super().__init__(profile, device)
        self.inC, self.outC = inC, outC
        self.batch = batch
        # Build the compute graph: a (batch, inC) input through a bias-less
        # linear layer, then compile it for the target device.
        input = self.parameter((self.batch, self.inC))
        _ = self.linear(input, outC, inC, bias=False)
        self.compile()

    def run(self, X: np.ndarray, W: np.ndarray, op_id: str) -> np.ndarray:
        """Run the layer: X * W^T.

        Args:
            X (np.ndarray): lhs operator, expected shape (batch, inC)
            W (np.ndarray): rhs operator, expected shape (outC, inC)
            op_id (str): operation id

        Raises:
            RuntimeError: Input or weight tensor shape mismatch

        Returns:
            np.ndarray: result
        """
        if not (X.shape[0] == self.batch and X.shape[1] == self.inC):
            raise RuntimeError(
                f"Input shape {X.shape} different from expected one {(self.batch, self.inC)}"
            )
        # BUG FIX: the original re-checked X here, so an ill-shaped weight
        # tensor slipped through while the message blamed the weights.
        # Validate W against the (outC, inC) layout the graph was built with.
        if not (W.shape[0] == self.outC and W.shape[1] == self.inC):
            raise RuntimeError(
                f"Weight shape {W.shape} different from expected one {(self.outC, self.inC)}"
            )

        return super().run(X, W, op_id=op_id)
@@ -0,0 +1,59 @@
1
#
# Copyright © 2024 Intel Corporation
# SPDX-License-Identifier: Apache 2.0
#

from intel_npu_acceleration_library.backend.factory import NNFactory
import numpy as np


class MatMul(NNFactory):
    """MatMul class, computing a matrix matrix multiplication."""

    def __init__(
        self,
        inC: int,
        outC: int,
        batch: int,
        profile: bool = False,
        device: str = "NPU",
    ):
        """Initialize the MatMul class.

        Args:
            inC (int): input channels
            outC (int): output channels
            batch (int): batch
            profile (bool): Enable/Disable profiling. Defaults to False.
            device (str): Target device, default to "NPU".
        """
        super().__init__(profile, device)
        self.inC, self.outC = inC, outC
        self.batch = batch
        # Build the compute graph: a (batch, inC) input through a bias-less
        # linear layer (i.e. a plain matmul with the weight), then compile.
        input = self.parameter((self.batch, self.inC))
        _ = self.linear(input, outC, inC, bias=False)
        self.compile()

    def run(self, X: np.ndarray, W: np.ndarray) -> np.ndarray:
        """Run the layer: X * W^T.

        Args:
            X (np.ndarray): lhs operator, expected shape (batch, inC)
            W (np.ndarray): rhs operator, expected shape (outC, inC)

        Raises:
            RuntimeError: Input or weight tensor shape mismatch

        Returns:
            np.ndarray: result
        """
        if not (X.shape[0] == self.batch and X.shape[1] == self.inC):
            raise RuntimeError(
                f"Input shape {X.shape} different from expected one {(self.batch, self.inC)}"
            )
        # BUG FIX: the original re-checked X here, so an ill-shaped weight
        # tensor slipped through while the message blamed the weights.
        # Validate W against the (outC, inC) layout the graph was built with.
        if not (W.shape[0] == self.outC and W.shape[1] == self.inC):
            raise RuntimeError(
                f"Weight shape {W.shape} different from expected one {(self.outC, self.inC)}"
            )

        return super().run(X, W)
@@ -0,0 +1,58 @@
1
#
# Copyright © 2024 Intel Corporation
# SPDX-License-Identifier: Apache 2.0
#

from intel_npu_acceleration_library.backend.factory import NNFactory
from typing import Optional, Sequence


class MLP(NNFactory):
    """MLP class: builds a two-projection perceptron graph (up-projection, activation, down-projection) on the NPU."""

    def __init__(
        self,
        input_shape: Sequence[int],
        intermediate_size: int,
        activation: str = "swiglu",
        bias: Optional[bool] = False,
        profile: bool = False,
        device: str = "NPU",
        **additional_args
    ):
        """Initialize the MLP class.

        Args:
            input_shape (Sequence[int]): input shape channels, unpacked as (batch, hidden_size)
            intermediate_size (int): intermediate_size
            activation (str): activation function to use; must name a method of
                NNFactory (e.g. "swiglu", "clamp", "elu", "grn", "relu", ...)
            bias (Optional[bool], optional): Enable/Disable bias. Defaults to False.
            profile (bool): Enable/Disable profiling. Defaults to False.
            device (str): Target device, default to "NPU".
            additional_args: additional arguments forwarded to the activation:
                "min"/"max" for clamp, "alpha" for elu (default 1.0),
                "grn_bias" for grn.
        """
        super().__init__(profile, device)
        self.intermediate_size = intermediate_size
        self.batch, self.hidden_size = input_shape
        input = self.parameter((self.batch, self.hidden_size))

        # Up-projection: hidden_size -> intermediate_size.
        mm1 = self.linear(input, self.intermediate_size, self.hidden_size, bias=bias)

        if activation == "swiglu":
            # swiglu adds a second (gate) projection: swish(mm1) * mm2.
            mm2 = self.linear(input, self.intermediate_size, self.hidden_size, bias=bias)  # type: ignore[attr-defined]
            mm1 = self.eltwise_mul(self.swish(mm1), mm2)  # type: ignore[attr-defined]
        elif activation == "clamp":
            atc_fn = getattr(self, activation)
            mm1 = atc_fn(mm1, additional_args.get("min"), additional_args.get("max"))
        elif activation == "elu":
            atc_fn = getattr(self, activation)
            mm1 = atc_fn(mm1, additional_args.get("alpha", 1.0))
        elif activation == "grn":
            atc_fn = getattr(self, activation)
            mm1 = atc_fn(mm1, additional_args.get("grn_bias"))
        else:
            # Every other activation is looked up by name and takes no extra args.
            atc_fn = getattr(self, activation)
            mm1 = atc_fn(mm1)

        # Down-projection back to hidden_size, then compile the whole graph.
        _ = self.linear(mm1, self.hidden_size, self.intermediate_size, bias=bias)
        self.compile()
@@ -0,0 +1,142 @@
1
#
# Copyright © 2024 Intel Corporation
# SPDX-License-Identifier: Apache 2.0
#

from dataclasses import dataclass
from functools import lru_cache
from typing import List, Any, Sequence
import ctypes


@dataclass(frozen=True)
class SupportedOp:
    """A class for supported runtime OPs in the NPU.

    Attrs:
        name (str): Operation name
        inputs (int): Number of inputs
        parameters (Sequence[Any]): Optional parameters type.
    """

    name: str
    inputs: int
    parameters: Sequence[Any] = ()


@lru_cache(maxsize=None)
def get_supported_ops() -> List[SupportedOp]:
    """Generate a list of supported operations.

    Cached: the table is built once and the same list object is returned on
    every subsequent call.

    Returns:
        List[SupportedOp]: list of supported NPU operations
    """
    supported_ops = [
        SupportedOp(name="result", inputs=1),
        SupportedOp(name="matmul", inputs=2, parameters=[ctypes.c_bool, ctypes.c_bool]),
        SupportedOp(name="eltwise_add", inputs=2),
        SupportedOp(name="eltwise_mul", inputs=2),
        SupportedOp(name="eltwise_div", inputs=2),
        SupportedOp(name="abs_act", inputs=1),
        SupportedOp(name="acos_act", inputs=1),
        SupportedOp(name="asin_act", inputs=1),
        SupportedOp(name="atan_act", inputs=1),
        SupportedOp(name="ceiling", inputs=1),
        SupportedOp(
            name="clamp", inputs=1, parameters=[ctypes.c_float, ctypes.c_float]
        ),
        SupportedOp(name="cos_act", inputs=1),
        SupportedOp(name="cosh_act", inputs=1),
        SupportedOp(name="erf_act", inputs=1),
        SupportedOp(name="elu", inputs=1, parameters=[ctypes.c_float]),
        SupportedOp(name="exp_act", inputs=1),
        SupportedOp(name="floor_act", inputs=1),
        SupportedOp(name="grn", inputs=1, parameters=[ctypes.c_float]),
        SupportedOp(name="gelu", inputs=1),
        SupportedOp(name="gelu_erf", inputs=1),
        SupportedOp(name="log_act", inputs=1),
        SupportedOp(name="negative", inputs=1),
        SupportedOp(name="relu", inputs=1),
        SupportedOp(name="sigmoid", inputs=1),
        SupportedOp(name="sign", inputs=1),
        SupportedOp(name="sin_act", inputs=1),
        SupportedOp(name="sinh_act", inputs=1),
        SupportedOp(name="sqrt_act", inputs=1),
        SupportedOp(name="tan_act", inputs=1),
        SupportedOp(name="tanh_act", inputs=1),
        SupportedOp(name="acosh_act", inputs=1),
        SupportedOp(name="asinh_act", inputs=1),
        SupportedOp(name="atanh_act", inputs=1),
        SupportedOp(name="hswish", inputs=1),
        SupportedOp(name="mish", inputs=1),
        SupportedOp(name="softplus", inputs=1),
        SupportedOp(name="hsigmoid", inputs=1),
        SupportedOp(name="round_act", inputs=1),
        SupportedOp(name="softsign", inputs=1),
        SupportedOp(name="softmax", inputs=1, parameters=[ctypes.c_int]),
        SupportedOp(name="swish", inputs=1),
        SupportedOp(name="convert_to_fp16", inputs=1),
        SupportedOp(name="convert_to_fp32", inputs=1),
        SupportedOp(name="convert_to_int32", inputs=1),
        SupportedOp(
            name="scaled_dot_product_attention",
            inputs=4,
            parameters=[ctypes.c_bool],
        ),
        SupportedOp(
            name="scaled_dot_product_attention_simple",
            inputs=3,
            parameters=[ctypes.c_bool],
        ),
        SupportedOp(
            name="normL2",
            inputs=2,
            parameters=[ctypes.c_float],
        ),
        SupportedOp(
            name="gather",
            inputs=3,
            parameters=[ctypes.c_int],
        ),
        SupportedOp(name="reshape", inputs=2, parameters=[ctypes.c_bool, ctypes.c_int]),
        SupportedOp(name="transpose", inputs=2),
        SupportedOp(name="squeeze", inputs=1),
        SupportedOp(name="unsqueeze", inputs=2),
        SupportedOp(
            name="concat",
            inputs=2,
            parameters=[ctypes.c_int64],
        ),
        SupportedOp(
            name="reduce_max",
            inputs=2,
            parameters=[ctypes.c_bool],
        ),
        SupportedOp(
            name="reduce_mean",
            inputs=2,
            parameters=[ctypes.c_bool],
        ),
        SupportedOp(
            name="reduce_min",
            inputs=2,
            parameters=[ctypes.c_bool],
        ),
        SupportedOp(
            name="reduce_prod",
            inputs=2,
            parameters=[ctypes.c_bool],
        ),
        SupportedOp(
            name="reduce_sum",
            inputs=2,
            parameters=[ctypes.c_bool],
        ),
        SupportedOp(name="adaptive_avg_pool", inputs=2),
        SupportedOp(name="adaptive_max_pool", inputs=2),
        SupportedOp(name="power", inputs=2),
        SupportedOp(name="broadcast", inputs=2),
        SupportedOp(name="log_softmax", inputs=1, parameters=[ctypes.c_int64]),
        SupportedOp(name="rotate_half", inputs=1),
    ]
    return supported_ops
@@ -0,0 +1,75 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.backend.factory import NNFactory
7
+ import numpy as np
8
+
9
+
10
class QLinear(NNFactory):
    """Quantized Linear class, computing a matrix matrix multiplication with weights prefetching."""

    def __init__(
        self,
        inC: int,
        outC: int,
        batch: int,
        profile: bool = False,
        device: str = "NPU",
        dtype: np.dtype = np.int8,
        asym: bool = False,
    ):
        """Initialize the QLinear class.

        Args:
            inC (int): input channels
            outC (int): output channels
            batch (int): batch
            profile (bool): Enable/Disable profiling. Defaults to False.
            device (str): Target device, default to "NPU".
            dtype (np.dtype): weights datatype. Defaults to np.int8.
            asym (bool): use asymmetric quantization (weights come with a
                zero point, passed to `run`). Defaults to False.
        """
        super().__init__(profile, device)
        self.inC, self.outC = inC, outC
        self.batch = batch
        self.asym = asym

        input = self.parameter((self.batch, self.inC))
        _ = self.linear(input, outC, inC, bias=False, wt_dtype=dtype, asym=asym)
        self.compile()

    def run(
        self,
        X: np.ndarray,
        W: np.ndarray,
        scale: np.ndarray,
        zero: np.ndarray = None,
        op_id: str = None,
    ) -> np.ndarray:
        """Run the layer: $X * (W * S)^T$ .

        Args:
            X (np.ndarray): activation
            W (np.ndarray): quantized weights
            scale (np.ndarray): quantization scale
            zero (np.ndarray): quantization zero point, used when asym=True.
                Defaults to None.
            op_id (str): operation id

        Raises:
            RuntimeError: Input, weights or scale shape mismatch

        Returns:
            np.ndarray: result
        """
        if not (X.shape[0] == self.batch and X.shape[1] == self.inC):
            raise RuntimeError(
                f"Input shape {X.shape} different from expected one {(self.batch, self.inC)}"
            )
        # Bugfix: validate the weight tensor itself — the original code
        # re-checked X here, so malformed weights were never caught.
        # uint8 (int4) weights are packed two values per byte, hence half
        # the columns.
        expected_w_cols = self.inC // 2 if W.dtype == np.uint8 else self.inC
        if W.shape != (self.outC, expected_w_cols):
            raise RuntimeError(
                f"Weight shape {W.shape} different from expected one {(self.outC, self.inC)}"
            )
        # Bugfix: validate the scale tensor (the original re-checked X and
        # printed W's shape in the message).
        if scale.shape != (self.outC, 1):
            raise RuntimeError(
                f"Scale shape {scale.shape} different from expected one {(self.outC, 1)}"
            )
        if not self.asym:
            return super().run(X, (W, scale), op_id=op_id)
        else:
            return super().run(X, (W, scale, zero), op_id=op_id)
@@ -0,0 +1,66 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.backend.factory import NNFactory
7
+ import numpy as np
8
+
9
+
10
class QMatMul(NNFactory):
    """Quantized Linear class, computing a matrix matrix multiplication."""

    def __init__(
        self,
        inC: int,
        outC: int,
        batch: int,
        profile: bool = False,
        device: str = "NPU",
        dtype: np.dtype = np.int8,
    ):
        """Initialize the QMatmul class.

        Args:
            inC (int): input channels
            outC (int): output channels
            batch (int): batch
            profile (bool): Enable/Disable profiling. Defaults to False.
            device (str): Target device, default to "NPU".
            dtype (np.dtype): weights datatype. Defaults to np.int8.
        """
        super().__init__(profile, device)
        self.inC, self.outC = inC, outC
        self.batch = batch
        input = self.parameter((self.batch, self.inC))
        _ = self.linear(input, outC, inC, bias=False, wt_dtype=dtype)
        self.compile()

    def run(self, X: np.ndarray, W: np.ndarray, scale: np.ndarray) -> np.ndarray:
        """Run the layer: X * (W * S)^T.

        Args:
            X (np.ndarray): activation
            W (np.ndarray): quantized weights
            scale (np.ndarray): quantization scale

        Raises:
            RuntimeError: Input, weights or scale shape mismatch

        Returns:
            np.ndarray: result
        """
        if not (X.shape[0] == self.batch and X.shape[1] == self.inC):
            raise RuntimeError(
                f"Input shape {X.shape} different from expected one {(self.batch, self.inC)}"
            )
        # Bugfix: validate the weight tensor itself — the original code
        # re-checked X here, so malformed weights were never caught.
        # uint8 (int4) weights are packed two values per byte, hence half
        # the columns.
        expected_w_cols = self.inC // 2 if W.dtype == np.uint8 else self.inC
        if W.shape != (self.outC, expected_w_cols):
            raise RuntimeError(
                f"Weight shape {W.shape} different from expected one {(self.outC, self.inC)}"
            )
        # Bugfix: validate the scale tensor (the original re-checked X and
        # printed W's shape in the message).
        if scale.shape != (self.outC, 1):
            raise RuntimeError(
                f"Scale shape {scale.shape} different from expected one {(self.outC, 1)}"
            )

        return super().run(X, (W, scale))
@@ -0,0 +1,215 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.backend import Linear, QLinear
7
+ from intel_npu_acceleration_library.backend import MatMul, QMatMul
8
+ from intel_npu_acceleration_library.backend import NNFactory
9
+ from torch.profiler import record_function
10
+ from typing import Optional, Any, List, Dict, Deque, Union
11
+ from functools import partial
12
+ from collections import deque
13
+ import numpy as np
14
+ import torch
15
+
16
# Module-level cache of compiled NPU models, keyed by an operation
# shape/dtype signature string; each entry is a deque of model replicas
# rotated round-robin by run_matmul / run_factory.
_model_cache: Dict[str, Deque[NNFactory]] = {}
17
+
18
+
19
def clear_cache():
    """Drop every cached compiled model, forcing recompilation on next use."""
    global _model_cache
    _model_cache = {}
23
+
24
+
25
@torch.no_grad()
def run_matmul(
    x: torch.Tensor,
    weights: torch.Tensor,
    scale: Optional[torch.Tensor] = None,
    zero: Optional[torch.Tensor] = None,
    op_id: Optional[str] = None,
) -> torch.Tensor:
    """Run a matmul operation. Depending on the datatype of the weights it runs a float or quantized operation.

    Args:
        x (torch.Tensor): Activation tensor. Its dtype must be torch.float16
        weights (torch.Tensor): Weights tensor. Its dtype can be torch.float16 or torch.int8
        scale (Optional[torch.Tensor], optional): Quantization scale. If weights.dtype == torch.int8 then it must be set. Defaults to None.
        zero (Optional[torch.Tensor], optional): Quantization zero for asym_int4. If weights.dtype == torch.uint8 and use asym_int4 then it must be set and op_id must be provided. Defaults to None.
        op_id (Optional[str], optional): Operation ID. Defaults to None.

    Raises:
        RuntimeError: Unsupported weights datatype, missing scale for quantized
            weights, asymmetric quantization without op_id, unsupported
            activation dtype, or input rank < 2.

    Returns:
        torch.Tensor: result
    """
    global _model_cache

    outC, inC = weights.shape[-2:]

    if weights.dtype == torch.uint8:
        # Int4 weights are compressed two values per byte, so the real
        # input channel count is twice the stored width.
        inC *= 2

    # Set tensors as contiguous in memory
    x = set_contiguous(x)
    weights = set_contiguous(weights)
    if len(weights.shape) > 2:
        weights = weights.view([-1, weights.shape[-1]])

    if weights.dtype.is_floating_point:
        op_class = Linear if op_id is not None else MatMul
        create_op = op_class
        op_args = [weights.numpy()]
    elif weights.dtype in (torch.int8, torch.uint8):
        # Single check here — the original duplicated this test with two
        # different error messages; we keep the more informative one.
        if scale is None:
            raise RuntimeError(
                f"Quantized matmul (weights dtype == {weights.dtype}) requires scale (scale = {scale})"
            )
        np_dtype = np.int8 if weights.dtype == torch.int8 else np.uint8
        if op_id is not None:
            op_class = QLinear
            create_op = partial(op_class, dtype=np_dtype, asym=(zero is not None))
        else:
            # Bugfix: QMatMul.__init__ has no `asym` parameter — the original
            # unconditionally passed asym=..., so every quantized matmul with
            # op_id=None raised TypeError. Reject asym explicitly instead.
            if zero is not None:
                raise RuntimeError(
                    "Asymmetric quantized matmul requires an op_id (QLinear backend)"
                )
            op_class = QMatMul
            create_op = partial(op_class, dtype=np_dtype)
        if zero is None:
            op_args = [weights.numpy(), scale.numpy()]
        else:
            op_args = [weights.numpy(), scale.numpy(), zero.numpy()]
    else:
        raise RuntimeError(f"Unsupported dtype for weights {weights.dtype}")

    if not x.dtype.is_floating_point:
        raise RuntimeError(f"Unsupported dtype for activation {x.dtype}")

    # Use or not op_id depending on the class used
    op_kwargs = {"op_id": op_id} if op_id else {}

    original_input_shape = x.shape
    expected_output_shape = list(original_input_shape[:-1]) + [outC]

    if not (len(x.shape) >= 2):
        # Typo fix: "must me" -> "must be"
        raise RuntimeError(f"Input shape {x.shape} must be >= 2")

    # Reshape input
    input_dtype = x.dtype
    x = x.to(torch.float16) if input_dtype != torch.float16 else x
    if len(x.shape) > 2 or x.shape[-1] != inC:
        x = x.view([-1, inC])
    x_np = x.numpy()

    batch = x_np.shape[0]

    # Bugfix: the cache key now includes the weight dtype and the asym flag.
    # Previously int8 vs uint8 (and sym vs asym) models with identical
    # dimensions collided on the same key and reused the wrong compiled model.
    key = (
        f"{op_class.__name__}_{batch}_{inC}_x_{outC}_{inC}_{x_np.dtype}"
        f"_{weights.dtype}_{zero is not None}"
    )
    models = _model_cache.get(key, None)

    if models is None:
        _model_cache[key] = deque([create_op(inC, outC, batch)])
    elif len(models) < 1:
        _model_cache[key].append(create_op(inC, outC, batch))
    else:
        # Round-robin over the available replicas
        _model_cache[key].rotate(1)

    # Get the model
    model = _model_cache[key][0]

    profiling_name = "matvec" if batch == 1 else "matmul"
    with record_function(f"npu_{profiling_name}_{key}"):
        ret = model.run(x_np, *op_args, **op_kwargs)

    return adapt_output_tensor(ret, expected_output_shape, input_dtype)
124
+
125
+
126
def adapt_output_tensor(
    output: np.ndarray, original_shape: torch.Size, input_dtype: torch.dtype
) -> torch.Tensor:
    """Adapt the output tensor to the original shape and dtype.

    Wraps the backend's numpy result in a torch tensor, reshapes it to
    ``original_shape`` when needed, and casts it back to ``input_dtype``.

    Args:
        output (np.ndarray): output tensor
        original_shape (torch.Size): original shape
        input_dtype (torch.dtype): input dtype

    Returns:
        torch.Tensor: output tensor
    """
    tensor = torch.from_numpy(output)
    if tensor.shape != original_shape:
        tensor = tensor.view(original_shape)
    # Copy is mandatory: the backend may reuse the same underlying buffer.
    return tensor.to(input_dtype, copy=True)
144
+
145
+
146
def set_contiguous(tensor: torch.Tensor) -> torch.Tensor:
    """Set tensor to be contiguous in memory.

    Args:
        tensor (torch.Tensor): input tensor

    Returns:
        torch.Tensor: output, contiguous tensor
    """
    # Already-contiguous tensors are returned unchanged (no copy).
    return tensor if tensor.is_contiguous() else tensor.contiguous()
158
+
159
+
160
@torch.no_grad()
def run_factory(
    x: Union[torch.Tensor, List[torch.Tensor]],
    weights: List[torch.Tensor],
    backend_cls: Any,
    op_id: Optional[str] = None,
    replica: int = 1,
) -> torch.Tensor:
    """Run a factory operation. Depending on the datatype of the weights it runs a float or quantized operation.

    Args:
        x (Union[torch.Tensor, List[torch.Tensor]]): Activation tensor(s). Its dtype must be torch.float16
        weights (List[torch.Tensor]): Weights tensor(s). Their dtype can be torch.float16 or torch.int8
        backend_cls (Any): Backend class to run (a functools.partial, accessed via `.func`)
        op_id (Optional[str], optional): Operation ID. Defaults to None.
        replica (int, optional): Number of model replicas to pre-compile for this
            signature and rotate between calls. Defaults to 1.

    Returns:
        torch.Tensor: result
    """
    global _model_cache

    # Use or not op_id depending on the class used
    op_kwargs = {"op_id": op_id} if op_id else {}

    if not isinstance(x, (list, tuple)):
        x = [x]

    # Cast activations and weights to contiguous fp16 numpy arrays
    input_dtype = x[0].dtype
    x_np = [set_contiguous(elem).to(torch.float16).numpy() for elem in x]
    op_args = [set_contiguous(w).to(torch.float16).numpy() for w in weights]

    # Cache key: backend name plus the shape/dtype of every input and weight
    shape_dtype_signature = "_".join(
        ["_".join(str(dim) for dim in t.shape) + f"_{t.dtype}" for t in x_np + op_args]
    )
    key = f"{backend_cls.func.__name__}_{shape_dtype_signature}"
    models = _model_cache.get(key, None)

    input_shapes = [elem.shape for elem in x_np]
    if models is None:
        # First sight of this signature: compile `replica` models up front
        _model_cache[key] = deque([backend_cls(*input_shapes) for _ in range(replica)])
    elif len(models) < 1:
        _model_cache[key].append(backend_cls(*input_shapes))
    else:
        # Round-robin over the available replicas
        _model_cache[key].rotate(1)

    # Get the model
    model = _model_cache[key][0]

    with record_function(f"npu_factory_mul_{key}"):
        ret = model.run(*x_np, *op_args, **op_kwargs)

    if isinstance(ret, list):
        return [adapt_output_tensor(r, r.shape, input_dtype) for r in ret]

    return adapt_output_tensor(ret, ret.shape, input_dtype)