bigdl-core-npu 2.6.0b20250114__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. bigdl-core-npu/__init__.py +0 -0
  2. bigdl-core-npu/include/common.h +96 -0
  3. bigdl-core-npu/include/npu_llm.h +74 -0
  4. bigdl-core-npu/npu_llm.dll +0 -0
  5. bigdl-core-npu/npu_llm.lib +0 -0
  6. bigdl_core_npu-2.6.0b20250114.dist-info/METADATA +44 -0
  7. bigdl_core_npu-2.6.0b20250114.dist-info/RECORD +234 -0
  8. bigdl_core_npu-2.6.0b20250114.dist-info/WHEEL +5 -0
  9. bigdl_core_npu-2.6.0b20250114.dist-info/top_level.txt +2 -0
  10. intel_npu_acceleration_library/__init__.py +24 -0
  11. intel_npu_acceleration_library/_version.py +6 -0
  12. intel_npu_acceleration_library/backend/__init__.py +37 -0
  13. intel_npu_acceleration_library/backend/base.py +250 -0
  14. intel_npu_acceleration_library/backend/bindings.py +383 -0
  15. intel_npu_acceleration_library/backend/compression.py +24 -0
  16. intel_npu_acceleration_library/backend/convolution.py +58 -0
  17. intel_npu_acceleration_library/backend/factory.py +1161 -0
  18. intel_npu_acceleration_library/backend/linear.py +60 -0
  19. intel_npu_acceleration_library/backend/matmul.py +59 -0
  20. intel_npu_acceleration_library/backend/mlp.py +58 -0
  21. intel_npu_acceleration_library/backend/ops.py +142 -0
  22. intel_npu_acceleration_library/backend/qlinear.py +75 -0
  23. intel_npu_acceleration_library/backend/qmatmul.py +66 -0
  24. intel_npu_acceleration_library/backend/runtime.py +215 -0
  25. intel_npu_acceleration_library/backend/sdpa.py +107 -0
  26. intel_npu_acceleration_library/backend/tensor.py +1120 -0
  27. intel_npu_acceleration_library/backend/utils.py +70 -0
  28. intel_npu_acceleration_library/compiler.py +194 -0
  29. intel_npu_acceleration_library/device.py +230 -0
  30. intel_npu_acceleration_library/dtypes.py +155 -0
  31. intel_npu_acceleration_library/external/openvino/__init__.py +72 -0
  32. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +21 -0
  33. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  34. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  35. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  36. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  42. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
  43. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  44. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
  45. intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
  46. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  47. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  48. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  49. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  50. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  51. intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
  52. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  53. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  54. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  55. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  56. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  57. intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
  58. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +370 -0
  59. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +180 -0
  60. intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
  61. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +118 -0
  62. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  63. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  64. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  65. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  66. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +131 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +290 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +126 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +568 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +258 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
  79. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  80. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  81. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  82. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  83. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  84. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +481 -0
  85. intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
  86. intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
  87. intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
  88. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +28 -0
  89. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
  90. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
  91. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +5 -0
  92. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
  93. intel_npu_acceleration_library/external/openvino/properties/__init__.py +22 -0
  94. intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
  95. intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
  96. intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
  97. intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
  98. intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
  99. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
  100. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
  101. intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
  102. intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
  103. intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
  105. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +19 -0
  107. intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
  108. intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3068 -0
  110. intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
  111. intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
  114. intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
  115. intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
  116. intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
  117. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +398 -0
  118. intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
  119. intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
  120. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +17 -0
  121. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +276 -0
  122. intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
  123. intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
  124. intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
  125. intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
  126. intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
  127. intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
  128. intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
  129. intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
  130. intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
  131. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +215 -0
  132. intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
  133. intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
  134. intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
  135. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +787 -0
  136. intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
  137. intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
  138. intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
  139. intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
  140. intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
  141. intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
  142. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +40 -0
  143. intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
  144. intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
  145. intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
  146. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
  147. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +447 -0
  148. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
  149. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +156 -0
  150. intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
  151. intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
  152. intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
  153. intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
  154. intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
  155. intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
  156. intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
  157. intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
  158. intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
  159. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
  160. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
  161. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
  162. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
  163. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
  164. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
  165. intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
  166. intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
  167. intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
  168. intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
  169. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
  170. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +550 -0
  171. intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
  172. intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
  173. intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
  174. intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
  175. intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
  176. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +40 -0
  177. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
  178. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
  179. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
  180. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
  181. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  182. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
  183. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
  184. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
  185. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
  186. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +298 -0
  187. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
  188. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +214 -0
  189. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
  190. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
  191. intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
  192. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
  193. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
  194. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
  195. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +196 -0
  196. intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
  197. intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
  198. intel_npu_acceleration_library/external/openvino/utils.py +115 -0
  199. intel_npu_acceleration_library/functional/__init__.py +8 -0
  200. intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
  201. intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
  202. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  203. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  204. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  205. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  206. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  207. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  208. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  209. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  210. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  211. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  212. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  213. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  214. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  215. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  216. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  217. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  218. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  219. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  220. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  221. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  222. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  223. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  224. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  225. intel_npu_acceleration_library/modelling.py +150 -0
  226. intel_npu_acceleration_library/nn/__init__.py +20 -0
  227. intel_npu_acceleration_library/nn/autograd.py +68 -0
  228. intel_npu_acceleration_library/nn/conv.py +257 -0
  229. intel_npu_acceleration_library/nn/functional.py +1207 -0
  230. intel_npu_acceleration_library/nn/linear.py +162 -0
  231. intel_npu_acceleration_library/nn/llm.py +417 -0
  232. intel_npu_acceleration_library/nn/module.py +393 -0
  233. intel_npu_acceleration_library/optimizations.py +157 -0
  234. intel_npu_acceleration_library/quantization.py +174 -0
@@ -0,0 +1,70 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from functools import lru_cache
7
+ from .bindings import lib
8
+ import warnings
9
+ import sys
10
+
11
+ __min_npu_driver_version__ = 2408
12
+
13
+
14
@lru_cache
def npu_available() -> bool:
    """Report whether an NPU device is present on this system.

    The result is cached via ``lru_cache``, so the native probe runs at
    most once per process.

    Returns:
        bool: True when an NPU is available in the system
    """
    available: bool = lib.isNPUAvailable()
    return available
22
+
23
+
24
def get_driver_installation_url() -> str:
    """Get the NPU driver installation URL for the current platform.

    Returns:
        str: A "Driver Update URL: ..." message pointing at the platform's
            driver download page, or an empty string on platforms with no
            known driver distribution channel.
    """
    # Only Windows and Linux have published NPU driver download locations.
    urls = {
        "win32": "https://www.intel.com/content/www/us/en/download/794734/intel-npu-driver-windows.html",
        "linux": "https://github.com/intel/linux-npu-driver",
    }
    url = urls.get(sys.platform)
    return f"Driver Update URL: {url}" if url else ""
36
+
37
+
38
@lru_cache
def get_driver_version() -> int:
    """Get the driver version for the Intel® NPU Acceleration Library.

    The value is cached after the first successful query.

    Raises:
        RuntimeError: an error is raised if the platform is not supported. Currently supported platforms are Windows and Linux

    Returns:
        int: NPU driver version
    """
    if npu_available():
        return lib.getNPUDriverVersion()
    raise RuntimeError("NPU is not available on this system")
52
+
53
+
54
def check_npu_and_driver_version():
    """Check NPU availability and driver version, warning on problems.

    Emits a ``UserWarning`` when no NPU is present (the library falls back
    to AUTO device selection) or when the installed driver is older than
    ``__min_npu_driver_version__``.
    """
    if not npu_available():
        warnings.warn(
            "NPU is not available in your system. Library will fallback to AUTO device selection mode",
            stacklevel=2,
        )
        return

    driver_version = get_driver_version()  # query once; original called it twice
    if driver_version < __min_npu_driver_version__:
        warnings.warn(
            "\nWarning: Outdated Driver Detected!!!\n"
            f"Current Driver Version: {driver_version}, Minimum Required Version: {__min_npu_driver_version__}\n"
            # Bug fix: the original message ran "...crashes" directly into
            # "To avoid..." with no separator; add the missing ".\n".
            "Using an outdated driver may result in reduced performance and unexpected errors and crashes.\n"
            "To avoid these issues, please update your driver to the latest version.\n"
            f"{get_driver_installation_url()}\n",
            stacklevel=2,
        )
@@ -0,0 +1,194 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.optimizations import horizontal_fusion_linear
7
+ from transformers.models.llama.modeling_llama import LlamaMLP, LlamaAttention
8
+ from transformers.models.gemma.modeling_gemma import GemmaMLP, GemmaAttention
9
+ from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear
10
+ from intel_npu_acceleration_library.quantization import quantize_model
11
+ from intel_npu_acceleration_library.dtypes import int8, int4
12
+ import intel_npu_acceleration_library.nn as nn
13
+ from torch._dynamo import register_backend
14
+ from typing import Union, Callable, Any
15
+ from typing import List
16
+ import torch
17
+
18
+
19
def compile(
    model: torch.nn.Module, dtype: torch.dtype = torch.float16, training: bool = False
) -> torch.nn.Module:
    """Compile a model for the NPU.

    Args:
        model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
        dtype (torch.dtype): the model target datatype, default to torch.float16
        training (bool): enable training. Default disabled

    Raises:
        RuntimeError: invalid datatypes

    Returns:
        torch.nn.Module: compiled NPU nn.Module
    """
    # Only floating-point dtypes and the library's int8/int4 are supported.
    if not (dtype.is_floating_point or dtype in (int8, int4)):
        # Bug fix: original message read "library do not support yet the
        # requeste datatype" — corrected grammar/typo.
        raise RuntimeError(
            f"intel-npu-acceleration-library does not yet support the requested datatype: {dtype}"
        )

    # Prepare and optimize model for NPU
    with torch.no_grad():
        # General optimizations (horizontal fusion, attention rewriting)
        apply_general_optimizations(model)
        if dtype in (int8, int4):
            # Quantize model
            model = quantize_model(model, dtype)

        # Model lowering to NPU ops
        create_npu_kernels(model)

    if dtype.is_floating_point and training:
        # Training is only possible in floating point; quantized training
        # is not supported yet, so all other paths switch to eval mode.
        return model

    return model.eval()
56
+
57
+
58
def apply_general_optimizations(model: torch.nn.Module):
    """Apply general optimizations to a torch.nn.Module.

    Args:
        model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
    """
    # Run each module-level optimization pass in sequence over the model.
    for optimization_pass in (apply_horizontal_fusion, optimize_llama_attention):
        optimization_pass(model)
66
+
67
+
68
def create_npu_kernels(model: torch.nn.Module):
    """Lower supported layers of a model to NPU kernels.

    Currently this delegates to ``lower_linear``, which replaces linear,
    conv2d and weight-only-quantized linear layers in place.

    Args:
        model (torch.nn.Module): a pytorch nn.Module to compile and optimize for the npu
    """
    lower_linear(model)
75
+
76
+
77
def module_optimization(func: Callable) -> Callable:
    """Build a function that applies ``func`` recursively to every submodule.

    The decorated function is invoked as ``func(name, layer, *args, **kwargs)``
    for each child of the model. When it returns a replacement module, the
    child is swapped in place via ``add_module`` and recursion continues into
    the replacement; otherwise recursion continues into the original child.

    Note: the original annotation said ``-> torch.nn.Module``; the decorator
    actually returns a callable, so the annotation is corrected here.

    Args:
        func (Callable): optimization function

    Returns:
        Callable: a function that walks a torch.nn.Module tree applying ``func``
    """
    import functools  # local import: keeps the module's import surface unchanged

    @functools.wraps(func)
    def wrapper(model: torch.nn.Module, *args: Any, **kwargs: Any):
        """Recursively apply the optimization function.

        Args:
            model (torch.nn.Module): original module
            args (Any): positional arguments
            kwargs (Any): keyword arguments

        """
        for name, layer in model.named_children():
            new_layer = func(name, layer, *args, **kwargs)
            if new_layer:
                # Replace the child, then keep walking inside the new module.
                model.add_module(name, new_layer)
                wrapper(new_layer, *args, **kwargs)
            else:
                wrapper(layer, *args, **kwargs)

    return wrapper
107
+
108
+
109
@module_optimization
def lower_linear(name: str, layer: torch.nn.Module) -> Union[torch.nn.Module, None]:
    """Lower supported layers to NPU equivalent operators.

    Despite the name, this handles ``torch.nn.Linear``, ``torch.nn.Conv2d``
    and neural-compressor ``WeightOnlyLinear`` (4- and 8-bit) layers; any
    other layer is left untouched (returns None).

    Args:
        name (str): Layer name
        layer (torch.nn.Module): Original torch.nn module

    Raises:
        RuntimeError: unsupported quantization bits

    Returns:
        Union[torch.nn.Module, None]: Return the new NPU operator or None
    """
    if isinstance(layer, torch.nn.Linear):
        return nn.Linear.fromTorch(layer)
    if isinstance(layer, torch.nn.Conv2d):
        return nn.Conv2d.fromTorch(layer)
    if isinstance(layer, WeightOnlyLinear):
        if layer.bits == 4:
            # NOTE(review): 4-bit path value-converts qweight with .to(uint8),
            # while the 8-bit path bit-reinterprets with .view(int8) —
            # presumably matching the neural-compressor packing; confirm.
            return nn.QuantizedLinear(
                layer.qweight.to(torch.uint8), layer.scales, layer.bias
            )
        elif layer.bits == 8:
            return nn.QuantizedLinear(
                layer.qweight.view(torch.int8), layer.scales, layer.bias
            )
        else:
            raise RuntimeError(f"Unsupported quantization bits: {layer.bits}")
    return None
139
+
140
+
141
@module_optimization
def apply_horizontal_fusion(
    name: str, layer: torch.nn.Module
) -> Union[torch.nn.Module, None]:
    """Apply horizontal fusion (merging two linear layers with same input) when necessary.

    Args:
        name (str): Layer name
        layer (torch.nn.Module): Original module

    Returns:
        Union[torch.nn.Module, None]: optimized module
    """
    # Only Llama/Gemma MLP blocks are fused; everything else is left as-is.
    if not isinstance(layer, (LlamaMLP, GemmaMLP)):
        return None
    return horizontal_fusion_linear(layer)
157
+
158
+
159
@module_optimization
def optimize_llama_attention(
    name: str, layer: torch.nn.Module
) -> Union[torch.nn.Module, None]:
    """Optimize LLAMA attention block.

    Args:
        name (str): Module name
        layer (torch.nn.Module): Original Module

    Returns:
        Union[torch.nn.Module, None]: optimized llama module
    """
    # Replace Llama/Gemma attention modules with the library implementation.
    is_supported = isinstance(layer, (LlamaAttention, GemmaAttention))
    return nn.LlamaAttention.fromTorch(layer) if is_supported else None
175
+
176
+
177
@register_backend
def npu(
    gm: Union[torch.nn.Module, torch.fx.GraphModule], example_inputs: List[torch.Tensor]
) -> Union[torch.nn.Module, torch.fx.GraphModule]:
    """Implement the custom torch 2.0 compile backend for the NPU.

    Args:
        gm (Union[torch.nn.Module, torch.fx.GraphModule]): The torch fx Module
        example_inputs (List[torch.Tensor]): A list of example inputs

    Returns:
        Union[torch.nn.Module, torch.fx.GraphModule]: The compiled model
    """
    # Fuse parallel linear layers before lowering.
    fused = horizontal_fusion_linear(gm)

    # Compile with the default dtype (fp16 for now); example_inputs is part
    # of the backend signature but is not used here.
    return compile(fused)
@@ -0,0 +1,230 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.nn.module import convert_to_npu_module
7
+ from intel_npu_acceleration_library.backend.tensor import RemoteTensor
8
+ from torch.overrides import TorchFunctionMode
9
+ from functools import lru_cache
10
+ from typing import Any, MutableMapping
11
+ import torch
12
+
13
+
14
class NPUDevice(TorchFunctionMode):
    """Torch-function mode that routes selected torch calls to NPU handlers.

    This class extends `TorchFunctionMode` and overrides `__torch_function__`
    so that every function registered in `IMPLEMENTATIONS` is dispatched to
    its custom implementation, while all other torch functions behave exactly
    as they normally would.

    Attributes:
        IMPLEMENTATIONS (MutableMapping[Any, Any]): A dictionary mapping functions to their implementations.

    Methods:
        __torch_function__(func, types, args=(), kwargs=None): Overrides the `__torch_function__`
            method to provide custom behavior for torch functions.
    """

    IMPLEMENTATIONS: MutableMapping[Any, Any] = {}

    def __torch_function__(
        self, func: Any, types: Any, args: Any = (), kwargs: Any = None
    ):
        """Dispatch a torch function call, honoring registered overrides.

        Args:
            func (Any): The torch function being called.
            types (Any): The types of the arguments being passed to the function.
            args (Any, optional): The positional arguments being passed to the function. Defaults to ().
            kwargs (Any, optional): The keyword arguments being passed to the function. Defaults to None.

        Returns:
            Any: The result of the torch function call.
        """

        def super_fn(*inner_args: Any, **inner_kwargs: Any):
            """Call `func` directly with the given arguments.

            Args:
                inner_args (Any): Variable length argument list.
                inner_kwargs (Any): Arbitrary keyword arguments.

            Returns:
                Any: The result of calling `func`.
            """
            # Disable torch_function by hand because we don't want the wrapping behavior of
            # the super() impl
            # with torch._C.DisableTorchFunction():
            return func(*inner_args, **inner_kwargs)

        handler = self.IMPLEMENTATIONS.get(func)
        if handler is not None:
            return handler(super_fn, *args, **kwargs or {})

        # No override registered: behave like the plain function call.
        return super_fn(*args, **kwargs or {})
68
+
69
+
70
+ # Convenient wrapper to register functions
71
def implements_factory(func: Any):
    """Build a decorator registering an NPU implementation for `func`.

    Args:
        func (Any): The factory function to register an implementation for.

    Returns:
        Callable: The decorated implementation function.
    """

    def register(impl: Any):
        """Record `impl` as the NPU implementation of `func`.

        Args:
            impl (Any): The implementation to be registered.

        Returns:
            Any: The registered implementation, unchanged.
        """
        NPUDevice.IMPLEMENTATIONS[func] = impl
        return impl

    return register
96
+
97
+
98
def parse_to_arguments(*args: Any, **kwargs: Any):
    """Normalize `.to()`-style arguments, translating "npu" targets to "cpu".

    Args:
        args: Variable length argument list.
        kwargs: Arbitrary keyword arguments.

    Returns:
        Tuple: A tuple containing the following:
            - npu_device (bool): Indicates whether the device is an NPU device.
            - new_args (list): List of modified arguments.
            - kwargs (dict): Dictionary of modified keyword arguments.
    """
    # Keyword form: device="npu" is rewritten to the CPU backing device.
    npu_requested = kwargs.get("device", None) == "npu"
    if npu_requested:
        kwargs["device"] = "cpu"

    # Positional form: any bare "npu" argument is replaced by "cpu".
    positional = []
    for value in args:
        if value == "npu":
            npu_requested = True
            positional.append("cpu")
        else:
            positional.append(value)

    return npu_requested, positional, kwargs
127
+
128
+
129
+ @implements_factory(torch.device)
130
def device(super_fn: Any, device, *args: Any, **kwargs: Any):
    """Return the device based on the input device name.

    Args:
        super_fn (Any): The super function to call.
        device (str): The name of the device.
        args (Any): Additional positional arguments to pass to the super function.
        kwargs (Any): Additional keyword arguments to pass to the super function.

    Returns:
        torch.device: The device object.
    """
    if device != "npu":
        return super_fn(device, *args, **kwargs)
    # "npu" is a virtual device backed by the CPU device object.
    return torch.device("cpu")
148
+
149
+
150
+ @implements_factory(torch.Tensor.to)
151
def to(super_fn: Any, self: Any, *args: Any, **kwargs: Any):
    """Convert the tensor to the specified device.

    Args:
        super_fn: The super function to call.
        self: The tensor being converted.
        args: Additional positional arguments.
        kwargs: Additional keyword arguments.

    Returns:
        The converted tensor.

    Note:
        This implementation only supports a subset of the `.to()` functionality.
        Once the remote tensor feature is available, it can be converted to a remote tensor.
    """
    npu_requested, to_args, to_kwargs = parse_to_arguments(*args, **kwargs)
    # An "npu" destination wraps the tensor into a RemoteTensor first.
    source = RemoteTensor.from_torch(self) if npu_requested else self
    return super_fn(source, *to_args, **to_kwargs)
171
+
172
+
173
+ @implements_factory(torch._C._nn._parse_to)
174
def _parse_to(super_fn: Any, *args: Any, **kwarg: Any):
    """
    Parse the arguments and return the device, dtype, non_blocking, and convert_to_format.

    Args:
        super_fn (Any): The super function to call.
        args (Any): Positional arguments.
        kwarg (Any): Keyword arguments.

    Returns:
        Tuple: A tuple containing the device, dtype, non_blocking, and convert_to_format.
    """
    npu_device, args, kwargs = parse_to_arguments(*args, **kwarg)

    # FIX: keyword arguments must be splatted with ** — the previous
    # `super_fn(*args, *kwargs)` unpacked only the dictionary KEYS as extra
    # positional strings, dropping their values entirely.
    device, dtype, non_blocking, convert_to_format = super_fn(*args, **kwargs)

    if npu_device:
        # Report the virtual "npu" device back to the caller.
        device = "npu"

    return device, dtype, non_blocking, convert_to_format
194
+
195
+
196
def new_to(self, *args: Any, **kwargs: Any):
    """
    Move the input tensor(s)/module to the specified device.

    Args:
        args: Variable length argument list of devices to move the tensor(s) to.
        kwargs: Keyword arguments for the `to` method.

    Returns:
        Tensor or Module: The tensor or module with the tensor(s) moved to the specified device(s).
    """
    # FIX: keyword arguments must be splatted with ** — the previous
    # `parse_to_arguments(*args, *kwargs)` unpacked only the dictionary KEYS
    # as extra positional arguments, losing their values (e.g. dtype=...).
    npu_device, args, kwargs = parse_to_arguments(*args, **kwargs)

    if npu_device:
        # Lower the module to its NPU counterpart before moving it.
        self = convert_to_npu_module(self).to("npu")

    # `_to` holds the original torch.nn.Module.to saved by enable_npu_device.
    return self._to(*args, **kwargs)
213
+
214
+
215
@lru_cache()
def enable_npu_device():
    """
    Enable the NPU device for acceleration.

    This function globally enables the NPU device mode by creating an instance
    of `NPUDevice` and modifying the `torch.nn.Module.to` method to use a
    custom implementation called `new_to`. The `lru_cache` decorator makes the
    call idempotent.

    Usage:
        enable_npu_device()
    """
    mode = NPUDevice()
    # Enter the mode for the lifetime of the process (deliberately never exited).
    mode.__enter__()
    # Preserve the original `to` so new_to can delegate to it via `_to`.
    torch.nn.Module._to = torch.nn.Module.to
    torch.nn.Module.to = new_to
@@ -0,0 +1,155 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from dataclasses import dataclass
7
+ from typing import Union
8
+ import numpy as np
9
+ import torch
10
+ import ctypes
11
+
12
@dataclass(frozen=True)
class NPUDtype:
    """Represents a custom data type for NPUs (Neural Processing Units).

    Attrs:
        name: str: The name of the data type.
        bits: int: The number of bits used to represent the data type.
        min: int: The minimum value that can be represented by the data type.
        max: int: The maximum value that can be represented by the data type.
        torch_dtype: torch.dtype: The corresponding torch data type.
        is_floating_point: bool: True if the data type is floating-point, False otherwise.
    """

    name: str
    bits: int
    min: int
    max: int
    torch_dtype: torch.dtype

    @property
    def is_floating_point(self) -> bool:
        """
        Check if the data type is a floating-point type.

        Returns:
            bool: True if the data type is floating-point, False otherwise.
        """
        return self.torch_dtype.is_floating_point

    def __eq__(self, value: Union["NPUDtype", torch.dtype]) -> bool:
        """
        Compare the NPUDtype object with another NPUDtype, torch.dtype, or numpy scalar type.

        Args:
            value (Union["NPUDtype", torch.dtype]): The object to compare with.

        Returns:
            bool: True if the objects are equal, False otherwise.
        """
        if isinstance(value, torch.dtype):
            info = (
                torch.finfo(value) if value.is_floating_point else torch.iinfo(value)
            )
            return (
                self.bits == info.bits
                and self.max == info.max
                and self.min == info.min
                and self.torch_dtype == value
            )
        if isinstance(value, type):
            # Numpy scalar types (np.int8, np.float16, ...) are plain classes.
            np_dtype = np.dtype(value)
            info = np.finfo(np_dtype) if np_dtype.kind == "f" else np.iinfo(np_dtype)
            return (
                self.bits == info.bits and self.max == info.max and self.min == info.min
            )
        # Anything else falls back to the default object comparison.
        return super().__eq__(value)

    def __repr__(self) -> str:
        """
        Return a string representation of the NPUDtype object.

        Returns:
            str: The string representation of the NPUDtype object.
        """
        return self.name
82
+
83
+
84
def get_backend_dtype(dtype) -> ctypes.c_char_p:
    """Get the string representation of the dtype.

    Args:
        dtype: numpy dtype

    Raises:
        RuntimeError: Unsupported datatype

    Returns:
        ctypes.c_char_p: string representation of the dtype
    """

    def _as_c_str(text: str) -> ctypes.c_char_p:
        # The backend expects a NUL-terminated C string.
        return ctypes.c_char_p(text.encode())

    # NOTE: branches are checked sequentially on purpose — the module-level
    # NPUDtype constants (int4, bfloat16) are only evaluated when reached.
    if dtype in [np.int8, torch.int8]:
        return _as_c_str("int8")
    if dtype in [np.uint8, int4, torch.uint8]:
        # u8 represents packed i4 dtypes
        return _as_c_str("int4")
    if dtype in [np.int16, torch.int16]:
        return _as_c_str("int16")
    if dtype in [np.int32, torch.int32]:
        return _as_c_str("int32")
    if dtype in [np.int64, torch.int64]:
        return _as_c_str("int64")
    if dtype in [np.float16, torch.float16]:
        return _as_c_str("float16")
    if dtype in [np.float32, torch.float32]:
        return _as_c_str("float32")
    if dtype in [np.float64, torch.float64]:
        return _as_c_str("float64")
    if dtype in [bfloat16, torch.bfloat16]:
        return _as_c_str("bfloat16")
    raise RuntimeError(f"DType is not supported {dtype}")
115
+
116
+
117
# Predefined NPUDtype descriptors exposed by this module.
# Floating-point ranges come from torch.finfo; integer ranges from torch.iinfo.
float16 = NPUDtype(
    "fp16",
    16,
    torch.finfo(torch.float16).min,
    torch.finfo(torch.float16).max,
    torch.float16,
)
bfloat16 = NPUDtype(
    "bf16",
    16,
    torch.finfo(torch.bfloat16).min,
    torch.finfo(torch.bfloat16).max,
    torch.bfloat16,
)
float32 = NPUDtype(
    "fp32",
    32,
    torch.finfo(torch.float32).min,
    torch.finfo(torch.float32).max,
    torch.float32,
)
float64 = NPUDtype(
    "fp64",
    64,
    torch.finfo(torch.float64).min,
    torch.finfo(torch.float64).max,
    torch.float64,
)
# 4-bit signed range [-8, 7]; stored in an int8 container (values are packed
# two-per-byte by the backend — see get_backend_dtype's uint8/int4 branch).
int4 = NPUDtype("int4", 4, -8, 7, torch.int8)
int8 = NPUDtype("int8", 8, -128, 127, torch.int8)
int16 = NPUDtype(
    "int16", 16, torch.iinfo(torch.int16).min, torch.iinfo(torch.int16).max, torch.int16
)
int32 = NPUDtype(
    "int32", 32, torch.iinfo(torch.int32).min, torch.iinfo(torch.int32).max, torch.int32
)
153
+ int64 = NPUDtype(
154
+ "int64", 64, torch.iinfo(torch.int64).min, torch.iinfo(torch.int64).max, torch.int64
155
+ )