bigdl-core-npu 2.6.0b20250114__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. bigdl-core-npu/__init__.py +0 -0
  2. bigdl-core-npu/include/common.h +96 -0
  3. bigdl-core-npu/include/npu_llm.h +74 -0
  4. bigdl-core-npu/npu_llm.dll +0 -0
  5. bigdl-core-npu/npu_llm.lib +0 -0
  6. bigdl_core_npu-2.6.0b20250114.dist-info/METADATA +44 -0
  7. bigdl_core_npu-2.6.0b20250114.dist-info/RECORD +234 -0
  8. bigdl_core_npu-2.6.0b20250114.dist-info/WHEEL +5 -0
  9. bigdl_core_npu-2.6.0b20250114.dist-info/top_level.txt +2 -0
  10. intel_npu_acceleration_library/__init__.py +24 -0
  11. intel_npu_acceleration_library/_version.py +6 -0
  12. intel_npu_acceleration_library/backend/__init__.py +37 -0
  13. intel_npu_acceleration_library/backend/base.py +250 -0
  14. intel_npu_acceleration_library/backend/bindings.py +383 -0
  15. intel_npu_acceleration_library/backend/compression.py +24 -0
  16. intel_npu_acceleration_library/backend/convolution.py +58 -0
  17. intel_npu_acceleration_library/backend/factory.py +1161 -0
  18. intel_npu_acceleration_library/backend/linear.py +60 -0
  19. intel_npu_acceleration_library/backend/matmul.py +59 -0
  20. intel_npu_acceleration_library/backend/mlp.py +58 -0
  21. intel_npu_acceleration_library/backend/ops.py +142 -0
  22. intel_npu_acceleration_library/backend/qlinear.py +75 -0
  23. intel_npu_acceleration_library/backend/qmatmul.py +66 -0
  24. intel_npu_acceleration_library/backend/runtime.py +215 -0
  25. intel_npu_acceleration_library/backend/sdpa.py +107 -0
  26. intel_npu_acceleration_library/backend/tensor.py +1120 -0
  27. intel_npu_acceleration_library/backend/utils.py +70 -0
  28. intel_npu_acceleration_library/compiler.py +194 -0
  29. intel_npu_acceleration_library/device.py +230 -0
  30. intel_npu_acceleration_library/dtypes.py +155 -0
  31. intel_npu_acceleration_library/external/openvino/__init__.py +72 -0
  32. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +21 -0
  33. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  34. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  35. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  36. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  42. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
  43. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  44. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
  45. intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
  46. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  47. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  48. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  49. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  50. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  51. intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
  52. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  53. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  54. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  55. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  56. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  57. intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
  58. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +370 -0
  59. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +180 -0
  60. intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
  61. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +118 -0
  62. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  63. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  64. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  65. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  66. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +131 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +290 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +126 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +568 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +258 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
  79. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  80. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  81. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  82. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  83. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  84. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +481 -0
  85. intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
  86. intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
  87. intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
  88. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +28 -0
  89. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
  90. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
  91. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +5 -0
  92. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
  93. intel_npu_acceleration_library/external/openvino/properties/__init__.py +22 -0
  94. intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
  95. intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
  96. intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
  97. intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
  98. intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
  99. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
  100. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
  101. intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
  102. intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
  103. intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
  105. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +19 -0
  107. intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
  108. intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3068 -0
  110. intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
  111. intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
  114. intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
  115. intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
  116. intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
  117. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +398 -0
  118. intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
  119. intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
  120. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +17 -0
  121. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +276 -0
  122. intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
  123. intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
  124. intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
  125. intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
  126. intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
  127. intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
  128. intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
  129. intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
  130. intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
  131. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +215 -0
  132. intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
  133. intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
  134. intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
  135. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +787 -0
  136. intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
  137. intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
  138. intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
  139. intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
  140. intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
  141. intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
  142. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +40 -0
  143. intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
  144. intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
  145. intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
  146. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
  147. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +447 -0
  148. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
  149. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +156 -0
  150. intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
  151. intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
  152. intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
  153. intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
  154. intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
  155. intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
  156. intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
  157. intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
  158. intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
  159. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
  160. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
  161. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
  162. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
  163. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
  164. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
  165. intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
  166. intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
  167. intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
  168. intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
  169. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
  170. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +550 -0
  171. intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
  172. intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
  173. intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
  174. intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
  175. intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
  176. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +40 -0
  177. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
  178. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
  179. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
  180. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
  181. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  182. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
  183. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
  184. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
  185. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
  186. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +298 -0
  187. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
  188. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +214 -0
  189. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
  190. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
  191. intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
  192. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
  193. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
  194. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
  195. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +196 -0
  196. intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
  197. intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
  198. intel_npu_acceleration_library/external/openvino/utils.py +115 -0
  199. intel_npu_acceleration_library/functional/__init__.py +8 -0
  200. intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
  201. intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
  202. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  203. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  204. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  205. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  206. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  207. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  208. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  209. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  210. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  211. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  212. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  213. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  214. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  215. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  216. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  217. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  218. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  219. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  220. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  221. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  222. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  223. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  224. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  225. intel_npu_acceleration_library/modelling.py +150 -0
  226. intel_npu_acceleration_library/nn/__init__.py +20 -0
  227. intel_npu_acceleration_library/nn/autograd.py +68 -0
  228. intel_npu_acceleration_library/nn/conv.py +257 -0
  229. intel_npu_acceleration_library/nn/functional.py +1207 -0
  230. intel_npu_acceleration_library/nn/linear.py +162 -0
  231. intel_npu_acceleration_library/nn/llm.py +417 -0
  232. intel_npu_acceleration_library/nn/module.py +393 -0
  233. intel_npu_acceleration_library/optimizations.py +157 -0
  234. intel_npu_acceleration_library/quantization.py +174 -0
@@ -0,0 +1,150 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+ from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM
6
+ import intel_npu_acceleration_library as npu_lib
7
+ from functools import partialmethod
8
+ from typing import Type, Any, Tuple, Optional
9
+ import hashlib
10
+ import torch
11
+ import os
12
+
13
+
14
def get_cache_dir() -> str:
    """Return the directory where compiled NPU models are cached.

    Returns:
        str: path to the cache directory (relative to the working directory)
    """
    return os.path.join("cache", "models")
21
+
22
+
23
def get_mangled_model_name(model_name: str, *args: Any, **kwargs: Any) -> str:
    """Mangle the model name with all the parameters.

    The mangled name embeds a SHA-256 digest of the stringified arguments and
    the library version, and replaces path separators so the result can be
    used as a single directory name.

    Args:
        model_name (str): model name or path
        args (Any): positional arguments
        kwargs (Any): keyword arguments

    Returns:
        str: mangled name
    """
    positional = [str(arg) for arg in args]
    keyword = [f"{str(key)}_{str(arg)}" for key, arg in kwargs.items()]
    # Hash the repr of the combined parameter list so any argument change
    # produces a different cache entry.
    arguments_hash = hashlib.sha256(
        f"{positional + keyword}".encode("utf-8")
    ).hexdigest()
    mangled = f"{model_name}_{arguments_hash}_{npu_lib.__version__}"
    # Strip path separators so the name is a valid folder name.
    for separator in ("\\", "/"):
        mangled = mangled.replace(separator, "_")
    return mangled
39
+
40
+
41
def get_model_path(model_name: str, *args: Any, **kwargs: Any) -> Tuple[str, str]:
    """Get the model path.

    Args:
        model_name (str): model name or path
        args (Any): positional arguments
        kwargs (Any): keyword arguments

    Returns:
        Tuple[str, str]: model directory and full path
    """
    # Directory name is the mangled model name inside the shared cache root.
    model_dir_path = os.path.join(
        get_cache_dir(), get_mangled_model_name(model_name, *args, **kwargs)
    )
    # The serialized model always uses a fixed file name inside its directory.
    return model_dir_path, os.path.join(model_dir_path, "pytorch_npu_model.pt")
57
+
58
+
59
class NPUModel:
    """Base NPU model class."""

    @staticmethod
    def from_pretrained(
        model_name_or_path: str,
        dtype: torch.dtype = torch.float16,
        training: bool = False,
        transformers_class: Optional[Type] = None,
        export=True,
        *args: Any,
        **kwargs: Any,
    ) -> torch.nn.Module:
        """Load (or compile and cache) a transformers model for the NPU.

        Args:
            model_name_or_path (str): model name or path
            dtype (torch.dtype, optional): compilation dtype. Defaults to torch.float16.
            training (bool, optional): enable training. Defaults to False.
            transformers_class (Optional[Type], optional): base class to use; must
                provide a `from_pretrained` method. Defaults to None.
            export (bool, optional): cache the compiled model on disk. Defaults to True.
            args (Any): positional arguments forwarded to the transformers loader
            kwargs (Any): keyword arguments forwarded to the transformers loader

        Raises:
            RuntimeError: Invalid class
            AttributeError: Cannot export model with trust_remote_code=True

        Returns:
            torch.nn.Module: compiled model
        """
        if transformers_class is None:
            raise RuntimeError(f"Invalid transformer class {type(transformers_class)}")

        # Cache location is derived from the model name plus all arguments.
        model_dir_path, model_path = get_model_path(
            model_name_or_path, dtype, training, *args, **kwargs
        )

        # Fast path: a previously compiled model is already on disk.
        if os.path.isdir(model_dir_path) and os.path.isfile(model_path):
            return torch.load(model_path)

        # Slow path: load with transformers, then compile for the NPU.
        print(f"Compiling model {model_name_or_path} {dtype} for the NPU")
        base_model = transformers_class.from_pretrained(
            model_name_or_path, *args, **kwargs
        )
        model = npu_lib.compile(base_model, dtype, training)

        if export:
            # Exporting is refused for remote-code models (see error message).
            if kwargs.get("trust_remote_code", False):
                raise AttributeError(
                    "Cannot export model with trust_remote_code=True. Please set trust_remote_code=False or export=False"
                )
            print(f"Exporting model {model_name_or_path} to {model_dir_path}")
            os.makedirs(model_dir_path, exist_ok=True)
            torch.save(model, model_path)
        return model
115
+
116
+
117
class NPUAutoModel:
    """NPU wrapper for AutoModel.

    Attrs:
        from_pretrained: Load a pretrained model
    """

    # Partially apply the shared NPU loader so callers only supply the model name.
    from_pretrained = partialmethod(NPUModel.from_pretrained, transformers_class=AutoModel)
127
+
128
+
129
class NPUModelForCausalLM:
    """NPU wrapper for AutoModelForCausalLM.

    Attrs:
        from_pretrained: Load a pretrained model
    """

    # Partially apply the shared NPU loader for causal language models.
    from_pretrained = partialmethod(NPUModel.from_pretrained, transformers_class=AutoModelForCausalLM)
139
+
140
+
141
class NPUModelForSeq2SeqLM:
    """NPU wrapper for AutoModelForSeq2SeqLM.

    Attrs:
        from_pretrained: Load a pretrained model
    """

    # Partially apply the shared NPU loader for sequence-to-sequence models.
    from_pretrained = partialmethod(NPUModel.from_pretrained, transformers_class=AutoModelForSeq2SeqLM)
@@ -0,0 +1,20 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from .functional import * # noqa
7
+ from .linear import Linear, QuantizedLinear # noqa
8
+ from .conv import Conv2d # noqa
9
+ from .module import Module # noqa
10
+
11
# The LLM layers require an extra dependency at import time (see the except
# branch); guard the import so the core package stays usable without it.
try:
    from .llm import LlamaAttention, PhiMLP  # noqa

    llm_modules = ["LlamaAttention", "PhiMLP"]
except ModuleNotFoundError:
    # Transformer library is not installed
    llm_modules = []


# Public API: core layers plus whichever LLM modules imported successfully.
__all__ = ["Module", "Linear", "QuantizedLinear", "Conv2d"] + llm_modules
@@ -0,0 +1,68 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.backend import run_matmul
7
+ from typing import Optional, Iterable, Union
8
+ import torch
9
+
10
+
11
class AutogradMatMul(torch.autograd.Function):
    """Autograd function wrapping the NPU matmul kernel for Linear layers."""

    @staticmethod
    def forward(
        ctx, x: torch.Tensor, w: torch.Tensor, scale: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """Compute ``x @ w.T`` on the NPU (float or quantized, by weight dtype).

        Args:
            ctx (Any): the autograd context
            x (torch.Tensor): Activation tensor. Its dtype must be torch.float16
            w (torch.Tensor): Weight tensor. Its dtype must be torch.float16
            scale (Optional[torch.Tensor], optional): Quantization scale, required
                when the weight dtype is torch.int8. Defaults to None.

        Returns:
            torch.Tensor: result
        """
        out = run_matmul(x, w, scale, None)
        # Keep the operands needed by backward().
        ctx.save_for_backward(w, x)
        return out

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor) -> Iterable[Union[torch.Tensor, None]]:
        """Compute input and weight gradients for the linear op.

        Shapes:
            grad_output: [batch, output_channels]
            x:           [batch, input_channels]
            w:           [output_channels, input_channels]

        Equivalent pytorch code:
            dl_dx = grad_output @ w
            dl_dw = (x.T @ grad_output).T

        Args:
            ctx (Any): the autograd context
            grad_output (torch.Tensor): output gradient

        Returns:
            Iterable[Union[torch.Tensor, None]]: gradients for (x, w, scale);
            the scale slot is always None.
        """
        w, x = ctx.saved_tensors

        w_t = torch.transpose(w, -1, -2)
        grad_t = torch.transpose(grad_output, -1, -2)
        x_t = torch.transpose(x, -1, -2)

        # run_matmul multiplies by the transpose of its second operand (see
        # forward), so passing transposed tensors yields the gradients above.
        dl_dx = run_matmul(grad_output, w_t)
        dl_dw = run_matmul(grad_t, x_t)
        return dl_dx, dl_dw, None
@@ -0,0 +1,257 @@
1
+ #
2
+ # Copyright © 2024 Intel Corporation
3
+ # SPDX-License-Identifier: Apache 2.0
4
+ #
5
+
6
+ from intel_npu_acceleration_library.backend import run_factory, Convolution
7
+ from intel_npu_acceleration_library.nn import Linear
8
+ from typing import Optional, Sequence, Union
9
+ from functools import partial
10
+ import torch
11
+ import uuid
12
+
13
+
14
class Im2ColConv2d(torch.nn.Module):
    """2D convolution computed via the im2col transformation.

    The input is unfolded into patch columns (im2col) and the convolution is
    then performed as a single matrix multiplication by ``matmul``.

    Attrs:
        weight (torch.Tensor): The weight tensor of the layer.
        bias (torch.Tensor): The bias tensor of the layer.

    Args:
        matmul (torch.nn.Module): The matrix multiplication module.
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (Union[int, Tuple[int, int]]): Size of the convolutional kernel.
        stride (Union[int, Tuple[int, int]], optional): Stride of the convolution. Defaults to (1, 1).
        padding (Union[int, Tuple[int, int]], optional): Padding added to the input. Defaults to (0, 0).
        dilation (Union[int, Tuple[int, int]], optional): Dilation rate of the convolution. Defaults to (1, 1).
    """

    def __init__(
        self,
        matmul,
        in_channels,
        out_channels,
        kernel_size,
        stride=(1, 1),
        padding=(0, 0),
        dilation=(1, 1),
    ) -> None:
        """Initialize the im2col convolution layer.

        Args:
            matmul: Module applied to the unfolded patches (the GEMM step).
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            kernel_size: Spatial size of the convolution kernel.
            stride: Convolution stride. Defaults to (1, 1).
            padding: Zero padding added to the input. Defaults to (0, 0).
            dilation: Kernel dilation rate. Defaults to (1, 1).
        """
        super().__init__()
        self.matmul = matmul
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation

    @property
    def weight(self) -> torch.Tensor:
        """Weight tensor, delegated to the wrapped matmul module.

        Returns:
            torch.Tensor: The weight tensor.
        """
        return self.matmul.weight

    @property
    def bias(self) -> torch.Tensor:
        """Bias tensor, delegated to the wrapped matmul module.

        Returns:
            torch.Tensor: The bias tensor.
        """
        return self.matmul.bias

    def compute_output_dim(self, dim, idx) -> int:
        """Compute one spatial output dimension of the convolution.

        Args:
            dim (int): Input spatial dimension.
            idx (int): Index of the spatial axis.

        Returns:
            int: Output spatial dimension.
        """
        # Standard conv arithmetic: floor((in + 2p - effective_kernel) / stride) + 1
        effective_kernel = self.dilation[idx] * (self.kernel_size[idx] - 1) + 1
        padded = dim + 2 * self.padding[idx]
        return (padded - effective_kernel) // self.stride[idx] + 1

    def forward(self, x) -> torch.Tensor:
        """Run the convolution as unfold + matmul + reshape.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output tensor.
        """
        # im2col: (N, C*kh*kw, L) transposed to (N, L, C*kh*kw) so each row
        # is one receptive-field patch.
        columns = torch.nn.functional.unfold(
            x, self.kernel_size, self.dilation, self.padding, self.stride
        ).transpose(1, 2)
        # GEMM over the patch dimension, then back to (N, out_channels, L).
        projected = self.matmul(columns).transpose(1, 2)

        spatial_dims = [
            self.compute_output_dim(dim, idx) for idx, dim in enumerate(x.shape[2:])
        ]
        return projected.view([x.shape[0], self.out_channels] + spatial_dims)

    @staticmethod
    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Im2ColConv2d":
        """Build an Im2ColConv2d from a torch.nn.Conv2d layer.

        Args:
            layer (torch.nn.Conv2d): The torch Conv2d layer.
            dtype (torch.dtype, optional): Data type of the layer.

        Returns:
            Im2ColConv2d: The converted Im2ColConv2d layer.
        """
        # Flatten (out, in, kh, kw) -> (out, in*kh*kw) to match the GEMM layout.
        flat_weight = layer.weight.view(layer.weight.shape[0], -1)
        projection = Linear.fromTensor(flat_weight, getattr(layer, "bias", None), dtype)
        return Im2ColConv2d(
            projection,
            layer.in_channels,
            layer.out_channels,
            layer.kernel_size,
            layer.stride,
            layer.padding,
            layer.dilation,
        )
149
+
150
+
151
class Conv2d(torch.nn.Module):
    """
    2D convolutional layer implementation.

    Attrs:
        weight (torch.Tensor): The weight tensor of the layer.
        bias (torch.Tensor): The bias tensor of the layer.
    """

    def __init__(
        self,
        weights: torch.Tensor,
        bias: Optional[torch.Tensor] = None,
        strides: Union[int, Sequence[int]] = 1,
        padding: Union[int, Sequence[int]] = 0,
        dilation: Union[int, Sequence[int]] = 1,
        groups: int = 1,
    ) -> None:
        """Initialize a Convolutional layer.

        Args:
            weights (torch.Tensor): The weight tensor of the layer.
            bias (Optional[torch.Tensor], optional): The bias tensor of the layer. Defaults to None.
            strides (Union[int, Sequence[int]], optional): Strides. Defaults to 1.
            padding (Union[int, Sequence[int]], optional): Padding. Defaults to 0.
            dilation (Union[int, Sequence[int]], optional): Dilation. Defaults to 1.
            groups (int, optional): Groups. Defaults to 1.
        """
        super().__init__()

        # Unique id so the runtime factory can identify/cache this op instance.
        self.op_id = str(uuid.uuid4())
        if groups > 1:
            # Grouped conv: split the output-channel axis into (groups, out/groups).
            grouped_shape = [groups, weights.shape[0] // groups, *weights.shape[1:]]
            weights = weights.view(*grouped_shape)

        # NOTE(review): this shadows torch.nn.Module.parameters(); kept for
        # compatibility with existing callers that read .parameters directly.
        self.parameters = [weights] if bias is None else [weights, bias]
        self.backend_cls = partial(
            Convolution,
            weights_shape=weights.shape,
            bias=bias,
            strides=strides,
            padding=padding,
            dilation=dilation,
            groups=groups,
        )

    @property
    def weight(self) -> torch.Tensor:
        """
        Get the weight tensor of the layer.

        Returns:
            torch.Tensor: The weight tensor.
        """
        return self.parameters[0]

    @property
    def bias(self) -> torch.Tensor:
        """
        Get the bias tensor of the layer.

        Returns:
            torch.Tensor: The bias tensor, or None when the layer has no bias.
        """
        stored = self.parameters
        return stored[1] if len(stored) > 1 else None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Torch module forward method.

        Args:
            x (torch.Tensor): Input tensor

        Returns:
            torch.Tensor: result
        """
        # Dispatch to the compiled NPU convolution through the runtime factory.
        return run_factory(x, self.parameters, self.backend_cls, self.op_id)

    @staticmethod
    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
        """
        Create a Conv2d layer from a torch.nn.Conv2d layer.

        Falls back to Im2ColConv2d when any kernel dimension exceeds 11,
        which this backend does not support directly.

        Args:
            layer (torch.nn.Conv2d): The torch Conv2d layer.
            dtype (torch.dtype, optional): Data type of the layer.

        Returns:
            Conv2d: The converted Conv2d layer.
        """
        if max(layer.kernel_size) > 11:
            return Im2ColConv2d.fromTorch(layer, dtype)

        return Conv2d(
            layer.weight,
            layer.bias,
            layer.stride,
            layer.padding,
            layer.dilation,
            layer.groups,
        )