bigdl-core-npu 2.6.0b20250114__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. bigdl-core-npu/__init__.py +0 -0
  2. bigdl-core-npu/include/common.h +96 -0
  3. bigdl-core-npu/include/npu_llm.h +74 -0
  4. bigdl-core-npu/npu_llm.dll +0 -0
  5. bigdl-core-npu/npu_llm.lib +0 -0
  6. bigdl_core_npu-2.6.0b20250114.dist-info/METADATA +44 -0
  7. bigdl_core_npu-2.6.0b20250114.dist-info/RECORD +234 -0
  8. bigdl_core_npu-2.6.0b20250114.dist-info/WHEEL +5 -0
  9. bigdl_core_npu-2.6.0b20250114.dist-info/top_level.txt +2 -0
  10. intel_npu_acceleration_library/__init__.py +24 -0
  11. intel_npu_acceleration_library/_version.py +6 -0
  12. intel_npu_acceleration_library/backend/__init__.py +37 -0
  13. intel_npu_acceleration_library/backend/base.py +250 -0
  14. intel_npu_acceleration_library/backend/bindings.py +383 -0
  15. intel_npu_acceleration_library/backend/compression.py +24 -0
  16. intel_npu_acceleration_library/backend/convolution.py +58 -0
  17. intel_npu_acceleration_library/backend/factory.py +1161 -0
  18. intel_npu_acceleration_library/backend/linear.py +60 -0
  19. intel_npu_acceleration_library/backend/matmul.py +59 -0
  20. intel_npu_acceleration_library/backend/mlp.py +58 -0
  21. intel_npu_acceleration_library/backend/ops.py +142 -0
  22. intel_npu_acceleration_library/backend/qlinear.py +75 -0
  23. intel_npu_acceleration_library/backend/qmatmul.py +66 -0
  24. intel_npu_acceleration_library/backend/runtime.py +215 -0
  25. intel_npu_acceleration_library/backend/sdpa.py +107 -0
  26. intel_npu_acceleration_library/backend/tensor.py +1120 -0
  27. intel_npu_acceleration_library/backend/utils.py +70 -0
  28. intel_npu_acceleration_library/compiler.py +194 -0
  29. intel_npu_acceleration_library/device.py +230 -0
  30. intel_npu_acceleration_library/dtypes.py +155 -0
  31. intel_npu_acceleration_library/external/openvino/__init__.py +72 -0
  32. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +21 -0
  33. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  34. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  35. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  36. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
  42. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
  43. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  44. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
  45. intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
  46. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  47. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  48. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  49. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  50. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  51. intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
  52. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  53. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  54. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  55. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  56. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  57. intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
  58. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +370 -0
  59. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +180 -0
  60. intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
  61. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +118 -0
  62. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  63. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  64. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  65. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  66. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +131 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +290 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +126 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +568 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +258 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
  79. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  80. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  81. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  82. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  83. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  84. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +481 -0
  85. intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
  86. intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
  87. intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
  88. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +28 -0
  89. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
  90. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
  91. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +5 -0
  92. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
  93. intel_npu_acceleration_library/external/openvino/properties/__init__.py +22 -0
  94. intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
  95. intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
  96. intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
  97. intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
  98. intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
  99. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
  100. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
  101. intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
  102. intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
  103. intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
  105. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +19 -0
  107. intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
  108. intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3068 -0
  110. intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
  111. intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
  114. intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
  115. intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
  116. intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
  117. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +398 -0
  118. intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
  119. intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
  120. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +17 -0
  121. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +276 -0
  122. intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
  123. intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
  124. intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
  125. intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
  126. intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
  127. intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
  128. intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
  129. intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
  130. intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
  131. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +215 -0
  132. intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
  133. intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
  134. intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
  135. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +787 -0
  136. intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
  137. intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
  138. intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
  139. intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
  140. intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
  141. intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
  142. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +40 -0
  143. intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
  144. intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
  145. intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
  146. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
  147. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +447 -0
  148. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
  149. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +156 -0
  150. intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
  151. intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
  152. intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
  153. intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
  154. intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
  155. intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
  156. intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
  157. intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
  158. intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
  159. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
  160. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
  161. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
  162. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
  163. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
  164. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
  165. intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
  166. intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
  167. intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
  168. intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
  169. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
  170. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +550 -0
  171. intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
  172. intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
  173. intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
  174. intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
  175. intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
  176. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +40 -0
  177. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
  178. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
  179. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
  180. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
  181. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
  182. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
  183. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
  184. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
  185. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
  186. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +298 -0
  187. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
  188. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +214 -0
  189. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
  190. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
  191. intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
  192. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
  193. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
  194. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
  195. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +196 -0
  196. intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
  197. intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
  198. intel_npu_acceleration_library/external/openvino/utils.py +115 -0
  199. intel_npu_acceleration_library/functional/__init__.py +8 -0
  200. intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
  201. intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
  202. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  203. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  204. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  205. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  206. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  207. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  208. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  209. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  210. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  211. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  212. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  213. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  214. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  215. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  216. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  217. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  218. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  219. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  220. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  221. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  222. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  223. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  224. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  225. intel_npu_acceleration_library/modelling.py +150 -0
  226. intel_npu_acceleration_library/nn/__init__.py +20 -0
  227. intel_npu_acceleration_library/nn/autograd.py +68 -0
  228. intel_npu_acceleration_library/nn/conv.py +257 -0
  229. intel_npu_acceleration_library/nn/functional.py +1207 -0
  230. intel_npu_acceleration_library/nn/linear.py +162 -0
  231. intel_npu_acceleration_library/nn/llm.py +417 -0
  232. intel_npu_acceleration_library/nn/module.py +393 -0
  233. intel_npu_acceleration_library/optimizations.py +157 -0
  234. intel_npu_acceleration_library/quantization.py +174 -0
@@ -0,0 +1,180 @@
1
+ # Copyright (C) 2018-2024 Intel Corporation
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # flake8: noqa
5
+ # mypy: ignore-errors
6
+
7
+ import torch
8
+ from functools import partial
9
+
10
+ # Wraps a single tensor in a module to protect it from jit freezing
11
+ # It depends on a tensor dtype whether it will be preserved from freezing. Refer to the decoder code to learn which types will be preserved.
12
class KeepWeight(torch.nn.Module):
    """Module that holds one tensor as a non-trainable parameter.

    Calling the module takes no arguments and returns the stored parameter.
    """

    def __init__(self, weight):
        """Register ``weight`` as a parameter with gradients disabled."""
        super().__init__()
        frozen_weight = torch.nn.Parameter(weight, requires_grad=False)
        self.weight = frozen_weight

    def forward(self):
        """Return the wrapped parameter unchanged."""
        return self.weight
20
+
21
+
22
+ # Produces a pattern that can be captured later and represented as a single u4 constant node
23
def decompression_pattern(weights):
    """Split every element of ``weights`` into its low and high 4-bit halves.

    Returns a tensor with one extra trailing dimension of size 2: index 0
    holds ``weights & 15`` and index 1 holds ``weights >> 4``.

    The specific torch calls are kept on purpose: this op sequence is the
    pattern that is meant to be captured later and folded into a single u4
    constant node.
    """
    low_nibble_mask = torch.tensor(15, dtype=torch.uint8).to(weights.device)
    low_half = torch.bitwise_and(weights, low_nibble_mask)
    high_half = torch.bitwise_right_shift(weights, 4)
    return torch.stack((low_half, high_half), dim=-1)
26
+
27
+
28
def patched_forward(self, *args, **kwargs):
    """Replacement ``forward`` for a patched 4-bit module with explicit zero points.

    Unpacks the u4 weights and zero points stored in the
    ``_openvino_u4_compression_submodule_*`` children, dequantizes the weights
    as ``(weight - zero_point) * scale`` and multiplies the first positional
    argument by the result. ``self.bias`` is added when it is not None.

    If the module carries an ``_hf_hook`` attribute, its ``pre_forward`` and
    ``post_forward`` are invoked around the computation.
    """
    if hasattr(self, '_hf_hook'):
        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)

    activations = args[0]
    compute_dtype = activations.dtype
    # The output keeps every leading dimension and swaps the last one for the layer width.
    result_shape = activations.shape[:-1] + (self.width,)
    flat_input = activations.contiguous().view(-1, activations.shape[-1])
    zp_groups = self.qzeros.shape[0]
    packed_rows = self.qweight.shape[0]

    # Each packed row expands into 8 four-bit values (presumably the 8 nibbles
    # of one original 32-bit word -- confirm against the packing schema).
    nibbles = decompression_pattern(
        self._openvino_u4_compression_submodule_qweights())
    nibbles = nibbles.contiguous().view(packed_rows, -1, 8)
    grouped_weights = torch.transpose(nibbles, 1, 2).contiguous()
    grouped_weights = grouped_weights.view(-1, self.group_size, self.width)

    zero_points = decompression_pattern(
        self._openvino_u4_compression_submodule_qzeros())
    zero_points = zero_points.contiguous().view(zp_groups, 1, -1)

    # Dequantize per group, then flatten the groups back into a 2D matrix.
    dequantized = (grouped_weights.to(compute_dtype) - zero_points) * self.scales
    dequantized = dequantized.view(-1, self.width)

    out = flat_input @ dequantized
    out = out.view(result_shape)
    if self.bias is not None:
        out.add_(self.bias)

    if hasattr(self, '_hf_hook'):
        out = self._hf_hook.post_forward(self, out)
    return out
58
+
59
+
60
def patched_forward_sym(self, *args, **kwargs):
    """Replacement ``forward`` for a patched 4-bit module with symmetric quantization.

    Unpacks the u4 weights stored in
    ``_openvino_u4_compression_submodule_qweights``, scales them by
    ``self.scales`` without subtracting any zero point, and multiplies the
    first positional argument by the result. ``self.bias`` is added when it is
    not None.

    If the module carries an ``_hf_hook`` attribute, its ``pre_forward`` and
    ``post_forward`` are invoked around the computation.
    """
    if hasattr(self, '_hf_hook'):
        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)

    activations = args[0]
    compute_dtype = activations.dtype
    # The output keeps every leading dimension and swaps the last one for the layer width.
    result_shape = activations.shape[:-1] + (self.width,)
    flat_input = activations.contiguous().view(-1, activations.shape[-1])
    packed_rows = self.qweight.shape[0]

    nibbles = decompression_pattern(
        self._openvino_u4_compression_submodule_qweights())
    nibbles = nibbles.contiguous().view(packed_rows, -1, 8)
    grouped_weights = torch.transpose(nibbles, 1, 2).contiguous()
    grouped_weights = grouped_weights.view(-1, self.group_size, self.width)

    # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
    dequantized = grouped_weights.to(compute_dtype) * self.scales
    dequantized = dequantized.view(-1, self.width)

    out = flat_input @ dequantized
    out = out.view(result_shape)
    if self.bias is not None:
        out.add_(self.bias)

    if hasattr(self, '_hf_hook'):
        out = self._hf_hook.post_forward(self, out)
    return out
88
+
89
+
90
# AutoGPTQ QUANT_TYPE values accepted by the GPTQ patching code.
# All of them are supposed to share the same weights packing schema.
supported_quant_types = [
    'triton',
    'exllama',
    'cuda',
    'exllamav2',
    'cuda-old',
]
92
+
93
+
94
def patch_model(model):
    """Patch the AutoGPTQ 4-bit modules of ``model`` in place.

    Every module yielded by ``model.named_modules()`` that is not already
    patched gets ``is_quantized`` reset to False (when the attribute exists and
    is not None) and is converted with ``float()``. Modules exposing
    ``QUANT_TYPE`` additionally have their ``forward`` replaced, their
    ``qweight``/``qzeros`` reinterpreted as uint8, and two ``KeepWeight``
    submodules attached. The original forward, dtypes and scale shape are
    stashed in ``_openvino_patch_orig_*`` attributes.

    Raises:
        ValueError: if a module has an unsupported ``QUANT_TYPE`` or ``bits != 4``.
        AssertionError: if ``group_size`` does not match the tensor shapes.
    """
    # The config lives either on the model itself or on a wrapped inner model.
    config = None
    if hasattr(model, "config"):
        config = model.config
    elif hasattr(model, "model") and hasattr(model.model, "config"):
        config = model.model.config

    is_symmetrical = False
    has_sym_flag = (
        config is not None
        and hasattr(config, 'quantization_config')
        and hasattr(config.quantization_config, 'sym')
    )
    if has_sym_flag:
        is_symmetrical = config.quantization_config.sym

    for name, m in model.named_modules():
        if hasattr(m, '_openvino_patch_orig_forward'):
            # already patched, skipping
            continue

        # TODO: Check module type
        if getattr(m, 'is_quantized', None) is not None:
            m.is_quantized = False
        m.float()  # enables tracing on CPU, applied for all modules

        if not hasattr(m, 'QUANT_TYPE'):
            continue

        if m.QUANT_TYPE not in supported_quant_types:
            raise ValueError(
                f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for AutoGPTQ model, only the following types are supported: {supported_quant_types}')
        if m.bits != 4:
            raise ValueError(
                f'Unsupported bits == {m.bits} is discovered in module {name} in AutoGPTQ model, only bits == 4 is supported.')

        int4_in_int32 = 8
        groups = m.qzeros.shape[0]
        m.width = m.qweight.shape[1]
        assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups

        m._openvino_patch_orig_forward = m.forward
        m.forward = partial(
            patched_forward_sym if is_symmetrical else patched_forward, m)

        # Keep original field properties to be used when model is returned back to its original state
        m._openvino_patch_orig_qweights_type = m.qweight.dtype
        m._openvino_patch_orig_qzeros_type = m.qzeros.dtype
        m._openvino_patch_orig_scale_shape = m.scales.shape

        m.qweight = m.qweight.view(dtype=torch.uint8)
        m.qzeros = m.qzeros.view(dtype=torch.uint8)

        # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
        m.add_module(
            '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))

        # Adding 17 (0x11) bumps both 4-bit halves of every byte by one, moving
        # the zp+1 step from after unpacking to before it so the decompression
        # pattern stays correct.
        # NOTE(review): the original author asks whether this can overflow; a
        # half equal to 15 would carry in uint8 arithmetic -- confirm whether
        # such zero points can occur.
        shifted_zeros = m.qzeros + torch.tensor(17, dtype=torch.uint8)
        m.add_module('_openvino_u4_compression_submodule_qzeros',
                     KeepWeight(shifted_zeros))

        m.scales = m.scales.view(-1, 1, m.width)
148
+
149
+
150
def unpatch_model(model):
    """Undo the GPTQ patching on every patched module of ``model``, in place.

    A module counts as patched when it has ``_openvino_patch_orig_forward``.
    For each such module the original forward, the original dtypes of
    ``qweight``/``qzeros`` and the original shape of ``scales`` are restored,
    and the stashed ``_openvino_patch_orig_*`` attributes and the two u4
    compression submodules are removed.

    Restoration is best effort: any exception raised while unpatching a module
    is printed as a warning and the loop moves on to the next module.
    """
    for _, m in model.named_modules():
        if not hasattr(m, '_openvino_patch_orig_forward'):
            continue
        try:
            m.forward = m._openvino_patch_orig_forward
            del m._openvino_patch_orig_forward

            packed_weights = m.qweight
            m.qweight = packed_weights.view(
                dtype=m._openvino_patch_orig_qweights_type)
            del m._openvino_patch_orig_qweights_type

            packed_zeros = m.qzeros
            m.qzeros = packed_zeros.view(
                dtype=m._openvino_patch_orig_qzeros_type)
            del m._openvino_patch_orig_qzeros_type

            reshaped_scales = m.scales
            m.scales = reshaped_scales.view(m._openvino_patch_orig_scale_shape)
            del m._openvino_patch_orig_scale_shape

            del m._openvino_u4_compression_submodule_qweights
            del m._openvino_u4_compression_submodule_qzeros
        except Exception as error:
            print('[ WARNING ] Exception raised during GPTQ model unpatching. Depending on the exact issue it may lead to broken original model')
            print(error)
173
+
174
+
175
def detect_gptq_model_raw(model):
    """Return True when ``model`` itself is configured as a GPTQ model.

    The model qualifies when it is truthy, has a truthy ``config`` with a
    truthy ``quantization_config``, and that quantization config has a
    ``quant_method`` attribute equal to ``'gptq'``.

    A quantization config without a ``quant_method`` attribute is reported as
    "not GPTQ" instead of raising ``AttributeError``, and the result is always
    a real boolean rather than whichever operand ended the ``and`` chain.
    """
    if not model:
        return False
    config = getattr(model, 'config', None)
    if not config:
        return False
    quantization_config = getattr(config, 'quantization_config', None)
    if not quantization_config:
        return False
    # Read with a default so that configs lacking the field are simply rejected.
    return bool(getattr(quantization_config, 'quant_method', None) == 'gptq')
177
+
178
+
179
def detect_gptq_model(model):
    """Detect a GPTQ model, looking at ``model`` and then at ``model.model``.

    Returns the truthy result of ``detect_gptq_model_raw(model)`` when there is
    one. Otherwise, if ``model`` has a truthy ``model`` attribute, returns the
    result of ``detect_gptq_model_raw`` for that inner object; if not, returns
    the falsy attribute value itself.
    """
    direct_result = detect_gptq_model_raw(model)
    if direct_result:
        return direct_result

    # The original model may have been wrapped; check the inner one as well.
    wrapped = getattr(model, 'model', None)
    if not wrapped:
        return wrapped
    return detect_gptq_model_raw(wrapped)
@@ -0,0 +1,39 @@
1
+ # Copyright (C) 2018-2024 Intel Corporation
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # flake8: noqa
5
+ # mypy: ignore-errors
6
+
7
class ModuleExtension:
    def __init__(self, module, target_op, evaluate=None, convert=None):
        """
        Creates an extension that replaces an entire PyTorch module by a single operation.
        This functionality works with PyTorch models only. A module can be identified by
        module type (e.g. torch.nn.Linear), module instance in the model or module name.

        Args:
            module (str, torch.nn.Module, type(torch.nn.Module)): PyTorch module to replace.

            target_op (str): a target operation that will be used as a replacement for the module;
                it can be the name of an extension operation or of an existing PyTorch operation
                (with prim:: or aten:: prefix following TorchScript syntax).

            evaluate (callable with args module, *args, **kwargs): a callable that replaces the target
                module in model execution. It is responsible for producing valid output for
                the module to allow correct model tracing. By default it calls the module
                with the same arguments. The provided code will not be a part of the final
                traced model, it is used only to produce valid results during tracing.

            convert (callable with args module, target_op, *args, **kwargs): a callable that will be
                traced and become a part of the final model instead of the target module. It receives
                the module first and target_op second; target_op is a callable that will appear as
                a single node in the graph, and the type of that node is the target_op provided as
                another argument above. By default it calls target_op with the remaining arguments.
        """
        def _call_module(module, *args, **kwargs):
            # Default evaluate: run the module itself on the given arguments.
            return module(*args, **kwargs)

        def _call_target_op(module, target_op, *args, **kwargs):
            # Default convert: forward the arguments to the single target operation.
            return target_op(*args, **kwargs)

        self.module = module
        self.target_op = target_op
        self.evaluate = _call_module if evaluate is None else evaluate
        self.convert = _call_target_op if convert is None else convert
@@ -0,0 +1,118 @@
1
+ # Copyright (C) 2018-2024 Intel Corporation
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # flake8: noqa
5
+ # mypy: ignore-errors
6
+
7
+ import torch
8
+ from openvino.frontend.pytorch import ModuleExtension
9
+
10
+
11
class no_jit_trace:
    """Context manager that switches the TorchScript tracing state off for its body."""

    def __enter__(self):
        # Remember the tracing state that is active right now, then disable tracing.
        self.state = torch._C._get_tracing_state()
        torch._C._set_tracing_state(None)

    def __exit__(self, *exc_info):
        # Re-install the remembered tracing state and drop the reference to it.
        saved_state = self.state
        torch._C._set_tracing_state(saved_state)
        self.state = None
19
+
20
+
21
def patch_model(model, module_extensions, orig_forward_name):
    """Patch `forward` of every submodule that has a matching extension.

    Args:
        model: object exposing `named_modules()` that yields `(name, module)` pairs.
        module_extensions: mapping whose keys are module instances, module classes
            or module names, and whose values provide `evaluate` and `convert`
            callables. Lookup order is instance, then class, then name.
        orig_forward_name (str): attribute name under which the original `forward`
            is stored on each patched module; its presence marks a module as
            already patched.

    Modules that already carry `orig_forward_name` are skipped with a printed warning.
    """
    def module_patcher(m, name):
        extension = None
        if m in module_extensions:
            extension = module_extensions[m]
        elif m.__class__ in module_extensions:
            extension = module_extensions[m.__class__]
        elif name in module_extensions:
            extension = module_extensions[name]

        if extension:
            # The Trampoline class is instantiated for every module replacement,
            # so we can use class members individually for each module.

            class Trampoline(torch.autograd.Function):
                target_extension = extension
                original_module = m
                stashed_args = None
                stashed_kwargs = None

                @staticmethod
                @torch.jit.ignore
                def forward(*args, **kwargs):
                    with no_jit_trace():
                        # `m` is going to be passed to the user-defined `evaluate`.
                        # `m` is patched: its forward was replaced and we are running
                        # inside that replacement right now. If `evaluate` calls
                        # forward on the module as-is, it recurses infinitely or fails,
                        # so temporarily put the original forward back.
                        patched_forward = m.forward
                        m.forward = getattr(m, orig_forward_name)
                        try:
                            # call user code
                            results = extension.evaluate(
                                m, *Trampoline.stashed_args, **Trampoline.stashed_kwargs)
                        finally:
                            # Always return the patched forward back, even when the
                            # user code raises; otherwise the module would be left
                            # half-patched (original forward plus the marker attribute).
                            m.forward = patched_forward
                        return results

            def new_forward(*args, **kwargs):
                # Stash the real call arguments: Trampoline.forward evaluates the
                # module with these rather than with what `convert` passes to apply.
                Trampoline.stashed_args = args
                Trampoline.stashed_kwargs = kwargs
                return extension.convert(m, Trampoline.apply, *args, **kwargs)

            setattr(m, orig_forward_name, m.forward)
            m.forward = new_forward

    for name, m in model.named_modules():
        if hasattr(m, orig_forward_name):
            # already patched, skipping with a warning because it is unexpected
            print(f'[ WARNING ] Unexpectedly found already patched module {name} while applying ModuleExtension during PyTorch model conversion. '
                  'Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.')
            continue
        module_patcher(m, name)
72
+
73
+
74
def unpatch_model(model, orig_forward_name):
    """Undo `patch_model`: restore the saved `forward` on every patched submodule.

    A submodule counts as patched when it has the attribute `orig_forward_name`.
    Failures while restoring one submodule are printed as warnings and do not
    stop the loop.
    """
    for _name, submodule in model.named_modules():
        if not hasattr(submodule, orig_forward_name):
            continue
        try:
            original_forward = getattr(submodule, orig_forward_name)
            submodule.forward = original_forward
            delattr(submodule, orig_forward_name)
        except Exception as error:
            print('[ WARNING ] Exception raised during model unpatching. Depending on the exact issue it may lead to broken original model.')
            print('Original exception details:')
            print(error)
84
+
85
+
86
def __make_16bit_traceable(model: torch.nn.Module):
    """
    Prepare a 16-bit PyTorch model for tracing with OpenVINO.
    - Replace known list of modules with ModuleExtension.
    - Convert other modules with weights to FP32.

    The `evaluate` callbacks below do not run the real module: they return a
    float32 tensor filled with 0.5 whose shape is derived from the first input
    and the module's output size.
    """
    extensions = {
        torch.nn.Linear: ModuleExtension(
            torch.nn.Linear, "ov_ext::linear",
            evaluate=lambda module, *args, **kwargs: torch.full(
                list(args[0].shape[:-1]) + [module.out_features], 0.5, dtype=torch.float32),
            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)),
        torch.nn.Embedding: ModuleExtension(
            torch.nn.Embedding, "ov_ext::embedding",
            evaluate=lambda module, *args, **kwargs: torch.full(
                list(args[0].shape) + [module.embedding_dim], 0.5, dtype=torch.float32),
            convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, args[0], module.padding_idx, module.scale_grad_by_freq, module.sparse)),
    }
    try:
        from transformers.pytorch_utils import Conv1D
        extensions[Conv1D] = ModuleExtension(
            Conv1D, "ov_ext::conv1d",
            evaluate=lambda module, *args, **kwargs: torch.full(
                list(args[0].shape[:-1]) + [module.nf], 0.5, dtype=torch.float32),
            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias))
    except Exception:
        # Conv1D support is best-effort: skip it when it cannot be imported or
        # registered. `Exception` (rather than a bare `except`) keeps
        # KeyboardInterrupt and SystemExit propagating.
        pass
    patch_model(model, extensions,
                "_openvino_module_extension_patch_orig_forward")
    half_dtypes = (torch.float16, torch.bfloat16)
    for _, module in model.named_modules():
        if module.__class__ in extensions:
            # Handled by a ModuleExtension above; left in its 16-bit dtype.
            continue
        # Only the module's own parameters/buffers are inspected (recurse=False).
        if (any(p.dtype in half_dtypes for p in module.parameters(False))
                or any(b.dtype in half_dtypes for b in module.buffers(False))):
            module.float()
@@ -0,0 +1,131 @@
1
+ # Copyright (C) 2018-2024 Intel Corporation
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # flake8: noqa
5
+ # mypy: ignore-errors
6
+
7
+ import logging
8
+ import os
9
+ from functools import partial
10
+ from hashlib import sha256
11
+
12
+ import torch
13
+ from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
14
+ from torch._dynamo.backends.registry import register_backend
15
+ from torch._inductor.compile_fx import compile_fx
16
+ from torch._inductor.freezing import replace_params_with_constants
17
+ from torch.fx.experimental.proxy_tensor import make_fx
18
+ from torch._decomp import decomposition_table, get_decompositions
19
+
20
+ from openvino.frontend import FrontEndManager
21
+ from openvino.runtime import Core, Type, PartialShape
22
+ from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder
23
+ from openvino.frontend.pytorch.torchdynamo import decompositions
24
+ from openvino.frontend.pytorch.torchdynamo.decompositions import get_aot_decomposition_list, get_inf_decomposition_list
25
+ from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
26
+ from openvino.frontend.pytorch.torchdynamo.execute import execute, execute_cached
27
+ from openvino.frontend.pytorch.torchdynamo.compile import cached_model_name, openvino_compile_cached_model
28
+ from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_model_caching, _get_decompositions, _get_aot_autograd
29
+
30
+ from openvino.runtime import Core, Type, PartialShape
31
+
32
# Module logger; only WARNING and above are emitted by default.
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

# The bare string below is documentation only: it is an expression statement
# with no runtime effect.
"""
This is a preview feature in OpenVINO. This feature
enables users to compile PyTorch models using torch.compile
with OpenVINO as a target backend in PyTorch applications

Sample usage:
This sample code loads resnet50 torchvision model and compiles it using torch dynamo.
We can then use this model for inference. We only need to add two lines of code to
the Pytorch applications which are marked in the code below

1) import openvino.torch
model = torchvision.models.resnet50()
2) model = torch.compile(model, backend="openvino")
"""

# Options captured by `openvino` when it takes the AOT-autograd path.
# `fx_openvino` uses this dict instead of its own `options` argument
# whenever it is non-empty.
openvino_options = {}
51
+
52
@register_backend
@fake_tensor_unsupported
def openvino(subgraph, example_inputs, options=None):
    """Backend entry point registered under the name `openvino`.

    Without the `aot_autograd` option the graph goes straight to `fx_openvino`.
    With it, `options` is stored in the module-level `openvino_options` and the
    graph is wrapped by `aot_autograd`, using `fx_openvino` as both the forward
    and the backward compiler.
    """
    global openvino_options

    if not _get_aot_autograd(options):
        return fx_openvino(subgraph, example_inputs, options)

    # fx_openvino is invoked by aot_autograd without `options`, so it reads
    # them back from the module-level dict.
    openvino_options = options
    decomposition_ops = (
        _get_decompositions(options)
        + get_inf_decomposition_list()
        + get_aot_decomposition_list()
    )
    compiler = aot_autograd(
        fw_compiler=fx_openvino,
        bw_compiler=fx_openvino,
        decompositions=get_decompositions(decomposition_ops),
    )
    return compiler(subgraph, example_inputs)
63
+
64
def fx_openvino(subgraph, example_inputs, options=None):
    """Compile an FX graph for execution through OpenVINO.

    Args:
        subgraph: the graph module to compile.
        example_inputs: list of example inputs for the graph. The list passed
            by the caller is not mutated.
        options: optional options mapping; replaced by the module-level
            `openvino_options` whenever that dict is non-empty.

    Returns:
        A callable that executes the compiled model. On the AOT-autograd path
        the callable is marked with `_boxed_call = True` and takes its inputs
        as a single list argument. If any exception is raised during
        compilation, it is logged at DEBUG level and the result of
        `compile_fx(subgraph, example_inputs)` is returned instead.
    """
    try:
        if len(openvino_options) != 0:
            options = openvino_options
        executor_parameters = None
        openvino_model_caching = _get_model_caching(options)
        if openvino_model_caching is not None and openvino_model_caching:
            # Create a hash to be used for caching
            model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest()
            executor_parameters = {"model_hash_str": model_hash_str}
            # Check if the model was fully supported and already cached.
            # The cache lookup uses the inputs in reverse order. Work on a
            # reversed copy instead of reversing `example_inputs` in place, so
            # the caller's list (and the compile_fx fallback below) never sees
            # the reversed order, even if the lookup raises.
            reversed_inputs = list(reversed(example_inputs))
            maybe_fs_cached_name = cached_model_name(model_hash_str + "_fs", _get_device(options), reversed_inputs, _get_cache_dir(options))
            if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"):
                # Model is fully supported and already cached. Run the cached OV model directly.
                compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, options, *reversed_inputs)

                def _call(*args):
                    res = execute_cached(compiled_model, *args)
                    return res
                return _call

        preserved_arg_indices = []
        if (_get_aot_autograd(options)):
            if tracing_context := torch._guards.TracingContext.try_get():
                fw_metadata = tracing_context.fw_metadata
                params_flat = tracing_context.params_flat
                assert fw_metadata is not None and params_flat is not None
                # Only the inputs at `preserved_arg_indices` remain graph inputs.
                preserved_arg_indices = replace_params_with_constants(subgraph, params_flat, fw_metadata)
                example_inputs = [example_inputs[ind] for ind in preserved_arg_indices]
            model = subgraph
        else:
            from torch._subclasses.fake_tensor import FakeTensorMode
            decompositions = _get_decompositions(options) + get_inf_decomposition_list()
            with FakeTensorMode(allow_non_fake_inputs=True):
                model = make_fx(subgraph, decomposition_table=get_decompositions(decompositions))(*example_inputs)

        with torch.no_grad():
            model.eval()
            partitioner = Partitioner(options)
            compiled_model = partitioner.make_partitions(model, options)

        if executor_parameters is not None and 'model_hash_str' in executor_parameters:
            # Check if the model is fully supported.
            fully_supported = partitioner.check_fully_supported(compiled_model)
            if fully_supported:
                # Same "_fs" suffix that the cache lookup above probes for.
                executor_parameters["model_hash_str"] += "_fs"

        def _call(*args):
            if (_get_aot_autograd(options)):
                # Boxed calling convention: all inputs arrive as one list;
                # keep only the ones that are still graph inputs.
                args_list = args[0]
                args_new = [args_list[i] for i in preserved_arg_indices]
                args = args_new
            res = execute(compiled_model, *args, executor="openvino",
                          executor_parameters=executor_parameters, options=options)
            return res
        if (_get_aot_autograd(options)):
            _call._boxed_call = True  # type: ignore[attr-defined]
        return _call
    except Exception as e:
        logger.debug(f"Failed in OpenVINO execution: {e}")
        return compile_fx(subgraph, example_inputs)
129
+
130
def reset():
    """Reset backend state by delegating to `clear_caches`."""
    # NOTE(review): `clear_caches` is neither defined nor imported in this
    # module's visible import block, so this call looks like it would raise
    # NameError - confirm where the name is expected to come from.
    clear_caches()
@@ -0,0 +1,85 @@
1
+ # Copyright (C) 2018-2024 Intel Corporation
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # flake8: noqa
5
+ # mypy: ignore-errors
6
+
7
+ from typing import Optional, Any
8
+ from openvino.runtime import Core
9
+
10
+
11
def _get_device(options) -> Optional[Any]:
    """Return the device name from `options["device"]`, defaulting to "CPU".

    A missing key or a `None` value yields "CPU". Any other value must be
    listed in `Core().available_devices`, otherwise an AssertionError is raised.
    """
    core = Core()

    requested = "CPU"
    if options is not None and "device" in options:
        requested = options["device"]

    if requested is None:
        return "CPU"

    assert requested in core.available_devices, (
        "Specified device "
        + requested
        + " is not in the list of OpenVINO Available Devices"
    )
    return requested
27
+
28
+ def _is_cache_dir_in_config(options) -> Optional[Any]:
29
+ if options is not None and "config" in options:
30
+ cfg = options["config"]
31
+ if cfg is not None and "CACHE_DIR" in cfg:
32
+ return True
33
+ return False
34
+
35
+
36
def _get_cache_dir(options) -> Optional[Any]:
    """Pick the cache directory.

    Precedence: `options["config"]["CACHE_DIR"]`, then `options["cache_dir"]`,
    then the default "./cache".
    """
    if _is_cache_dir_in_config(options):
        return options["config"]["CACHE_DIR"]
    if options is not None and "cache_dir" in options:
        return options["cache_dir"]
    return "./cache"
43
+
44
+
45
+ def _get_aot_autograd(options) -> Optional[Any]:
46
+ if options is not None and "aot_autograd" in options:
47
+ aot_autograd = options["aot_autograd"]
48
+ if bool(aot_autograd) and str(aot_autograd).lower() not in ["false", "0"]:
49
+ return True
50
+ else:
51
+ return False
52
+
53
+
54
+ def _get_model_caching(options) -> Optional[Any]:
55
+ if options is not None and "model_caching" in options:
56
+ caching = options["model_caching"]
57
+ if bool(caching) and str(caching).lower() not in ["false", "0"]:
58
+ return True
59
+ return False
60
+
61
+
62
+ def _get_config(options) -> Optional[Any]:
63
+ if options is not None and "config" in options:
64
+ return options["config"]
65
+ return {}
66
+
67
+ def _get_decompositions(options) -> Optional[Any]:
68
+ decompositions = []
69
+ if options is not None and "decompositions" in options:
70
+ decompositions = options["decompositions"]
71
+ return decompositions
72
+
73
+ def _get_disabled_ops(options) -> Optional[Any]:
74
+ disabled_ops = []
75
+ if options is not None and "disabled_ops" in options:
76
+ disabled_ops = options["disabled_ops"]
77
+ return disabled_ops
78
+
79
+ def _is_testing(options) -> Optional[Any]:
80
+ if options is not None and "testing" in options:
81
+ is_testing = options["testing"]
82
+ if bool(is_testing) and str(is_testing).lower not in ["false", "0"]:
83
+ return True
84
+ return False
85
+