bigdl_core_npu-2.5.0-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. bigdl_core_npu-2.5.0.dist-info/METADATA +35 -0
  2. bigdl_core_npu-2.5.0.dist-info/RECORD +223 -0
  3. bigdl_core_npu-2.5.0.dist-info/WHEEL +5 -0
  4. bigdl_core_npu-2.5.0.dist-info/top_level.txt +1 -0
  5. intel_npu_acceleration_library/__init__.py +24 -0
  6. intel_npu_acceleration_library/_version.py +6 -0
  7. intel_npu_acceleration_library/backend/__init__.py +37 -0
  8. intel_npu_acceleration_library/backend/base.py +215 -0
  9. intel_npu_acceleration_library/backend/bindings.py +279 -0
  10. intel_npu_acceleration_library/backend/compression.py +24 -0
  11. intel_npu_acceleration_library/backend/convolution.py +58 -0
  12. intel_npu_acceleration_library/backend/factory.py +944 -0
  13. intel_npu_acceleration_library/backend/linear.py +60 -0
  14. intel_npu_acceleration_library/backend/matmul.py +59 -0
  15. intel_npu_acceleration_library/backend/mlp.py +58 -0
  16. intel_npu_acceleration_library/backend/ops.py +141 -0
  17. intel_npu_acceleration_library/backend/qlinear.py +71 -0
  18. intel_npu_acceleration_library/backend/qmatmul.py +66 -0
  19. intel_npu_acceleration_library/backend/runtime.py +209 -0
  20. intel_npu_acceleration_library/backend/sdpa.py +107 -0
  21. intel_npu_acceleration_library/backend/tensor.py +1050 -0
  22. intel_npu_acceleration_library/backend/utils.py +70 -0
  23. intel_npu_acceleration_library/compiler.py +194 -0
  24. intel_npu_acceleration_library/device.py +230 -0
  25. intel_npu_acceleration_library/dtypes.py +122 -0
  26. intel_npu_acceleration_library/external/openvino/__init__.py +71 -0
  27. intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +20 -0
  28. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  29. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  30. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  31. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  32. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  33. intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
  34. intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
  35. intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
  36. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
  42. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  43. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  44. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  45. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  46. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  47. intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
  48. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +352 -0
  49. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +139 -0
  50. intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
  51. intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +98 -0
  52. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  53. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  54. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  55. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  56. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  57. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +119 -0
  58. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
  59. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
  60. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
  61. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
  62. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +289 -0
  63. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +118 -0
  64. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +536 -0
  65. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +256 -0
  66. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
  67. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
  68. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
  69. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  70. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  71. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  72. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  73. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +460 -0
  75. intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
  76. intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
  77. intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
  78. intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +26 -0
  79. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
  80. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
  81. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +4 -0
  82. intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
  83. intel_npu_acceleration_library/external/openvino/properties/__init__.py +21 -0
  84. intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
  85. intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
  86. intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
  87. intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
  88. intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
  89. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
  90. intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
  91. intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
  92. intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
  93. intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
  94. intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
  95. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
  96. intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +18 -0
  97. intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
  98. intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
  99. intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3067 -0
  100. intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
  101. intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
  102. intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
  103. intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
  104. intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
  105. intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
  106. intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
  107. intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +399 -0
  108. intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
  109. intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
  110. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +10 -0
  111. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +85 -0
  112. intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
  113. intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
  114. intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
  115. intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
  116. intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
  117. intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
  118. intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
  119. intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
  120. intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
  121. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +189 -0
  122. intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
  123. intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
  124. intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
  125. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +783 -0
  126. intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
  127. intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
  128. intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
  129. intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
  130. intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
  131. intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
  132. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +38 -0
  133. intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
  134. intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
  135. intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
  136. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
  137. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +429 -0
  138. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
  139. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +70 -0
  140. intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
  141. intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
  142. intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
  143. intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
  144. intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
  145. intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
  146. intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
  147. intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
  148. intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
  149. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
  150. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
  151. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
  152. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
  153. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
  154. intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
  155. intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
  156. intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
  157. intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
  158. intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
  159. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
  160. intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +536 -0
  161. intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
  162. intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
  163. intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
  164. intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
  165. intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
  166. intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +35 -0
  167. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
  168. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
  169. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
  170. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
  171. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
  172. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
  173. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
  174. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
  175. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +246 -0
  176. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
  177. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +205 -0
  178. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
  179. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
  180. intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
  181. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
  182. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
  183. intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
  184. intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +109 -0
  185. intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
  186. intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
  187. intel_npu_acceleration_library/external/openvino/utils.py +98 -0
  188. intel_npu_acceleration_library/functional/__init__.py +8 -0
  189. intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
  190. intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
  191. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  192. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  193. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  194. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  195. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  196. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  197. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  198. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  199. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  200. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  201. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  202. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  203. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  204. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  205. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  206. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  207. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  208. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  209. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  210. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  211. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  212. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  213. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  214. intel_npu_acceleration_library/modelling.py +150 -0
  215. intel_npu_acceleration_library/nn/__init__.py +20 -0
  216. intel_npu_acceleration_library/nn/autograd.py +68 -0
  217. intel_npu_acceleration_library/nn/conv.py +257 -0
  218. intel_npu_acceleration_library/nn/functional.py +1207 -0
  219. intel_npu_acceleration_library/nn/linear.py +162 -0
  220. intel_npu_acceleration_library/nn/llm.py +417 -0
  221. intel_npu_acceleration_library/nn/module.py +393 -0
  222. intel_npu_acceleration_library/optimizations.py +157 -0
  223. intel_npu_acceleration_library/quantization.py +174 -0
intel_npu_acceleration_library/backend/linear.py
@@ -0,0 +1,60 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from intel_npu_acceleration_library.backend.factory import NNFactory
+ import numpy as np
+
+
+ class Linear(NNFactory):
+     """Linear class, computing a matrix-matrix multiplication with weights prefetching."""
+
+     def __init__(
+         self,
+         inC: int,
+         outC: int,
+         batch: int,
+         profile: bool = False,
+         device: str = "NPU",
+     ):
+         """Initialize the Linear class.
+
+         Args:
+             inC (int): input channels
+             outC (int): output channels
+             batch (int): batch size
+             profile (bool): Enable/Disable profiling. Defaults to False.
+             device (str): Target device. Defaults to "NPU".
+         """
+         super().__init__(profile, device)
+         self.inC, self.outC = inC, outC
+         self.batch = batch
+         input = self.parameter((self.batch, self.inC))
+         _ = self.linear(input, outC, inC, bias=False)
+         self.compile()
+
+     def run(self, X: np.ndarray, W: np.ndarray, op_id: str) -> np.ndarray:
+         """Run the layer: X * W^T.
+
+         Args:
+             X (np.ndarray): lhs operator
+             W (np.ndarray): rhs operator
+             op_id (str): operation id
+
+         Raises:
+             RuntimeError: Input or weight tensor shape mismatch
+
+         Returns:
+             np.ndarray: result
+         """
+         if not (X.shape[0] == self.batch and X.shape[1] == self.inC):
+             raise RuntimeError(
+                 f"Input shape {X.shape} different from expected one {(self.batch, self.inC)}"
+             )
+         if not (W.shape[0] == self.outC and W.shape[1] == self.inC):
+             raise RuntimeError(
+                 f"Weight shape {W.shape} different from expected one {(self.outC, self.inC)}"
+             )
+
+         return super().run(X, W, op_id=op_id)
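For orientation, here is a minimal usage sketch of the Linear backend added above. It is not part of the diff; the sizes are arbitrary, and the fp16 dtype and (outC, inC) weight layout are inferred from the shape checks in run.

import numpy as np
from intel_npu_acceleration_library.backend import Linear

# Compile a graph for a fixed problem size: (batch, inC) x (outC, inC)^T
linear = Linear(inC=256, outC=512, batch=128)

X = np.random.rand(128, 256).astype(np.float16)  # activations, (batch, inC)
W = np.random.rand(512, 256).astype(np.float16)  # weights, (outC, inC)

out = linear.run(X, W, op_id="layer0")  # X @ W.T -> (128, 512); op_id tags the op for weight prefetching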
intel_npu_acceleration_library/backend/matmul.py
@@ -0,0 +1,59 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from intel_npu_acceleration_library.backend.factory import NNFactory
+ import numpy as np
+
+
+ class MatMul(NNFactory):
+     """MatMul class, computing a matrix-matrix multiplication."""
+
+     def __init__(
+         self,
+         inC: int,
+         outC: int,
+         batch: int,
+         profile: bool = False,
+         device: str = "NPU",
+     ):
+         """Initialize the MatMul class.
+
+         Args:
+             inC (int): input channels
+             outC (int): output channels
+             batch (int): batch size
+             profile (bool): Enable/Disable profiling. Defaults to False.
+             device (str): Target device. Defaults to "NPU".
+         """
+         super().__init__(profile, device)
+         self.inC, self.outC = inC, outC
+         self.batch = batch
+         input = self.parameter((self.batch, self.inC))
+         _ = self.linear(input, outC, inC, bias=False)
+         self.compile()
+
+     def run(self, X: np.ndarray, W: np.ndarray) -> np.ndarray:
+         """Run the layer: X * W^T.
+
+         Args:
+             X (np.ndarray): lhs operator
+             W (np.ndarray): rhs operator
+
+         Raises:
+             RuntimeError: Input or weight tensor shape mismatch
+
+         Returns:
+             np.ndarray: result
+         """
+         if not (X.shape[0] == self.batch and X.shape[1] == self.inC):
+             raise RuntimeError(
+                 f"Input shape {X.shape} different from expected one {(self.batch, self.inC)}"
+             )
+         if not (W.shape[0] == self.outC and W.shape[1] == self.inC):
+             raise RuntimeError(
+                 f"Weight shape {W.shape} different from expected one {(self.outC, self.inC)}"
+             )
+
+         return super().run(X, W)
intel_npu_acceleration_library/backend/mlp.py
@@ -0,0 +1,58 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from intel_npu_acceleration_library.backend.factory import NNFactory
+ from typing import Optional, Sequence
+
+
+ class MLP(NNFactory):
+     """MLP class, computing a multilayer perceptron with weights prefetching."""
+
+     def __init__(
+         self,
+         input_shape: Sequence[int],
+         intermediate_size: int,
+         activation: str = "swiglu",
+         bias: Optional[bool] = False,
+         profile: bool = False,
+         device: str = "NPU",
+         **additional_args
+     ):
+         """Initialize the MLP class.
+
+         Args:
+             input_shape (Sequence[int]): input shape (batch, hidden_size)
+             intermediate_size (int): intermediate size of the MLP
+             activation (str): activation function to use
+             bias (Optional[bool], optional): Enable/Disable bias. Defaults to False.
+             profile (bool): Enable/Disable profiling. Defaults to False.
+             device (str): Target device. Defaults to "NPU".
+             additional_args: additional activation arguments (e.g. min/max for clamp)
+         """
+         super().__init__(profile, device)
+         self.intermediate_size = intermediate_size
+         self.batch, self.hidden_size = input_shape
+         input = self.parameter((self.batch, self.hidden_size))
+
+         mm1 = self.linear(input, self.intermediate_size, self.hidden_size, bias=bias)
+
+         if activation == "swiglu":
+             mm2 = self.linear(input, self.intermediate_size, self.hidden_size, bias=bias)  # type: ignore[attr-defined]
+             mm1 = self.eltwise_mul(self.swish(mm1), mm2)  # type: ignore[attr-defined]
+         elif activation == "clamp":
+             act_fn = getattr(self, activation)
+             mm1 = act_fn(mm1, additional_args.get("min"), additional_args.get("max"))
+         elif activation == "elu":
+             act_fn = getattr(self, activation)
+             mm1 = act_fn(mm1, additional_args.get("alpha", 1.0))
+         elif activation == "grn":
+             act_fn = getattr(self, activation)
+             mm1 = act_fn(mm1, additional_args.get("grn_bias"))
+         else:
+             act_fn = getattr(self, activation)
+             mm1 = act_fn(mm1)
+
+         _ = self.linear(mm1, self.hidden_size, self.intermediate_size, bias=bias)
+         self.compile()
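A construction sketch for the MLP factory above, using illustrative LLaMA-style sizes; the graph (gate/up projections, SwiGLU gating, down projection) is built and compiled in the constructor, and weights are bound later at run time via the inherited NNFactory.run, as run_factory in runtime.py does:

from intel_npu_acceleration_library.backend.mlp import MLP

# Hypothetical feed-forward block: hidden size 4096, intermediate size 11008,
# gated SwiGLU activation, no bias. Compilation happens here, once per shape.
mlp = MLP(input_shape=(1, 4096), intermediate_size=11008, activation="swiglu", bias=False)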
intel_npu_acceleration_library/backend/ops.py
@@ -0,0 +1,141 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from dataclasses import dataclass
+ from functools import lru_cache
+ from typing import List, Any, Sequence
+ import ctypes
+
+
+ @dataclass(frozen=True)
+ class SupportedOp:
+     """A class for supported runtime OPs in the NPU.
+
+     Attributes:
+         name (str): Operation name
+         inputs (int): Number of inputs
+         parameters (Sequence[Any]): Optional parameter types.
+     """
+
+     name: str
+     inputs: int
+     parameters: Sequence[Any] = ()
+
+
+ @lru_cache(maxsize=None)
+ def get_supported_ops() -> List[SupportedOp]:
+     """Generate a list of supported operations.
+
+     Returns:
+         List[SupportedOp]: list of supported NPU operations
+     """
+     supported_ops = [
+         SupportedOp(name="result", inputs=1),
+         SupportedOp(name="matmul", inputs=2, parameters=[ctypes.c_bool, ctypes.c_bool]),
+         SupportedOp(name="eltwise_add", inputs=2),
+         SupportedOp(name="eltwise_mul", inputs=2),
+         SupportedOp(name="eltwise_div", inputs=2),
+         SupportedOp(name="abs_act", inputs=1),
+         SupportedOp(name="acos_act", inputs=1),
+         SupportedOp(name="asin_act", inputs=1),
+         SupportedOp(name="atan_act", inputs=1),
+         SupportedOp(name="ceiling", inputs=1),
+         SupportedOp(
+             name="clamp", inputs=1, parameters=[ctypes.c_float, ctypes.c_float]
+         ),
+         SupportedOp(name="cos_act", inputs=1),
+         SupportedOp(name="cosh_act", inputs=1),
+         SupportedOp(name="erf_act", inputs=1),
+         SupportedOp(name="elu", inputs=1, parameters=[ctypes.c_float]),
+         SupportedOp(name="exp_act", inputs=1),
+         SupportedOp(name="floor_act", inputs=1),
+         SupportedOp(name="grn", inputs=1, parameters=[ctypes.c_float]),
+         SupportedOp(name="gelu", inputs=1),
+         SupportedOp(name="gelu_erf", inputs=1),
+         SupportedOp(name="log_act", inputs=1),
+         SupportedOp(name="negative", inputs=1),
+         SupportedOp(name="relu", inputs=1),
+         SupportedOp(name="sigmoid", inputs=1),
+         SupportedOp(name="sign", inputs=1),
+         SupportedOp(name="sin_act", inputs=1),
+         SupportedOp(name="sinh_act", inputs=1),
+         SupportedOp(name="sqrt_act", inputs=1),
+         SupportedOp(name="tan_act", inputs=1),
+         SupportedOp(name="tanh_act", inputs=1),
+         SupportedOp(name="acosh_act", inputs=1),
+         SupportedOp(name="asinh_act", inputs=1),
+         SupportedOp(name="atanh_act", inputs=1),
+         SupportedOp(name="hswish", inputs=1),
+         SupportedOp(name="mish", inputs=1),
+         SupportedOp(name="softplus", inputs=1),
+         SupportedOp(name="hsigmoid", inputs=1),
+         SupportedOp(name="round_act", inputs=1),
+         SupportedOp(name="softsign", inputs=1),
+         SupportedOp(name="softmax", inputs=1, parameters=[ctypes.c_int]),
+         SupportedOp(name="swish", inputs=1),
+         SupportedOp(name="convert_to_fp16", inputs=1),
+         SupportedOp(name="convert_to_fp32", inputs=1),
+         SupportedOp(name="convert_to_int32", inputs=1),
+         SupportedOp(
+             name="scaled_dot_product_attention",
+             inputs=4,
+             parameters=[ctypes.c_bool],
+         ),
+         SupportedOp(
+             name="scaled_dot_product_attention_simple",
+             inputs=3,
+             parameters=[ctypes.c_bool],
+         ),
+         SupportedOp(
+             name="normL2",
+             inputs=2,
+             parameters=[ctypes.c_float],
+         ),
+         SupportedOp(
+             name="gather",
+             inputs=3,
+             parameters=[ctypes.c_int],
+         ),
+         SupportedOp(name="reshape", inputs=2),
+         SupportedOp(name="transpose", inputs=2),
+         SupportedOp(name="squeeze", inputs=1),
+         SupportedOp(name="unsqueeze", inputs=2),
+         SupportedOp(
+             name="concat",
+             inputs=2,
+             parameters=[ctypes.c_int64],
+         ),
+         SupportedOp(
+             name="reduce_max",
+             inputs=2,
+             parameters=[ctypes.c_bool],
+         ),
+         SupportedOp(
+             name="reduce_mean",
+             inputs=2,
+             parameters=[ctypes.c_bool],
+         ),
+         SupportedOp(
+             name="reduce_min",
+             inputs=2,
+             parameters=[ctypes.c_bool],
+         ),
+         SupportedOp(
+             name="reduce_prod",
+             inputs=2,
+             parameters=[ctypes.c_bool],
+         ),
+         SupportedOp(
+             name="reduce_sum",
+             inputs=2,
+             parameters=[ctypes.c_bool],
+         ),
+         SupportedOp(name="adaptive_avg_pool", inputs=2),
+         SupportedOp(name="adaptive_max_pool", inputs=2),
+         SupportedOp(name="power", inputs=2),
+         SupportedOp(name="broadcast", inputs=2),
+         SupportedOp(name="log_softmax", inputs=1, parameters=[ctypes.c_int64]),
+     ]
+     return supported_ops
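Each SupportedOp entry names an operation exported by the native library together with its tensor arity and any extra scalar parameters (as ctypes types). Below is a hypothetical helper showing how the table can be queried; the bindings module in this wheel does the real ctypes wiring:

from intel_npu_acceleration_library.backend.ops import get_supported_ops

def describe(op_name: str) -> str:
    """Illustrative helper: summarize the signature of a supported NPU op."""
    op = next(op for op in get_supported_ops() if op.name == op_name)
    params = ", ".join(p.__name__ for p in op.parameters)
    extra = f", extra scalar args ({params})" if params else ""
    return f"{op.name}: {op.inputs} tensor input(s){extra}"

print(describe("clamp"))   # clamp: 1 tensor input(s), extra scalar args (c_float, c_float)
print(describe("matmul"))  # matmul: 2 tensor input(s), extra scalar args (c_bool, c_bool)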
intel_npu_acceleration_library/backend/qlinear.py
@@ -0,0 +1,71 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from intel_npu_acceleration_library.backend.factory import NNFactory
+ import numpy as np
+
+
+ class QLinear(NNFactory):
+     """Quantized Linear class, computing a matrix-matrix multiplication with weights prefetching."""
+
+     def __init__(
+         self,
+         inC: int,
+         outC: int,
+         batch: int,
+         profile: bool = False,
+         device: str = "NPU",
+         dtype: np.dtype = np.int8,
+     ):
+         """Initialize the QLinear class.
+
+         Args:
+             inC (int): input channels
+             outC (int): output channels
+             batch (int): batch size
+             profile (bool): Enable/Disable profiling. Defaults to False.
+             device (str): Target device. Defaults to "NPU".
+             dtype (np.dtype): weights datatype. Defaults to np.int8.
+
+         """
+         super().__init__(profile, device)
+         self.inC, self.outC = inC, outC
+         self.batch = batch
+
+         input = self.parameter((self.batch, self.inC))
+         _ = self.linear(input, outC, inC, bias=False, wt_dtype=dtype)
+         self.compile()
+
+     def run(
+         self, X: np.ndarray, W: np.ndarray, scale: np.ndarray, op_id: str
+     ) -> np.ndarray:
+         """Run the layer: $X * (W * S)^T$.
+
+         Args:
+             X (np.ndarray): activation
+             W (np.ndarray): quantized weights
+             scale (np.ndarray): quantization scale
+             op_id (str): operation id
+
+         Raises:
+             RuntimeError: Input, weights or scale shape mismatch
+
+         Returns:
+             np.ndarray: result
+         """
+         if not (X.shape[0] == self.batch and X.shape[1] == self.inC):
+             raise RuntimeError(
+                 f"Input shape {X.shape} different from expected one {(self.batch, self.inC)}"
+             )
+         if not (W.shape[0] == self.outC and W.shape[1] == self.inC):
+             raise RuntimeError(
+                 f"Weight shape {W.shape} different from expected one {(self.outC, self.inC)}"
+             )
+         if not (scale.shape[0] == self.outC and scale.shape[1] == 1):
+             raise RuntimeError(
+                 f"Scale shape {scale.shape} different from expected one {(self.outC, 1)}"
+             )
+
+         return super().run(X, (W, scale), op_id=op_id)
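A sketch of driving QLinear with symmetrically quantized int8 weights. The (outC, 1) per-output-channel scale layout follows the shape check above; the naive quantization recipe and the fp16 scale dtype are assumptions for illustration (the wheel's quantization.py implements the real scheme):

import numpy as np
from intel_npu_acceleration_library.backend import QLinear

inC, outC, batch = 256, 512, 64
W_fp = np.random.randn(outC, inC).astype(np.float32)

# Naive symmetric per-channel int8 quantization (illustration only)
scale = np.abs(W_fp).max(axis=1, keepdims=True) / 127.0           # (outC, 1)
W_q = np.clip(np.round(W_fp / scale), -128, 127).astype(np.int8)  # (outC, inC)

qlinear = QLinear(inC, outC, batch, dtype=np.int8)
X = np.random.rand(batch, inC).astype(np.float16)
out = qlinear.run(X, W_q, scale.astype(np.float16), op_id="qlayer0")  # X @ (W_q * scale).T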
intel_npu_acceleration_library/backend/qmatmul.py
@@ -0,0 +1,66 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from intel_npu_acceleration_library.backend.factory import NNFactory
+ import numpy as np
+
+
+ class QMatMul(NNFactory):
+     """Quantized MatMul class, computing a matrix-matrix multiplication."""
+
+     def __init__(
+         self,
+         inC: int,
+         outC: int,
+         batch: int,
+         profile: bool = False,
+         device: str = "NPU",
+         dtype: np.dtype = np.int8,
+     ):
+         """Initialize the QMatMul class.
+
+         Args:
+             inC (int): input channels
+             outC (int): output channels
+             batch (int): batch size
+             profile (bool): Enable/Disable profiling. Defaults to False.
+             device (str): Target device. Defaults to "NPU".
+             dtype (np.dtype): weights datatype. Defaults to np.int8.
+         """
+         super().__init__(profile, device)
+         self.inC, self.outC = inC, outC
+         self.batch = batch
+         input = self.parameter((self.batch, self.inC))
+         _ = self.linear(input, outC, inC, bias=False, wt_dtype=dtype)
+         self.compile()
+
+     def run(self, X: np.ndarray, W: np.ndarray, scale: np.ndarray) -> np.ndarray:
+         """Run the layer: X * (W * S)^T.
+
+         Args:
+             X (np.ndarray): activation
+             W (np.ndarray): quantized weights
+             scale (np.ndarray): quantization scale
+
+         Raises:
+             RuntimeError: Input, weights or scale shape mismatch
+
+         Returns:
+             np.ndarray: result
+         """
+         if not (X.shape[0] == self.batch and X.shape[1] == self.inC):
+             raise RuntimeError(
+                 f"Input shape {X.shape} different from expected one {(self.batch, self.inC)}"
+             )
+         if not (W.shape[0] == self.outC and W.shape[1] == self.inC):
+             raise RuntimeError(
+                 f"Weight shape {W.shape} different from expected one {(self.outC, self.inC)}"
+             )
+         if not (scale.shape[0] == self.outC and scale.shape[1] == 1):
+             raise RuntimeError(
+                 f"Scale shape {scale.shape} different from expected one {(self.outC, 1)}"
+             )
+
+         return super().run(X, (W, scale))
intel_npu_acceleration_library/backend/runtime.py
@@ -0,0 +1,209 @@
+ #
+ # Copyright © 2024 Intel Corporation
+ # SPDX-License-Identifier: Apache 2.0
+ #
+
+ from intel_npu_acceleration_library.backend import Linear, QLinear
+ from intel_npu_acceleration_library.backend import MatMul, QMatMul
+ from intel_npu_acceleration_library.backend import NNFactory
+ from torch.profiler import record_function
+ from typing import Optional, Any, List, Dict, Deque, Union
+ from functools import partial
+ from collections import deque
+ import numpy as np
+ import torch
+
+ _model_cache: Dict[str, Deque[NNFactory]] = {}
+
+
+ def clear_cache():
+     """Clear the cache of models."""
+     global _model_cache
+     _model_cache = {}
+
+
+ @torch.no_grad()
+ def run_matmul(
+     x: torch.Tensor,
+     weights: torch.Tensor,
+     scale: Optional[torch.Tensor] = None,
+     op_id: Optional[str] = None,
+ ) -> torch.Tensor:
+     """Run a matmul operation. Depending on the dtype of the weights it runs a float or quantized operation.
+
+     Args:
+         x (torch.Tensor): Activation tensor. Its dtype must be torch.float16
+         weights (torch.Tensor): Weights tensor. Its dtype can be torch.float16 or torch.int8
+         scale (Optional[torch.Tensor], optional): Quantization scale. Required if weights.dtype == torch.int8. Defaults to None.
+         op_id (Optional[str], optional): Operation ID. Defaults to None.
+
+     Raises:
+         RuntimeError: Unsupported weights datatype. Supported types: [torch.float16, torch.int8]
+
+     Returns:
+         torch.Tensor: result
+     """
+     global _model_cache
+
+     outC, inC = weights.shape[-2:]
+
+     if weights.dtype == torch.uint8:
+         # In case of Int4, double the input channels because weights are compressed
+         inC *= 2
+
+     # Set tensors as contiguous in memory
+     x = set_contiguous(x)
+     weights = set_contiguous(weights)
+     if len(weights.shape) > 2:
+         weights = weights.view([-1, weights.shape[-1]])
+
+     if weights.dtype.is_floating_point:
+         op_class = Linear if op_id is not None else MatMul
+         op_class_name = op_class.__name__
+         create_op = partial(op_class)
+         op_args = [weights.numpy()]
+     elif weights.dtype in (torch.int8, torch.uint8):
+         if scale is None:
+             raise RuntimeError(
+                 f"Quantized matmul (weights dtype == {weights.dtype}) requires a non-null scale"
+             )
+         op_class = QLinear if op_id is not None else QMatMul
+         op_class_name = op_class.__name__
+         np_dtype = np.int8 if weights.dtype == torch.int8 else np.uint8
+         create_op = partial(op_class, dtype=np_dtype)
+         op_args = [weights.numpy(), scale.numpy()]
+     else:
+         raise RuntimeError(f"Unsupported dtype for weights {weights.dtype}")
+
+     if not x.dtype.is_floating_point:
+         raise RuntimeError(f"Unsupported dtype for activation {x.dtype}")
+
+     # Pass op_id only to the op classes that accept it
+     op_kwargs = {"op_id": op_id} if op_id else {}
+
+     original_input_shape = x.shape
+     expected_output_shape = list(original_input_shape[:-1]) + [outC]
+
+     if len(x.shape) < 2:
+         raise RuntimeError(f"Input shape {x.shape} must be at least 2D")
+
+     # Reshape input
+     input_dtype = x.dtype
+     x = x.to(torch.float16) if input_dtype != torch.float16 else x
+     if len(x.shape) > 2 or x.shape[-1] != inC:
+         x = x.view([-1, inC])
+     x_np = x.numpy()
+
+     batch = x_np.shape[0]
+
+     key = f"{op_class_name}_{batch}_{inC}_x_{outC}_{inC}_{x_np.dtype}"
+     models = _model_cache.get(key, None)
+
+     if models is None:
+         _model_cache[key] = deque([create_op(inC, outC, batch)])
+     elif len(models) < 1:
+         _model_cache[key].append(create_op(inC, outC, batch))
+     else:
+         _model_cache[key].rotate(1)
+
+     # Get the model
+     model = _model_cache[key][0]
+
+     profiling_name = "matvec" if batch == 1 else "matmul"
+     with record_function(f"npu_{profiling_name}_{key}"):
+         ret = model.run(x_np, *op_args, **op_kwargs)
+
+     return adapt_output_tensor(ret, expected_output_shape, input_dtype)
+
+
+ def adapt_output_tensor(
+     output: np.ndarray, original_shape: torch.Size, input_dtype: torch.dtype
+ ) -> torch.Tensor:
+     """Adapt the output tensor to the original shape and dtype.
+
+     Args:
+         output (np.ndarray): output tensor
+         original_shape (torch.Size): original shape
+         input_dtype (torch.dtype): input dtype
+
+     Returns:
+         torch.Tensor: output tensor
+     """
+     output = torch.from_numpy(output)
+     if output.shape != original_shape:
+         output = output.view(original_shape)
+     # A copy is needed as the same buffer can be reused
+     return output.to(input_dtype, copy=True)
+
+
+ def set_contiguous(tensor: torch.Tensor) -> torch.Tensor:
+     """Set tensor to be contiguous in memory.
+
+     Args:
+         tensor (torch.Tensor): input tensor
+
+     Returns:
+         torch.Tensor: output, contiguous tensor
+     """
+     if not tensor.is_contiguous():
+         return tensor.contiguous()
+     return tensor
+
+
+ @torch.no_grad()
+ def run_factory(
+     x: Union[torch.Tensor, List[torch.Tensor]],
+     weights: List[torch.Tensor],
+     backend_cls: Any,
+     op_id: Optional[str] = None,
+     replica: int = 1,
+ ) -> Union[torch.Tensor, List[torch.Tensor]]:
+     """Run a factory operation. Depending on the dtype of the weights it runs a float or quantized operation.
+
+     Args:
+         x (Union[torch.Tensor, List[torch.Tensor]]): Activation tensor(s). Its dtype must be torch.float16
+         weights (List[torch.Tensor]): Weights tensors. Their dtype can be torch.float16 or torch.int8
+         backend_cls (Any): Backend class to run
+         op_id (Optional[str], optional): Operation ID. Defaults to None.
+         replica (int, optional): Number of model replicas to cache. Defaults to 1.
+
+     Returns:
+         Union[torch.Tensor, List[torch.Tensor]]: result
+     """
+     global _model_cache
+
+     # Pass op_id only to the op classes that accept it
+     op_kwargs = {"op_id": op_id} if op_id else {}
+
+     if not isinstance(x, (list, tuple)):
+         x = [x]
+
+     # Reshape input
+     input_dtype = x[0].dtype
+     x_np = [set_contiguous(elem).to(torch.float16).numpy() for elem in x]
+     op_args = [set_contiguous(w).to(torch.float16).numpy() for w in weights]
+
+     shape_dtype_signature = "_".join(
+         ["_".join(str(dim) for dim in t.shape) + f"_{t.dtype}" for t in x_np + op_args]
+     )
+     key = f"{backend_cls.func.__name__}_{shape_dtype_signature}"
+     models = _model_cache.get(key, None)
+
+     input_shapes = [elem.shape for elem in x_np]
+     if models is None:
+         _model_cache[key] = deque([backend_cls(*input_shapes) for _ in range(replica)])
+     elif len(models) < 1:
+         _model_cache[key].append(backend_cls(*input_shapes))
+     else:
+         _model_cache[key].rotate(1)
+
+     # Get the model
+     model = _model_cache[key][0]
+
+     with record_function(f"npu_factory_mul_{key}"):
+         ret = model.run(*x_np, *op_args, **op_kwargs)
+
+     if isinstance(ret, list):
+         return [adapt_output_tensor(r, r.shape, input_dtype) for r in ret]
+
+     return adapt_output_tensor(ret, ret.shape, input_dtype)
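run_matmul keys each compiled model by op class, shape, and dtype, and keeps a deque per key so repeated calls reuse (and rotate through) already compiled graphs instead of recompiling. A short usage sketch; sizes are arbitrary and an NPU-capable OpenVINO runtime is assumed:

import torch
from intel_npu_acceleration_library.backend.runtime import run_matmul, clear_cache

x = torch.rand(8, 128, dtype=torch.float16)
w = torch.rand(64, 128, dtype=torch.float16)

y1 = run_matmul(x, w)  # first call compiles a MatMul for this (batch, inC, outC) and caches it
y2 = run_matmul(x, w)  # same cache key -> reuses the compiled model
assert y1.shape == y2.shape == (8, 64)

clear_cache()          # drop all cached compiled models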