bigdl-core-npu 2.5.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl_core_npu-2.5.0.dist-info/METADATA +35 -0
- bigdl_core_npu-2.5.0.dist-info/RECORD +223 -0
- bigdl_core_npu-2.5.0.dist-info/WHEEL +5 -0
- bigdl_core_npu-2.5.0.dist-info/top_level.txt +1 -0
- intel_npu_acceleration_library/__init__.py +24 -0
- intel_npu_acceleration_library/_version.py +6 -0
- intel_npu_acceleration_library/backend/__init__.py +37 -0
- intel_npu_acceleration_library/backend/base.py +215 -0
- intel_npu_acceleration_library/backend/bindings.py +279 -0
- intel_npu_acceleration_library/backend/compression.py +24 -0
- intel_npu_acceleration_library/backend/convolution.py +58 -0
- intel_npu_acceleration_library/backend/factory.py +944 -0
- intel_npu_acceleration_library/backend/linear.py +60 -0
- intel_npu_acceleration_library/backend/matmul.py +59 -0
- intel_npu_acceleration_library/backend/mlp.py +58 -0
- intel_npu_acceleration_library/backend/ops.py +141 -0
- intel_npu_acceleration_library/backend/qlinear.py +71 -0
- intel_npu_acceleration_library/backend/qmatmul.py +66 -0
- intel_npu_acceleration_library/backend/runtime.py +210 -0
- intel_npu_acceleration_library/backend/sdpa.py +107 -0
- intel_npu_acceleration_library/backend/tensor.py +1050 -0
- intel_npu_acceleration_library/backend/utils.py +70 -0
- intel_npu_acceleration_library/compiler.py +194 -0
- intel_npu_acceleration_library/device.py +230 -0
- intel_npu_acceleration_library/dtypes.py +122 -0
- intel_npu_acceleration_library/external/openvino/__init__.py +71 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +20 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
- intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +352 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +139 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +98 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +119 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +289 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +118 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +536 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +256 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +460 -0
- intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
- intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +26 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +4 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +21 -0
- intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
- intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
- intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +18 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3067 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +399 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +10 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +85 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +189 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +783 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +38 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +429 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +70 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
- intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +536 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +35 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +246 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +205 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +109 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
- intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
- intel_npu_acceleration_library/external/openvino/utils.py +98 -0
- intel_npu_acceleration_library/functional/__init__.py +8 -0
- intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
- intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/modelling.py +150 -0
- intel_npu_acceleration_library/nn/__init__.py +20 -0
- intel_npu_acceleration_library/nn/autograd.py +68 -0
- intel_npu_acceleration_library/nn/conv.py +257 -0
- intel_npu_acceleration_library/nn/functional.py +1207 -0
- intel_npu_acceleration_library/nn/linear.py +162 -0
- intel_npu_acceleration_library/nn/llm.py +417 -0
- intel_npu_acceleration_library/nn/module.py +393 -0
- intel_npu_acceleration_library/optimizations.py +157 -0
- intel_npu_acceleration_library/quantization.py +174 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# flake8: noqa
|
5
|
+
# mypy: ignore-errors
|
6
|
+
|
7
|
+
class ModuleExtension:
    """Extension that replaces an entire PyTorch module by a single operation.

    This functionality works with PyTorch models only. A module can be
    identified by module type (e.g. torch.nn.Linear), by a module instance in
    the model, or by module name.
    """

    def __init__(self, module, target_op, evaluate=None, convert=None):
        """Create the extension.

        Args:
            module (str, torch.nn.Module, type(torch.nn.Module)): PyTorch
                module to replace.
            target_op (str): target operation used as a replacer for the
                module; either the name of an extension operation or an
                existing PyTorch operation (with prim:: or aten:: prefix
                following TorchScript syntax).
            evaluate (callable with args module, *args, **kwargs): callable
                that stands in for the target module during model execution,
                responsible for producing valid output so tracing succeeds.
                The provided code is not part of the final traced model; it
                only produces valid results while tracing.
            convert (callable with args target_op, *args, **kwargs): callable
                that is traced and becomes part of the final model instead of
                the target module. It accepts target_op as the first
                parameter; target_op appears as a single node in the graph
                whose type is the target_op string above.
        """
        self.module = module
        self.target_op = target_op
        # Default evaluate: just run the original module so tracing sees
        # valid, correctly-shaped outputs.
        if evaluate is None:
            evaluate = lambda module, *args, **kwargs: module(*args, **kwargs)
        self.evaluate = evaluate
        # Default convert: forward all inputs straight into the target op.
        if convert is None:
            convert = lambda module, target_op, *args, **kwargs: target_op(*args, **kwargs)
        self.convert = convert
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# flake8: noqa
|
5
|
+
# mypy: ignore-errors
|
6
|
+
|
7
|
+
import torch
|
8
|
+
from openvino.frontend.pytorch import ModuleExtension
|
9
|
+
|
10
|
+
|
11
|
+
class no_jit_trace:
    """Context manager that suspends TorchScript tracing for its body."""

    def __enter__(self):
        # Remember the currently active tracing state, then switch tracing off
        # so anything executed in the body is invisible to the tracer.
        self.state = torch._C._get_tracing_state()
        torch._C._set_tracing_state(None)

    def __exit__(self, *exc_info):
        # Reinstate whatever tracing state was active on entry and drop the
        # stashed reference.
        torch._C._set_tracing_state(self.state)
        self.state = None
|
19
|
+
|
20
|
+
|
21
|
+
def patch_model(model, module_extensions, orig_forward_name):
    """Patch submodules of `model` according to `module_extensions`.

    A submodule matches when `module_extensions` contains the module instance
    itself, its class, or its dotted name (from `model.named_modules()`). For
    each match the submodule's forward is replaced, and the original forward
    is stashed on the module under the attribute `orig_forward_name` so
    `unpatch_model` can restore it later.
    """
    def module_patcher(m, name):
        # Look the extension up by instance first, then by class, then by name.
        extension = None
        if m in module_extensions:
            extension = module_extensions[m]
        elif m.__class__ in module_extensions:
            extension = module_extensions[m.__class__]
        elif name in module_extensions:
            extension = module_extensions[name]

        if extension:
            # The Trampoline class is instantiated for every module replacement,
            # so we can use class members individually for each module.
            class Trampoline(torch.autograd.Function):
                target_extension = extension
                original_module = m
                # Arguments of the most recent call; set by new_forward below
                # so Trampoline.forward can replay them for `evaluate`.
                stashed_args = None
                stashed_kwargs = None

                @staticmethod
                @torch.jit.ignore
                def forward(*args, **kwargs):
                    with no_jit_trace():
                        # `module` is going to be passed to a user-defined function `evaluate`.
                        # `module` is patched: forward function was replaced, and we are
                        # actually in this patched function right in this code.
                        # If we pass `module` as-is to the user code below, and it happens
                        # to call forward, it will lead to infinite recursion or fail,
                        # so we need to temporarily patch the module back to the original
                        # forward and then return it back again.
                        # Stash the current forward to be able to return it back.
                        patched_forward = m.forward
                        # set original forward for the module
                        m.forward = getattr(m, orig_forward_name)
                        # call user code
                        results = extension.evaluate(
                            m, *Trampoline.stashed_args, **Trampoline.stashed_kwargs)  # call user code
                        m.forward = patched_forward  # return patched forward back
                        return results

            def new_forward(*args, **kwargs):
                # Stash the call arguments so Trampoline.forward (invoked via
                # Trampoline.apply inside `convert`) can pass them to `evaluate`.
                Trampoline.stashed_args = args
                Trampoline.stashed_kwargs = kwargs
                return extension.convert(m, Trampoline.apply, *args, **kwargs)
            # Keep the original forward reachable for unpatching and evaluation.
            setattr(m, orig_forward_name, m.forward)
            m.forward = new_forward

    for name, m in model.named_modules():
        if hasattr(m, orig_forward_name):
            # already patched, skipping with a warning because it is unexpected
            print(f'[ WARNING ] Unexpectedly found already patched module {name} while applying ModuleExtension during PyTorch model conversion. '
                  'Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model.')
            continue
        module_patcher(m, name)
|
71
|
+
|
72
|
+
|
73
|
+
def unpatch_model(model, orig_forward_name):
    """Undo patch_model: restore each submodule's stashed original forward.

    Submodules that do not carry the stash attribute are left untouched.
    Failures are reported but do not abort unpatching of other modules.
    """
    for _, submodule in model.named_modules():
        if not hasattr(submodule, orig_forward_name):
            continue
        try:
            # Put the stashed forward back and remove the stash attribute.
            submodule.forward = getattr(submodule, orig_forward_name)
            delattr(submodule, orig_forward_name)
        except Exception as error:
            print('[ WARNING ] Exception raised during model unpatching. Depending on the exact issue it may lead to broken original model.')
            print('Original exception details:')
            print(error)
|
83
|
+
|
84
|
+
|
85
|
+
def __make_16bit_traceable(model: torch.nn.Module):
    """Make a model holding fp16/bf16 weights traceable.

    torch.nn.Linear layers are replaced via a ModuleExtension by a single
    aten::linear node; every other module that carries a half-precision
    weight is upcast to fp32 in place.
    """
    linear_extension = ModuleExtension(
        torch.nn.Linear,
        "aten::linear",
        # Tracing stand-in: emit a correctly-shaped fp32 constant instead of
        # running the half-precision matmul.
        evaluate=lambda module, *args, **kwargs: torch.ones(
            list(args[0].shape[:-1]) + [module.out_features], dtype=torch.float32) * 0.5,
        # Final graph: a single aten::linear node fed by input, weight, bias.
        convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias),
    )
    extensions = {torch.nn.Linear: linear_extension}
    patch_model(model, extensions,
                "_openvino_module_extension_patch_orig_forward")
    # Everything not handled by an extension gets upcast to fp32.
    for _, submodule in model.named_modules():
        if submodule.__class__ in extensions:
            continue
        if hasattr(submodule, "weight") and submodule.weight.dtype in (torch.float16, torch.bfloat16):
            submodule.float()
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# flake8: noqa
|
5
|
+
# mypy: ignore-errors
|
6
|
+
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
from functools import partial
|
10
|
+
from hashlib import sha256
|
11
|
+
|
12
|
+
import torch
|
13
|
+
from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
|
14
|
+
from torch._dynamo.backends.registry import register_backend
|
15
|
+
from torch._inductor.compile_fx import compile_fx
|
16
|
+
from torch.fx.experimental.proxy_tensor import make_fx
|
17
|
+
from torch._decomp import decomposition_table, get_decompositions
|
18
|
+
|
19
|
+
from openvino.frontend import FrontEndManager
|
20
|
+
from openvino.runtime import Core, Type, PartialShape
|
21
|
+
from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder
|
22
|
+
from openvino.frontend.pytorch.torchdynamo import decompositions
|
23
|
+
from openvino.frontend.pytorch.torchdynamo.decompositions import get_aot_decomposition_list, get_inf_decomposition_list
|
24
|
+
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
|
25
|
+
from openvino.frontend.pytorch.torchdynamo.execute import execute, execute_cached
|
26
|
+
from openvino.frontend.pytorch.torchdynamo.compile import cached_model_name, openvino_compile_cached_model
|
27
|
+
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_model_caching, _get_decompositions, _get_aot_autograd
|
28
|
+
|
29
|
+
from openvino.runtime import Core, Type, PartialShape
|
30
|
+
|
31
|
+
logger = logging.getLogger(__name__)
|
32
|
+
logger.setLevel(logging.WARNING)
|
33
|
+
|
34
|
+
"""
|
35
|
+
This is a preview feature in OpenVINO. This feature
|
36
|
+
enables users to compile PyTorch models using torch.compile
|
37
|
+
with OpenVINO as a target backend in PyTorch applications
|
38
|
+
|
39
|
+
Sample usage:
|
40
|
+
This sample code loads resnet50 torchvision model and compiles it using torch dynamo.
|
41
|
+
We can then use this model for inference. We only need to add two lines of code to
|
42
|
+
the Pytorch applications which are marked in the code below
|
43
|
+
|
44
|
+
1) import openvino.torch
|
45
|
+
model = torchvision.models.resnet50()
|
46
|
+
2) model = torch.compile(model, backend="openvino")
|
47
|
+
"""
|
48
|
+
|
49
|
+
# Module-level options stash: written by the `openvino` backend entry point
# when aot_autograd is enabled, and read back by fx_openvino when it is
# invoked (by aot_autograd) without an explicit options argument.
openvino_options = {}
|
50
|
+
|
51
|
+
@register_backend
@fake_tensor_unsupported
def openvino(subgraph, example_inputs, options=None):
    """torch.compile backend entry point registered under the name "openvino".

    When the "aot_autograd" option is enabled, the subgraph is compiled via
    AOT autograd with fx_openvino as both the forward and backward compiler,
    using the combined inference + AOT decomposition lists. Otherwise the
    subgraph is handed to fx_openvino directly.
    """
    if (_get_aot_autograd(options)):
        # Stash options globally so fx_openvino — which aot_autograd invokes
        # without an options argument — can still pick them up.
        global openvino_options
        openvino_options = options
        decompositions = _get_decompositions(options) + get_inf_decomposition_list()
        decompositions = decompositions + get_aot_decomposition_list()
        return aot_autograd(fw_compiler=fx_openvino,
                            bw_compiler=fx_openvino,
                            decompositions=get_decompositions(decompositions))(subgraph, example_inputs)
    return fx_openvino(subgraph, example_inputs, options)
|
63
|
+
|
64
|
+
def fx_openvino(subgraph, example_inputs, options=None):
    """Compile an FX subgraph with OpenVINO, returning a callable executor.

    When model caching is enabled and a fully-supported cached model exists on
    disk, the cached OpenVINO model is loaded and executed directly. Otherwise
    the subgraph is (optionally) retraced with decompositions, partitioned into
    OpenVINO-supported regions, and executed through the partitioned executor.
    Any failure falls back to Inductor's compile_fx.
    """
    try:
        # aot_autograd calls this compiler without options; fall back to the
        # globally stashed ones set by the `openvino` entry point.
        if len(openvino_options) != 0:
            options = openvino_options
        executor_parameters = None
        inputs_reversed = False
        openvino_model_caching = _get_model_caching(options)
        if openvino_model_caching is not None and openvino_model_caching:
            # Create a hash to be used for caching
            model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest()
            executor_parameters = {"model_hash_str": model_hash_str}
            # Check if the model was fully supported and already cached.
            # NOTE(review): example_inputs is reversed in place for the cache
            # name computation and restored below — presumably to match the
            # ordering used on the compile side; confirm against
            # cached_model_name/openvino_compile_cached_model.
            example_inputs.reverse()
            inputs_reversed = True
            maybe_fs_cached_name = cached_model_name(model_hash_str + "_fs", _get_device(options), example_inputs, _get_cache_dir(options))
            if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"):
                # Model is fully supported and already cached. Run the cached OV model directly.
                compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, options, *example_inputs)

                def _call(*args):
                    res = execute_cached(compiled_model, *args)
                    return res
                return _call
        if inputs_reversed:
            # Restore the caller's input order before tracing below.
            example_inputs.reverse()

        if (_get_aot_autograd(options)):
            # aot_autograd already delivers a decomposed FX graph.
            model = subgraph
        else:
            from torch._subclasses.fake_tensor import FakeTensorMode
            decompositions = _get_decompositions(options) + get_inf_decomposition_list()
            # Trace with fake tensors so no real computation is performed.
            with FakeTensorMode(allow_non_fake_inputs=True):
                model = make_fx(subgraph, decomposition_table=get_decompositions(decompositions))(*example_inputs)

        with torch.no_grad():
            model.eval()

        partitioner = Partitioner(options)
        compiled_model = partitioner.make_partitions(model, options)

        if executor_parameters is not None and 'model_hash_str' in executor_parameters:
            # Check if the model is fully supported.
            fully_supported = partitioner.check_fully_supported(compiled_model)
            if fully_supported:
                # Tag the cache entry so future runs can take the fast path above.
                executor_parameters["model_hash_str"] += "_fs"

        def _call(*args):
            res = execute(compiled_model, *args, executor="openvino",
                          executor_parameters=executor_parameters, options=options)
            return res
        return _call
    except Exception as e:
        # Best-effort fallback: let Inductor compile the subgraph instead.
        logger.debug(f"Failed in OpenVINO execution: {e}")
        return compile_fx(subgraph, example_inputs)
|
117
|
+
|
118
|
+
def reset():
    """Reset the backend's cached compilation state.

    NOTE(review): `clear_caches` does not appear among the imports visible in
    this file view — presumably imported from a sibling torchdynamo module;
    confirm the import exists in the full file.
    """
    clear_caches()
|
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# flake8: noqa
|
5
|
+
# mypy: ignore-errors
|
6
|
+
|
7
|
+
from typing import Optional, Any
|
8
|
+
from openvino.runtime import Core
|
9
|
+
|
10
|
+
|
11
|
+
def _get_device(options) -> Optional[Any]:
    """Resolve the OpenVINO target device from *options*.

    Defaults to "CPU" when *options* is None, lacks a "device" key, or the
    key maps to None.  Asserts that an explicitly requested device is one
    of the devices OpenVINO actually reports as available.
    """
    core = Core()
    requested = options["device"] if options is not None and "device" in options else "CPU"
    if requested is None:
        # An explicit None falls back to the default device.
        return "CPU"
    assert requested in core.available_devices, (
        "Specified device "
        + requested
        + " is not in the list of OpenVINO Available Devices"
    )
    return requested
|
27
|
+
|
28
|
+
def _is_cache_dir_in_config(options) -> Optional[Any]:
|
29
|
+
if options is not None and "config" in options:
|
30
|
+
cfg = options["config"]
|
31
|
+
if cfg is not None and "CACHE_DIR" in cfg:
|
32
|
+
return True
|
33
|
+
return False
|
34
|
+
|
35
|
+
|
36
|
+
def _get_cache_dir(options) -> Optional[Any]:
    """Pick the model cache directory.

    Precedence: options["config"]["CACHE_DIR"] (when present and non-None)
    over options["cache_dir"], falling back to "./cache".
    """
    selected = "./cache"
    if options is not None:
        if "cache_dir" in options:
            selected = options["cache_dir"]
        if "config" in options:
            cfg = options["config"]
            if cfg is not None and "CACHE_DIR" in cfg:
                # The OpenVINO config entry wins over the plain option.
                selected = cfg["CACHE_DIR"]
    return selected
|
43
|
+
|
44
|
+
|
45
|
+
def _get_aot_autograd(options) -> Optional[Any]:
|
46
|
+
if options is not None and "aot_autograd" in options:
|
47
|
+
aot_autograd = options["aot_autograd"]
|
48
|
+
if bool(aot_autograd) and str(aot_autograd).lower() not in ["false", "0"]:
|
49
|
+
return True
|
50
|
+
else:
|
51
|
+
return False
|
52
|
+
|
53
|
+
|
54
|
+
def _get_model_caching(options) -> Optional[Any]:
|
55
|
+
if options is not None and "model_caching" in options:
|
56
|
+
caching = options["model_caching"]
|
57
|
+
if bool(caching) and str(caching).lower() not in ["false", "0"]:
|
58
|
+
return True
|
59
|
+
return False
|
60
|
+
|
61
|
+
|
62
|
+
def _get_config(options) -> Optional[Any]:
|
63
|
+
if options is not None and "config" in options:
|
64
|
+
return options["config"]
|
65
|
+
return {}
|
66
|
+
|
67
|
+
def _get_decompositions(options) -> Optional[Any]:
|
68
|
+
decompositions = []
|
69
|
+
if options is not None and "decompositions" in options:
|
70
|
+
decompositions = options["decompositions"]
|
71
|
+
return decompositions
|
72
|
+
|
73
|
+
def _get_disabled_ops(options) -> Optional[Any]:
|
74
|
+
disabled_ops = []
|
75
|
+
if options is not None and "disabled_ops" in options:
|
76
|
+
disabled_ops = options["disabled_ops"]
|
77
|
+
return disabled_ops
|
78
|
+
|
79
|
+
def _is_testing(options) -> Optional[Any]:
|
80
|
+
if options is not None and "testing" in options:
|
81
|
+
is_testing = options["testing"]
|
82
|
+
if bool(is_testing) and str(is_testing).lower not in ["false", "0"]:
|
83
|
+
return True
|
84
|
+
return False
|
85
|
+
|
@@ -0,0 +1,141 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
4
|
+
|
5
|
+
# flake8: noqa
|
6
|
+
# mypy: ignore-errors
|
7
|
+
|
8
|
+
import os
|
9
|
+
import torch
|
10
|
+
import torch.overrides
|
11
|
+
|
12
|
+
from hashlib import sha256
|
13
|
+
from torch.fx import GraphModule
|
14
|
+
|
15
|
+
from openvino.frontend import FrontEndManager
|
16
|
+
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
|
17
|
+
from openvino.runtime import Core, Type, PartialShape, serialize
|
18
|
+
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_config, _is_cache_dir_in_config
|
19
|
+
|
20
|
+
from typing import Callable, Optional
|
21
|
+
|
22
|
+
import logging
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
logger.setLevel(logging.WARNING)
|
25
|
+
|
26
|
+
def cached_model_name(model_hash_str, device, args, cache_root, reversed = False):
    """Build the cache-file base path for a serialized compiled model.

    The name is ``<cache_root>/model/<hash>_<device><digest>`` where the
    digest is the SHA-256 of every input's torch type and size (optionally
    folded in reverse order).  Returns None when *model_hash_str* is None
    (caching disabled) or the cache directory cannot be created.
    """
    if model_hash_str is None:
        return None

    model_cache_dir = cache_root + "/model/"
    try:
        os.makedirs(model_cache_dir, exist_ok=True)
    except OSError as error:
        logger.warning(f"Cache directory {cache_root} cannot be created. Model caching is disabled. Error: {error}")
        return None
    base_name = model_cache_dir + model_hash_str + "_" + device

    # One "_<type><shape>" token per example input; spaces stripped so the
    # token is stable regardless of repr spacing.
    tokens = []
    for input_data in args:
        tokens.append("_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", ""))
    if reversed:
        tokens = tokens[::-1]
    digest = sha256("".join(tokens).encode('utf-8')).hexdigest()
    return base_name + digest
|
49
|
+
|
50
|
+
def openvino_compile_cached_model(cached_model_path, options, *example_inputs):
    """Load a previously serialized OpenVINO model and compile it.

    Reads ``<cached_model_path>.xml``, pins every model input to the
    corresponding example input's dtype and shape, then compiles it for the
    device selected in *options*.  OpenVINO's own kernel cache is pointed at
    the configured cache directory unless the user's config already sets
    CACHE_DIR.
    """
    core = Core()
    model = core.read_model(cached_model_path + ".xml")

    # torch dtype -> OpenVINO element type.
    dtype_mapping = {
        torch.float32: Type.f32,
        torch.float64: Type.f64,
        torch.float16: Type.f16,
        torch.int64: Type.i64,
        torch.int32: Type.i32,
        torch.uint8: Type.u8,
        torch.int8: Type.i8,
        torch.bool: Type.boolean
    }

    for idx, tensor in enumerate(example_inputs):
        node = model.inputs[idx].get_node()
        node.set_element_type(dtype_mapping[tensor.dtype])
        node.set_partial_shape(PartialShape(list(tensor.shape)))
    model.validate_nodes_and_infer_types()

    if _is_cache_dir_in_config(options):
        # The user's config already controls caching; use it as-is.
        config = _get_config(options)
    else:
        config = {"CACHE_DIR": _get_cache_dir(options)}

    return core.compile_model(model, _get_device(options), config)
|
80
|
+
|
81
|
+
def openvino_compile(gm: GraphModule, *args, model_hash_str: str = None, options=None):
    """Convert a TorchFX GraphModule to an OpenVINO model and compile it.

    When *model_hash_str* is given, the converted model is looked up in (and
    serialized to) the on-disk cache, keyed by the hash, the device and the
    example inputs' dtypes/shapes.

    :param gm: TorchFX graph module to convert.
    :param args: example inputs used to pin input dtypes and shapes.
    :param model_hash_str: optional hash string enabling model caching.
    :param options: backend options (device, cache_dir, config, ...).
    :return: the compiled OpenVINO model.
    """
    core = Core()

    device = _get_device(options)
    cache_root = _get_cache_dir(options)
    file_name = cached_model_name(model_hash_str, device, args, cache_root)

    # Cache hit requires both the IR .xml and its .bin weights on disk.
    if file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin"):
        om = core.read_model(file_name + ".xml")
    else:
        fe_manager = FrontEndManager()
        fe = fe_manager.load_by_framework("pytorch")

        # NOTE(review): input_shapes/input_types are collected but never used
        # below — looks like dead code left from an earlier revision.
        input_shapes = []
        input_types = []
        for idx, input_data in enumerate(args):
            if isinstance(input_data, int):
                # Python ints are modelled as a 1-element i64 tensor.
                input_types.append(torch.int64)
                input_shapes.append(torch.Size([1]))
            else:
                input_types.append(input_data.type())
                input_shapes.append(input_data.size())

        decoder = TorchFXPythonDecoder(gm)

        im = fe.load(decoder)

        om = fe.convert(im)

        if file_name is not None:
            # Persist so the next call with the same hash takes the cache-hit path.
            serialize(om, file_name + ".xml", file_name + ".bin")

    # torch dtype -> OpenVINO element type.
    dtype_mapping = {
        torch.float32: Type.f32,
        torch.float64: Type.f64,
        torch.float16: Type.f16,
        torch.int64: Type.i64,
        torch.int32: Type.i32,
        torch.uint8: Type.u8,
        torch.int8: Type.i8,
        torch.bool: Type.boolean
    }

    # Pin each model input to the example input's element type and shape.
    for idx, input_data in enumerate(args):
        if isinstance(input_data, int):
            om.inputs[idx].get_node().set_element_type(dtype_mapping[torch.int64])
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([1]))))
        else:
            om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
            # NOTE(review): `decoder` is only bound on the cache-miss branch;
            # a cache hit with tensor args would raise NameError here —
            # confirm whether cache hits are only served via
            # openvino_compile_cached_model instead.
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(decoder.input_shapes[idx])))

    om.validate_nodes_and_infer_types()

    config = _get_config(options)

    if model_hash_str is not None:
        if not _is_cache_dir_in_config(options):
            # User-supplied CACHE_DIR wins; otherwise point OpenVINO's own
            # kernel cache at the configured cache root.
            config["CACHE_DIR"] = cache_root

    compiled = core.compile_model(om, device, config)
    return compiled
|
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Copyright (C) 2018-2024 Intel Corporation
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
4
|
+
|
5
|
+
# flake8: noqa
|
6
|
+
# mypy: ignore-errors
|
7
|
+
|
8
|
+
import torch
|
9
|
+
from torch._decomp.decompositions import aten, pw_cast_for_opmath
|
10
|
+
from torch._decomp import register_decomposition, get_decompositions
|
11
|
+
|
12
|
+
|
13
|
+
@register_decomposition(aten.convolution_backward)
@pw_cast_for_opmath
def convolution_backward(
    grad_output,
    inp,
    weight,
    bias,
    stride,
    padding,
    dilation,
    transposed,
    output_padding,
    groups,
    output_mask,
):
    """Registered decomposition of ``aten.convolution_backward``.

    Expresses the 2-D convolution backward pass with ``conv_transpose2d``
    calls so the graph can be lowered instead of hitting the opaque aten
    kernel.  Returns ``(grad_input, grad_weight, grad_bias)``.

    NOTE(review): ``transposed`` and ``output_mask`` are ignored, and the
    ``grad_weight`` expression below is not the textbook weight gradient
    (that would correlate ``inp`` against ``grad_output``) — confirm this is
    intentional for the graphs this backend supports.
    """
    # Presumably compensates for the output-size ambiguity of stride-2
    # convolutions; only the [2, 2] case is special-cased — TODO confirm.
    if stride == [2, 2]:
        output_padding = [1, 1]

    # Compute the gradient of the input tensor
    grad_input = torch.nn.functional.conv_transpose2d(
        grad_output, weight, stride=stride, padding=padding, dilation=dilation, groups=groups, output_padding=output_padding
    )

    # Compute the gradient of the weight tensor
    grad_weight = torch.nn.functional.conv_transpose2d(
        inp, weight.transpose(0, 1), stride=stride, padding=padding, dilation=dilation, groups=groups, output_padding=output_padding
    )

    # Compute the gradient of the bias tensor
    if bias is not None:
        # Sum over batch and spatial dims; channel dim (1) is kept.
        grad_bias = grad_output.sum([0, 2, 3], keepdim=True)
    else:
        grad_bias = None

    return grad_input, grad_weight, grad_bias
|
48
|
+
|
49
|
+
# Register a fallback decomposition for the flash-attention op only when the
# installed torch does not already provide one.
if len(get_decompositions([aten._scaled_dot_product_flash_attention.default])) == 0:
    @register_decomposition(aten._scaled_dot_product_flash_attention.default)
    def scaled_dot_product_flash_attention(
        query,
        key,
        value,
        dropout_p=0.0,
        is_causal=False,
        *,
        return_debug_mask=False,
        scale=None,
    ):
        """Decompose flash attention into the math SDPA reference kernel.

        Mirrors the 9-tuple return signature of
        ``aten._scaled_dot_product_flash_attention``; the flash-specific
        outputs (cum_seq_*, philox state, debug mask) are dummy empties.
        """
        # assumes query is (batch, num_heads, seq_len, head_dim) — TODO confirm
        batch_size, num_head, q_size, head_size = (
            query.shape[0],
            query.shape[1],
            query.shape[2],
            query.shape[3],
        )

        # Placeholder allocation; overwritten with the real logsumexp below.
        logsumexp = torch.empty([batch_size, q_size, num_head, head_size], dtype=torch.float)
        # Dummy bookkeeping values required only to satisfy the op's signature.
        cum_seq_q, cum_seq_k = torch.empty([], dtype=torch.long), torch.empty(
            [], dtype=torch.long
        )
        max_q, max_k = 0, 0
        philox_seed, philox_offset = torch.empty([], dtype=torch.long), torch.empty(
            [], dtype=torch.long
        )
        debug_attn_mask = torch.empty(
            [],
            dtype=query.dtype,
            device=query.device,
            requires_grad=query.requires_grad,
        )
        # The actual attention output comes from the math reference kernel.
        output, _ = aten._scaled_dot_product_attention_math.default(
            query, key, value, None, dropout_p, is_causal, None, scale=scale
        )

        # NOTE(review): logsumexp is recomputed with a fixed 1/sqrt(d)
        # scaling, ignoring the `scale` argument used for `output` — confirm.
        scores = torch.matmul(query, key.transpose(-2, -1)) / (key.size(-1) ** 0.5)
        logsumexp = torch.logsumexp(scores, dim=-1)

        # Transpose + contiguous() + transpose back forces a packed memory
        # layout while preserving the (batch, heads, seq, dim) order.
        output = output.transpose(1, 2).contiguous(memory_format=torch.contiguous_format)
        return (
            output.transpose(1, 2),
            logsumexp,
            cum_seq_q,
            cum_seq_k,
            max_q,
            max_k,
            philox_seed,
            philox_offset,
            debug_attn_mask,
        )
|
101
|
+
|
102
|
+
|
103
|
+
def get_aot_decomposition_list():
    """Return the aten ops to decompose when AOT Autograd is enabled."""
    _aten = torch.ops.aten
    return [
        _aten._scaled_dot_product_flash_attention.default,
        _aten._softmax.default,
        _aten._softmax_backward_data.default,
        _aten.convolution_backward.default,
        _aten.gelu_backward.default,
        _aten.native_group_norm.default,
        _aten.native_group_norm_backward.default,
        _aten.native_layer_norm.default,
        _aten.native_layer_norm_backward.default,
        _aten.slice_backward.default,
    ]
|
114
|
+
|
115
|
+
def get_inf_decomposition_list():
    """Return the aten ops to decompose for inference-mode graph capture."""
    return [torch.ops.aten.nll_loss_forward.default]
|