PyPI - mindstudio-probe - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.0.4 (py3-none-any.whl) → 1.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194)

{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +1 -1
mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
msprobe/README.md +46 -16
msprobe/__init__.py +16 -1
msprobe/config.json +0 -2
msprobe/core/advisor/advisor.py +8 -8
msprobe/core/advisor/advisor_const.py +6 -7
msprobe/core/advisor/advisor_result.py +12 -12
msprobe/core/common/const.py +64 -3
msprobe/core/common/exceptions.py +2 -2
msprobe/core/common/file_utils.py +54 -9
msprobe/core/common/inplace_op_checker.py +38 -0
msprobe/core/common/inplace_ops.yaml +251 -0
msprobe/core/common/log.py +21 -11
msprobe/core/common/utils.py +153 -167
msprobe/core/common_config.py +18 -25
msprobe/core/compare/acc_compare.py +209 -36
msprobe/core/compare/check.py +102 -17
msprobe/core/compare/compare_cli.py +21 -1
msprobe/core/compare/highlight.py +41 -5
msprobe/core/compare/multiprocessing_compute.py +33 -8
msprobe/core/compare/npy_compare.py +21 -6
msprobe/core/compare/utils.py +82 -48
msprobe/core/data_dump/data_collector.py +31 -32
msprobe/core/data_dump/data_processor/base.py +45 -22
msprobe/core/data_dump/data_processor/factory.py +20 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +11 -5
msprobe/core/data_dump/data_processor/pytorch_processor.py +24 -7
msprobe/core/data_dump/json_writer.py +63 -42
msprobe/core/data_dump/scope.py +32 -16
msprobe/core/grad_probe/constant.py +4 -0
msprobe/core/grad_probe/grad_compare.py +2 -3
msprobe/core/grad_probe/utils.py +16 -3
msprobe/docs/01.installation.md +19 -9
msprobe/docs/02.config_introduction.md +52 -80
msprobe/docs/03.config_examples.md +3 -13
msprobe/docs/04.acl_config_examples.md +11 -9
msprobe/docs/05.data_dump_PyTorch.md +140 -12
msprobe/docs/06.data_dump_MindSpore.md +47 -5
msprobe/docs/07.accuracy_checker_PyTorch.md +57 -34
msprobe/docs/08.accuracy_checker_online_PyTorch.md +51 -11
msprobe/docs/09.accuracy_checker_MindSpore.md +8 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +181 -99
msprobe/docs/11.accuracy_compare_MindSpore.md +162 -31
msprobe/docs/13.overflow_check_MindSpore.md +1 -1
msprobe/docs/15.free_benchmarking_PyTorch.md +59 -53
msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
msprobe/docs/17.grad_probe.md +14 -16
msprobe/docs/18.online_dispatch.md +89 -0
msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +22 -10
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/mindspore/__init__.py +1 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +35 -11
msprobe/mindspore/api_accuracy_checker/api_info.py +7 -0
msprobe/mindspore/cell_processor.py +27 -3
msprobe/mindspore/common/const.py +2 -0
msprobe/mindspore/common/utils.py +18 -2
msprobe/mindspore/compare/distributed_compare.py +9 -22
msprobe/mindspore/compare/layer_mapping.py +146 -0
msprobe/mindspore/compare/modify_mapping.py +107 -0
msprobe/mindspore/compare/ms_compare.py +173 -35
msprobe/mindspore/compare/ms_graph_compare.py +27 -11
msprobe/mindspore/debugger/debugger_config.py +16 -13
msprobe/mindspore/debugger/precision_debugger.py +37 -13
msprobe/mindspore/dump/dump_tool_factory.py +16 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +11 -1
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +82 -10
msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
msprobe/mindspore/dump/jit_dump.py +41 -17
msprobe/mindspore/dump/kernel_graph_dump.py +19 -3
msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -4
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +19 -4
msprobe/mindspore/free_benchmark/common/config.py +15 -0
msprobe/mindspore/free_benchmark/common/handler_params.py +15 -0
msprobe/mindspore/free_benchmark/common/utils.py +19 -5
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +16 -2
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +18 -3
msprobe/mindspore/free_benchmark/handler/base_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/fix_handler.py +15 -0
msprobe/mindspore/free_benchmark/handler/handler_factory.py +18 -3
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +22 -7
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -0
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +22 -7
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +44 -18
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +18 -4
msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +20 -5
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +15 -0
msprobe/mindspore/grad_probe/global_context.py +18 -8
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -4
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
msprobe/mindspore/service.py +42 -123
msprobe/pytorch/__init__.py +20 -1
msprobe/pytorch/api_accuracy_checker/common/config.py +19 -2
msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +47 -21
msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +67 -32
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +26 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +19 -2
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +51 -125
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +146 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +21 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +78 -33
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +36 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
msprobe/pytorch/bench_functions/__init__.py +18 -3
msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
msprobe/pytorch/bench_functions/confusion_transpose.py +15 -0
msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
msprobe/pytorch/bench_functions/linear.py +15 -0
msprobe/pytorch/bench_functions/matmul_backward.py +21 -6
msprobe/pytorch/bench_functions/npu_fusion_attention.py +180 -151
msprobe/pytorch/bench_functions/rms_norm.py +15 -0
msprobe/pytorch/bench_functions/rotary_mul.py +28 -9
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
msprobe/pytorch/bench_functions/swiglu.py +20 -5
msprobe/pytorch/common/__init__.py +15 -0
msprobe/pytorch/common/log.py +18 -6
msprobe/pytorch/common/parse_json.py +26 -11
msprobe/pytorch/common/utils.py +40 -35
msprobe/pytorch/compare/distributed_compare.py +11 -11
msprobe/pytorch/compare/match.py +15 -0
msprobe/pytorch/compare/pt_compare.py +38 -6
msprobe/pytorch/debugger/debugger_config.py +52 -39
msprobe/pytorch/debugger/precision_debugger.py +72 -24
msprobe/pytorch/free_benchmark/__init__.py +20 -5
msprobe/pytorch/free_benchmark/common/enums.py +28 -0
msprobe/pytorch/free_benchmark/common/params.py +15 -0
msprobe/pytorch/free_benchmark/common/utils.py +17 -1
msprobe/pytorch/free_benchmark/compare/grad_saver.py +28 -7
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +15 -0
msprobe/pytorch/free_benchmark/main.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +26 -2
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +55 -16
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
msprobe/pytorch/function_factory.py +17 -2
msprobe/pytorch/functional/module_dump.py +84 -0
msprobe/pytorch/grad_probe/grad_stat_csv.py +2 -2
msprobe/pytorch/hook_module/__init__.py +16 -1
msprobe/pytorch/hook_module/api_registry.py +13 -8
msprobe/pytorch/hook_module/hook_module.py +17 -19
msprobe/pytorch/hook_module/utils.py +4 -6
msprobe/pytorch/hook_module/wrap_aten.py +12 -11
msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
msprobe/pytorch/hook_module/wrap_functional.py +10 -11
msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
msprobe/pytorch/hook_module/wrap_torch.py +4 -6
msprobe/pytorch/hook_module/wrap_vf.py +4 -6
msprobe/pytorch/module_processer.py +17 -2
msprobe/pytorch/online_dispatch/compare.py +11 -12
msprobe/pytorch/online_dispatch/single_compare.py +7 -7
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +8 -0
msprobe/pytorch/online_dispatch/utils.py +1 -4
msprobe/pytorch/parse.py +15 -0
msprobe/pytorch/parse_tool/cli.py +5 -6
msprobe/pytorch/parse_tool/lib/compare.py +9 -10
msprobe/pytorch/parse_tool/lib/parse_tool.py +3 -0
msprobe/pytorch/parse_tool/lib/utils.py +28 -24
msprobe/pytorch/parse_tool/lib/visualization.py +1 -1
msprobe/pytorch/pt_config.py +167 -38
msprobe/pytorch/service.py +97 -32
mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
msprobe/pytorch/functional/data_processor.py +0 -0
msprobe/pytorch/functional/dump_module.py +0 -39
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0

msprobe/pytorch/bench_functions/npu_fusion_attention.py (changed)

@@ -1,6 +1,39 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+# 前向函数声明对比
+标杆实现:fusion_attention_forward: q, k, v, drop_mask, atten_mask, pse, scale, keep_prob
+融合算子:npu_fusion_attention_forward: query, key, value, head_num, input_layout, *, pse=None, padding_mask=None,
+                                      atten_mask=None, scale=1.0, keep_prob=1.0, pre_tockens=2147483647,
+                                      next_tockens=2147483647, inner_precise=0, prefix=None, sparse_mode=0,
+                                      gen_mask_parallel=True, sync=False
+# 反向函数声明对比
+标杆实现:fusion_attention_backward: dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob
+融合算子:npu_fusion_attention_backward: query, key, value, dy, head_num, input_layout, *, pse=None, padding_mask=None,
+                                       atten_mask=None, softmax_max=None, softmax_sum=None, softmax_in=None,
+                                       attention_in=None, scale_value=1.0, keep_prob=1.0, pre_tockens=2147483647,
+                                       next_tockens=2147483647, inner_precise=0, seed=0, offset=0,
+                                       numels=0, prefix=None, sparse_mode=0, gen_mask_parallel=True, sync=False
+"""
 import torch
 import numpy as np
 from einops import rearrange
 try:
     import torch_npu
 except ImportError:
@@ -9,35 +42,17 @@ except ImportError:
         # flash_attn为gpu的fa三方库
         from flash_attn import flash_attn_func
     except ImportError:
-        #如果为cpu的ut环境，则不做任何处理
+        # 如果为cpu的ut环境，则不做任何处理
         pass
 else:
     is_gpu = False
 from msprobe.pytorch.common.utils import logger
 from msprobe.core.common.const import Const, CompareConst
 gtype = torch.float64  # arm host必须选择float64，x86环境选择float32即可，64也行。arm计算很慢，s=8k的场景建议使用x86
 softmax_build_mode = "QKV"  # "MAX_SUM"
-"""
-# 前向函数声明对比
-标杆实现:fusion_attention_forward: q, k, v, drop_mask, atten_mask, pse, scale, keep_prob
-融合算子:npu_fusion_attention_forward: query, key, value, head_num, input_layout, *, pse=None, padding_mask=None,
-                                      atten_mask=None, scale=1.0, keep_prob=1.0, pre_tockens=2147483647,
-                                      next_tockens=2147483647, inner_precise=0, prefix=None, sparse_mode=0,
-                                      gen_mask_parallel=True, sync=False
-# 反向函数声明对比
-标杆实现:fusion_attention_backward: dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob
-融合算子:npu_fusion_attention_backward: query, key, value, dy, head_num, input_layout, *, pse=None, padding_mask=None,
-                                       atten_mask=None, softmax_max=None, softmax_sum=None, softmax_in=None,
-                                       attention_in=None, scale_value=1.0, keep_prob=1.0, pre_tockens=2147483647,
-                                       next_tockens=2147483647, inner_precise=0, seed=0, offset=0,
-                                       numels=0, prefix=None, sparse_mode=0, gen_mask_parallel=True, sync=False
-"""
 def softmax_forward(x):
     x_max = torch.max(x, dim=-1, keepdims=True)[0]
@@ -62,10 +77,10 @@ def broadcast_kv(num_heads, num_kv_heads, kv_tensor, dtype):
     factor = num_heads // num_kv_heads
     kv_shape = kv_tensor.shape
-    B = kv_shape[0]
-    S = kv_shape[2]
-    D = kv_shape[3]
-    kv_res = torch.zeros([B, num_heads, S, D]).to(dtype)
+    b = kv_shape[0]
+    s = kv_shape[2]
+    d = kv_shape[3]
+    kv_res = torch.zeros([b, num_heads, s, d]).to(dtype)
     for i in range(num_heads):
         j = i // factor
         kv_res[:, i:i + 1, :, :] = kv_tensor[:, j:j + 1, :, :]
@@ -112,7 +127,7 @@ def fusion_attention_backward(dx, q, k, v, softmax_res, drop_mask, pse, scale, k
 def parse_bsnd_args(query, key, head_num, input_layout):
     supported_input_layout = ["BSH", "SBH", "BSND", "BNSD", "TND"]
-    B, S1, S2, N1, N2, D, H1, H2 = None, None, None, head_num, None, None, None, None
+    b, s1, s2, n1, n2, d, h1, h2 = None, None, None, head_num, None, None, None, None
     if not isinstance(input_layout, str) or input_layout not in supported_input_layout:
         raise ValueError(f"Invalid input_layout arg which must be one of {supported_input_layout}.")
@@ -121,32 +136,33 @@ def parse_bsnd_args(query, key, head_num, input_layout):
         raise ValueError(f"input_layout {input_layout} does not supported for now.")
     try:
         if input_layout == "BSH":
-            B, S1, H1 = query.shape
-            _, S2, H2 = key.shape
-            D = H1 // N1
-            N2 = H2 // D
+            b, s1, h1 = query.shape
+            _, s2, h2 = key.shape
+            d = h1 // n1
+            n2 = h2 // d
         elif input_layout == "SBH":
-            S1, B, H1 = query.shape
-            S2, _, H2 = key.shape
-            D = H1 // N1
-            N2 = H2 // D
+            s1, b, h1 = query.shape
+            s2, _, h2 = key.shape
+            d = h1 // n1
+            n2 = h2 // d
         elif input_layout == "BSND":
-            B, S1, N1, D = query.shape
-            _, S2, N2, _ = key.shape
-            H1 = N1 * D
-            H2 = N2 * D
+            b, s1, n1, d = query.shape
+            _, s2, n2, _ = key.shape
+            h1 = n1 * d
+            h2 = n2 * d
         elif input_layout == "BNSD":
-            B, N1, S1, D = query.shape
-            _, N2, S2, _ = key.shape
-            H1 = N1 * D
-            H2 = N2 * D
+            b, n1, s1, d = query.shape
+            _, n2, s2, _ = key.shape
+            h1 = n1 * d
+            h2 = n2 * d
     except Exception as e:
         raise ValueError(f"query.shape: {query.shape}, key.shape: {key.shape}, parse_bsnd_args error: {e}") from e
-    if D == 0:
-        raise ValueError(f"Value D must be non-zero.")
-    DTYPE = query.dtype
-    return B, S1, S2, N1, N2, D, H1, H2, DTYPE
+    if d == 0:
+        raise ValueError(f"Value d must be non-zero.")
+    _dtype = query.dtype
+    ret = (b, s1, s2, n1, n2, d, h1, h2, _dtype)
+    return ret
 def convert_from_bnsd(_input, input_layout):
@@ -186,24 +202,26 @@ def convert_to_bnsd(_input, n, input_layout):
     return out.to(gtype)
-def generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tocken, next_tocken, dtype):
+def generate_atten_mask(*args):
     """
     # 当sparse_mode=2、3、4时小算子到融合算子会走这个优化，反过来看就要拆解回原来的基本实现
     ===> atten_mask = torch.from_numpy(np.triu(np.ones([2048, 2048]), k=1)).to(dtype)
     """
-    shape = [S1, S2]
+    sparse_mode, atten_mask, b, n1, s1, s2, pre_tocken, next_tocken, dtype = args
+    shape = [s1, s2]
     if atten_mask is not None:
         # 当FA的输入已经包含atten_mask时，可以认为已经是转换之后的mask矩阵了，有三种特殊场景，即稀疏矩阵场景，需要进行逆向还原
         if sparse_mode == 2 or sparse_mode == 3 or sparse_mode == 4:
-            logger.info(f"S1: {S1}, S2:{S2}, atten_mask.shape:{atten_mask.shape}, atten_mask.dtype:{atten_mask.dtype}")
+            logger.info(f"s1: {s1}, s2:{s2}, atten_mask.shape:{atten_mask.shape}, atten_mask.dtype:{atten_mask.dtype}")
             if atten_mask.dim() == 2 and atten_mask.shape[0] == 2048 and atten_mask.shape[1] == 2048:
                 if atten_mask.equal(torch.from_numpy(np.triu(np.ones([2048, 2048]), k=1)).to(atten_mask.dtype)):
                     if sparse_mode == 2:
                         atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=1))
                     elif sparse_mode == 3:
-                        atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=S2 - S1 + 1))
+                        atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=s2 - s1 + 1))
                     elif sparse_mode == 4:
                         atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1))
                         atten_mask_l = torch.from_numpy(np.tril(np.ones(shape), k=-pre_tocken - 1))
@@ -215,14 +233,14 @@ def generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tocken, next
     if atten_mask is not None:
         if atten_mask.dim() == 2:
-            if atten_mask.shape[0] != S1 or atten_mask.shape[1] != S2:
+            if atten_mask.shape[0] != s1 or atten_mask.shape[1] != s2:
                 raise ValueError(f"Invalid atten_mask shape `SS` {atten_mask.shape}")
-            shape = [S1, S2]
+            shape = [s1, s2]
         elif atten_mask.dim() == 4:
             if atten_mask.shape[1] == 1:
-                shape = [B, 1, S1, S2] if B != 1 else [1, 1, S1, S2]
+                shape = [b, 1, s1, s2] if b != 1 else [1, 1, s1, s2]
             else:
-                shape = [B, N1, S1, S2] if B != 1 else [1, N1, S1, S2]
+                shape = [b, n1, s1, s2] if b != 1 else [1, n1, s1, s2]
     if sparse_mode == 0:
         atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1))
@@ -233,7 +251,7 @@ def generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tocken, next
     elif sparse_mode == 2:
         atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=1))
     elif sparse_mode == 3:
-        atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=S2 - S1 + 1))
+        atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=s2 - s1 + 1))
     elif sparse_mode == 4:
         atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1))
         atten_mask_l = torch.from_numpy(np.tril(np.ones(shape), k=-pre_tocken - 1))
@@ -243,11 +261,11 @@ def generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tocken, next
     return atten_mask.to(dtype)
-def generate_kv(key, value, N1, N2):
+def generate_kv(key, value, n1, n2):
     # N不等长适配by cdy
-    if not (N1 == N2):
-        k_new = broadcast_kv(N1, N2, key, key.dtype)
-        v_new = broadcast_kv(N1, N2, value, value.dtype)
+    if not (n1 == n2):
+        k_new = broadcast_kv(n1, n2, key, key.dtype)
+        v_new = broadcast_kv(n1, n2, value, value.dtype)
     else:
         k_new = key
         v_new = value
@@ -305,26 +323,30 @@ def npu_fusion_attention_forward_patch(*args, **kwargs):
     head_num = get_head_num(*args, **kwargs)
     input_layout = get_input_layout(*args, **kwargs)
-    B, S1, S2, N1, N2, D, H1, H2, DTYPE = parse_bsnd_args(args[0], args[1], head_num, input_layout)
-    if N1 == N2 and S1 == S2:
-        logger.debug(f"running case : BNSD = {B}_{N1}_{S1}_{D}, sparse = {kwargs.get('sparse_mode', 0)}")
+    b, s1, s2, n1, n2, d, h1, h2, dtype = parse_bsnd_args(args[0], args[1], head_num, input_layout)
+    if n1 == n2 and s1 == s2:
+        logger.debug(f"running case : BNSD = {b}_{n1}_{s1}_{d}, sparse = {kwargs.get('sparse_mode', 0)}")
     else:
-        logger.debug(f"running case: BNSD = {B}_{N1}({N2})_{S1}({S2})_{D}, sparse = {kwargs.get('sparse_mode', 0)}")
-    if not (N1 % N2 == 0 and N1 >= N2):
-        raise ValueError(f"N1与N2不匹配,请检查: N1 = {N1}, N2 = {N2}.")
-    dims_kwargs = {"B": B, "S1": S1, "S2": S2, "N1": N1, "N2": N2,
-                   "D": D, "H1": H1, "H2": H2, "DTYPE": DTYPE}
-    new_kwargs = {"keep_prob": 1,
-                  "scale": kwargs.get("scale", 1 / (D ** 0.5)),
-                  "sparse_mode": kwargs.get("sparse_mode", 0),
-                  "prefix": kwargs.get("prefix"),
-                  "pre_tockens": kwargs.get("pre_tockens", 2147483647),
-                  "next_tockens": kwargs.get("next_tockens", 2147483647),
-                  "pse": kwargs.get("pse"),
-                  "padding_mask": kwargs.get("padding_mask"),
-                  "atten_mask": kwargs.get("atten_mask")}
+        logger.debug(f"running case: BNSD = {b}_{n1}({n2})_{s1}({s2})_{d}, sparse = {kwargs.get('sparse_mode', 0)}")
+    if not (n1 % n2 == 0 and n1 >= n2):
+        raise ValueError(f"N1与N2不匹配,请检查: n1 = {n1}, n2 = {n2}.")
+    dims_kwargs = {
+        "b": b, "s1": s1, "s2": s2, "n1": n1, "n2": n2,
+        "d": d, "h1": h1, "h2": h2, "dtype": dtype
+    }
+    new_kwargs = {
+        "keep_prob": 1,
+        "scale": kwargs.get("scale", 1 / (d ** 0.5)),
+        "sparse_mode": kwargs.get("sparse_mode", 0),
+        "prefix": kwargs.get("prefix"),
+        "pre_tockens": kwargs.get("pre_tockens", 2147483647),
+        "next_tockens": kwargs.get("next_tockens", 2147483647),
+        "pse": kwargs.get("pse"),
+        "padding_mask": kwargs.get("padding_mask"),
+        "atten_mask": kwargs.get("atten_mask")
+    }
     return args, dims_kwargs, new_kwargs
@@ -333,33 +355,37 @@ def npu_fusion_attention_backward_patch(*args, **kwargs):
     if len(args) != 6:
         raise ValueError(f"Unsupported npu_fusion_attention_grad args {args}.")
-    B, S1, S2, N1, N2, D, H1, H2, DTYPE = parse_bsnd_args(args[0], args[1], args[4], args[5])
-    if N1 == N2 and S1 == S2:
-        logger.info(f"running case : BNSD = {B}_{N1}_{S1}_{D}, sparse = {kwargs.get('sparse_mode', 0)}")
+    b, s1, s2, n1, n2, d, h1, h2, dtype = parse_bsnd_args(args[0], args[1], args[4], args[5])
+    if n1 == n2 and s1 == s2:
+        logger.info(f"running case : bnsd = {b}_{n1}_{s1}_{d}, sparse = {kwargs.get('sparse_mode', 0)}")
     else:
-        logger.info(f"running case: BNSD = {B}_{N1}({N2})_{S1}({S2})_{D}, sparse = {kwargs.get('sparse_mode', 0)}")
-    if not (N1 % N2 == 0 and N1 >= N2):
-        raise ValueError(f"N1与N2不匹配,请检查: N1 = {N1}, N2 = {N2}.")
-    dims_kwargs = {"B": B, "S1": S1, "S2": S2, "N1": N1, "N2": N2,
-                   "D": D, "H1": H1, "H2": H2, "DTYPE": DTYPE}
-    new_kwargs = {"keep_prob": 1,
-                  "scale_value": kwargs.get("scale_value", 1 / (D ** 0.5)),
-                  "sparse_mode": kwargs.get("sparse_mode", 0),
-                  "prefix": kwargs.get("prefix"),
-                  "pre_tockens": kwargs.get("pre_tockens", 2147483647),
-                  "next_tockens": kwargs.get("next_tockens", 2147483647),
-                  "pse": kwargs.get("pse"),
-                  "padding_mask": kwargs.get("padding_mask"),
-                  "softmax_max": kwargs.get("softmax_max"),
-                  "softmax_sum": kwargs.get("softmax_sum"),
-                  "softmax_in": kwargs.get("softmax_in"),
-                  "attention_in": kwargs.get("attention_in"),
-                  "seed": kwargs.get("seed", 0),
-                  "offset": kwargs.get("offset", 0),
-                  "numels": kwargs.get("numels", 0),
-                  "atten_mask": kwargs.get("atten_mask")}
+        logger.info(f"running case: bnsd = {b}_{n1}({n2})_{s1}({s2})_{d}, sparse = {kwargs.get('sparse_mode', 0)}")
+    if not (n1 % n2 == 0 and n1 >= n2):
+        raise ValueError(f"N1与N2不匹配,请检查: n1 = {n1}, n2 = {n2}.")
+    dims_kwargs = {
+        "b": b, "s1": s1, "s2": s2, "n1": n1, "n2": n2,
+        "d": d, "h1": h1, "h2": h2, "dtype": dtype
+    }
+    new_kwargs = {
+        "keep_prob": 1,
+        "scale_value": kwargs.get("scale_value", 1 / (d ** 0.5)),
+        "sparse_mode": kwargs.get("sparse_mode", 0),
+        "prefix": kwargs.get("prefix"),
+        "pre_tockens": kwargs.get("pre_tockens", 2147483647),
+        "next_tockens": kwargs.get("next_tockens", 2147483647),
+        "pse": kwargs.get("pse"),
+        "padding_mask": kwargs.get("padding_mask"),
+        "softmax_max": kwargs.get("softmax_max"),
+        "softmax_sum": kwargs.get("softmax_sum"),
+        "softmax_in": kwargs.get("softmax_in"),
+        "attention_in": kwargs.get("attention_in"),
+        "seed": kwargs.get("seed", 0),
+        "offset": kwargs.get("offset", 0),
+        "numels": kwargs.get("numels", 0),
+        "atten_mask": kwargs.get("atten_mask")
+    }
     return args, dims_kwargs, new_kwargs
@@ -368,12 +394,12 @@ def npu_fusion_attention(*args, **kwargs):
     new_args, dims_kwargs, new_kwargs = npu_fusion_attention_forward_patch(*args, **kwargs)
     query, key, value = new_args[0], new_args[1], new_args[2]
     input_layout = get_input_layout(*args, **kwargs)
-    N1 = dims_kwargs.get("N1")
-    N2 = dims_kwargs.get("N2")
-    S1 = dims_kwargs.get("S1")
-    S2 = dims_kwargs.get("S2")
-    B = dims_kwargs.get("B")
-    DTYPE = dims_kwargs.get("DTYPE")
+    n1 = dims_kwargs.get("n1")
+    n2 = dims_kwargs.get("n2")
+    s1 = dims_kwargs.get("s1")
+    s2 = dims_kwargs.get("s2")
+    b = dims_kwargs.get("b")
+    dtype = dims_kwargs.get("dtype")
     atten_mask = new_kwargs.get("atten_mask")
     keep_prob = new_kwargs.get("keep_prob")
     sparse_mode = new_kwargs.get("sparse_mode")
@@ -381,12 +407,12 @@ def npu_fusion_attention(*args, **kwargs):
     next_tockens = new_kwargs.get("next_tockens")
     pse = new_kwargs.get("pse")
     scale = new_kwargs.get("scale")
-    atten_mask = generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tockens, next_tockens, DTYPE)
-    query = convert_to_bnsd(query, N1, input_layout)
-    key = convert_to_bnsd(key, N2, input_layout)
-    value = convert_to_bnsd(value, N2, input_layout)
-    k_new, v_new = generate_kv(key, value, N1, N2)
+    args_temp = [sparse_mode, atten_mask, b, n1, s1, s2, pre_tockens, next_tockens, dtype]
+    atten_mask = generate_atten_mask(*args_temp)
+    query = convert_to_bnsd(query, n1, input_layout)
+    key = convert_to_bnsd(key, n2, input_layout)
+    value = convert_to_bnsd(value, n2, input_layout)
+    k_new, v_new = generate_kv(key, value, n1, n2)
     out_golden, softmax_max, softmax_sum = fusion_attention_forward(q=query, k=k_new, v=v_new,
                                                                     drop_mask=None, atten_mask=atten_mask,
                                                                     pse=pse, scale=scale,
@@ -403,13 +429,13 @@ def npu_fusion_attention_grad(*args, **kwargs):
     # dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob
     new_args, dims_kwargs, new_kwargs = npu_fusion_attention_backward_patch(*args, **kwargs)
     query, key, value, dx, input_layout = new_args[0], new_args[1], new_args[2], new_args[3], new_args[5]
-    N1 = dims_kwargs.get("N1")
-    N2 = dims_kwargs.get("N2")
-    S1 = dims_kwargs.get("S1")
-    S2 = dims_kwargs.get("S2")
-    B = dims_kwargs.get("B")
-    D = dims_kwargs.get("D")
-    DTYPE = dims_kwargs.get("DTYPE")
+    n1 = dims_kwargs.get("n1")
+    n2 = dims_kwargs.get("n2")
+    s1 = dims_kwargs.get("s1")
+    s2 = dims_kwargs.get("s2")
+    b = dims_kwargs.get("b")
+    d = dims_kwargs.get("d")
+    dtype = dims_kwargs.get("dtype")
     atten_mask = new_kwargs.get("atten_mask")
     keep_prob = new_kwargs.get("keep_prob")
     sparse_mode = new_kwargs.get("sparse_mode")
@@ -420,12 +446,13 @@ def npu_fusion_attention_grad(*args, **kwargs):
     softmax_sum = new_kwargs.get("softmax_sum")
     scale_value = new_kwargs.get("scale_value")
-    atten_mask = generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tockens, next_tockens, DTYPE)
-    query = convert_to_bnsd(query, N1, input_layout)
-    dx = convert_to_bnsd(dx, N1, input_layout)
-    key = convert_to_bnsd(key, N2, input_layout)
-    value = convert_to_bnsd(value, N2, input_layout)
-    k_new, v_new = generate_kv(key, value, N1, N2)
+    args_temp = [sparse_mode, atten_mask, b, n1, s1, s2, pre_tockens, next_tockens, dtype]
+    atten_mask = generate_atten_mask(*args_temp)
+    query = convert_to_bnsd(query, n1, input_layout)
+    dx = convert_to_bnsd(dx, n1, input_layout)
+    key = convert_to_bnsd(key, n2, input_layout)
+    value = convert_to_bnsd(value, n2, input_layout)
+    k_new, v_new = generate_kv(key, value, n1, n2)
     if softmax_build_mode == "QKV":
         softmax_res = rebuid_softmax_by_qkv(query, k_new, atten_mask, pse, scale_value)
@@ -435,12 +462,12 @@ def npu_fusion_attention_grad(*args, **kwargs):
     dq, dk, dv = fusion_attention_backward(dx, query, k_new, v_new, softmax_res, None, pse, scale_value, keep_prob)
     # N不等长适配by cdy
-    if not (N1 == N2):
-        if N2 == 0:
-            raise ValueError("dims_kwargs.N2 must be non-zero.")
-        G = int(N1 / N2)
-        dk = torch.sum(dk.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D)
-        dv = torch.sum(dv.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D)
+    if not (n1 == n2):
+        if n2 == 0:
+            raise ValueError("dims_kwargs.n2 must be non-zero.")
+        g = int(n1 / n2)
+        dk = torch.sum(dk.reshape(b, n2, g, s2, d), dim=2, keepdim=True).reshape(b, n2, s2, d)
+        dv = torch.sum(dv.reshape(b, n2, g, s2, d), dim=2, keepdim=True).reshape(b, n2, s2, d)
     if dq.dim() == 5:
         dq = dq.reshape(dq.size(0), dq.size(1) * dq.size(2), dq.size(3), dq.size(4))
@@ -460,12 +487,12 @@ def is_attention_off_due_to_mask(atten_mask_dtype):
     return not atten_mask_dtype
-def is_attention_off_in_sparse_mode_4(sparse_mode, next_tockens, pre_tockens, S1):
-    return sparse_mode == 4 and (next_tockens != 0 or pre_tockens < S1)
+def is_attention_off_in_sparse_mode_4(sparse_mode, next_tockens, pre_tockens, s1):
+    return sparse_mode == 4 and (next_tockens != 0 or pre_tockens < s1)
-def is_attention_off_in_sparse_mode_0(sparse_mode, pre_tockens, next_tockens, S1, S2):
-    return sparse_mode == 0 and pre_tockens >= S1 and next_tockens >= S2
+def is_attention_off_in_sparse_mode_0(sparse_mode, pre_tockens, next_tockens, s1, s2):
+    return sparse_mode == 0 and pre_tockens >= s1 and next_tockens >= s2
 def gpu_fusion_attention(*args, **kwargs):
@@ -474,11 +501,11 @@ def gpu_fusion_attention(*args, **kwargs):
     query, key, value = new_args[0], new_args[1], new_args[2]
     keep_prob = new_kwargs.get("keep_prob", 1.0)
     scale = new_kwargs.get("scale")
-    N1 = dims_kwargs.get("N1")
-    N2 = dims_kwargs.get("N2")
-    S1 = dims_kwargs.get("S1")
-    S2 = dims_kwargs.get("S2")
-    B = dims_kwargs.get("B")
+    n1 = dims_kwargs.get("n1")
+    n2 = dims_kwargs.get("n2")
+    s1 = dims_kwargs.get("s1")
+    s2 = dims_kwargs.get("s2")
+    b = dims_kwargs.get("b")
     pse = new_kwargs.get("pse")
     sparse_mode = new_kwargs.get("sparse_mode")
     pre_tockens = new_kwargs.get("pre_tockens")
@@ -488,22 +515,24 @@ def gpu_fusion_attention(*args, **kwargs):
     pre_tockens = min(CompareConst.MAX_TOKENS, pre_tockens)
     next_tockens = min(CompareConst.MAX_TOKENS, next_tockens)
     atten_off = (is_attention_off_due_to_mask(atten_mask_dtype) or
-             is_attention_off_in_sparse_mode_4(sparse_mode, next_tockens, pre_tockens, S1) or
-             is_attention_off_in_sparse_mode_0(sparse_mode, pre_tockens, next_tockens, S1, S2))
+                 is_attention_off_in_sparse_mode_4(sparse_mode, next_tockens, pre_tockens, s1) or
+                 is_attention_off_in_sparse_mode_0(sparse_mode, pre_tockens, next_tockens, s1, s2))
     causal_switch = not atten_off
     if sparse_mode == CompareConst.SPECIAL_SPARSE_MOED:
         window_left = pre_tockens
         window_right = next_tockens
     else:
         pre_tockens = next_tockens = CompareConst.MAX_TOKENS
-        window_left = pre_tockens - S1 + S2
-        window_right = next_tockens + S1 - S2
+        window_left = pre_tockens - s1 + s2
+        window_right = next_tockens + s1 - s2
     if pse is not None:
-        alibi_slopes = torch.rand(B, N1, dtype=torch.float32) * 0.3
+        alibi_slopes = torch.rand(b, n1, dtype=torch.float32) * 0.3
     else:
         alibi_slopes = None
-    out = flash_attn_func(query, key, value, dropout_p=(1-keep_prob), softmax_scale=scale, causal=causal_switch,
-                          window_size=(window_left, window_right), alibi_slopes=alibi_slopes, deterministic=deterministic)
+    out = flash_attn_func(
+        query, key, value, dropout_p=(1 - keep_prob), softmax_scale=scale, causal=causal_switch,
+        window_size=(window_left, window_right), alibi_slopes=alibi_slopes, deterministic=deterministic
+    )
     return out, Const.NONE, Const.NONE

msprobe/pytorch/bench_functions/rms_norm.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import torch

msprobe/pytorch/bench_functions/rotary_mul.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import torch
@@ -25,15 +40,19 @@ def npu_rotary_mul_backward(dy_tensor, x, r1, r2):
     x_shape = x.shape
     h = x.float()
     grad = dy_tensor.float()
-    condition_1 = (((r1_shape[0] == 1 and x_shape[0] != 1) or (r1_shape[0] == 1 and x_shape[0] == 1)) and
-                   ((r1_shape[2] == 1 and x_shape[2] != 1) or (r1_shape[2] == 1 and x_shape[2] == 1)) and
-                   (r1_shape[1] == x_shape[1]) and (r1_shape[3] == x_shape[3]))
-    condition_2 = (((r1_shape[0] == 1 and x_shape[0] != 1) or (r1_shape[0] == 1 and x_shape[0] == 1)) and
-                   ((r1_shape[1] == 1 and x_shape[1] != 1) or (r1_shape[1] == 1 and x_shape[1] == 1)) and
-                   (r1_shape[2] == x_shape[2]) and (r1_shape[3] == x_shape[3]))
-    condition_3 = (((r1_shape[2] == 1 and x_shape[2] != 1) or (r1_shape[2] == 1 and x_shape[2] == 1)) and
-                   ((r1_shape[1] == 1 and x_shape[1] != 1) or (r1_shape[1] == 1 and x_shape[1] == 1)) and
-                   (r1_shape[0] == x_shape[0]) and (r1_shape[3] == x_shape[3]))
+    condition_1 = (r1_shape[0] == 1
+                   and r1_shape[1] == x_shape[1]
+                   and r1_shape[2] == 1
+                   and r1_shape[3] == x_shape[3])
+    condition_2 = (r1_shape[0] == 1
+                   and r1_shape[1] == 1
+                   and r1_shape[2] == x_shape[2]
+                   and r1_shape[3] == x_shape[3])
+    condition_3 = (r1_shape[0] == x_shape[0]
+                   and r1_shape[1] == 1
+                   and r1_shape[2] == 1
+                   and r1_shape[3] == x_shape[3])
     if condition_1:
         for i in range(x_shape[0]):
             for j in range(x_shape[2]):

msprobe/pytorch/bench_functions/scaled_mask_softmax.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import torch

msprobe/pytorch/bench_functions/swiglu.py CHANGED Viewed

@@ -1,16 +1,31 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import torch
 def npu_swiglu(x, dim=-1):
     tensor_dtype = x.dtype
-    inTensors = torch.chunk(x, 2, dim=dim)
+    in_tensors = torch.chunk(x, 2, dim=dim)
     if tensor_dtype == torch.float32:
-        tensor_scalar = torch.sigmoid(torch.mul(inTensors[0], 1.0))
-        output_data = torch.mul(torch.mul(tensor_scalar, inTensors[0]), inTensors[1])
+        tensor_scalar = torch.sigmoid(torch.mul(in_tensors[0], 1.0))
+        output_data = torch.mul(torch.mul(tensor_scalar, in_tensors[0]), in_tensors[1])
     else:
-        tensor_self_float = inTensors[0].type(torch.float)
-        tensor_other_float = inTensors[1].type(torch.float)
+        tensor_self_float = in_tensors[0].type(torch.float)
+        tensor_other_float = in_tensors[1].type(torch.float)
         tensor_out_float = torch.nn.functional.silu(tensor_self_float).type(tensor_dtype).type(
             torch.float32) * tensor_other_float
         output_data = tensor_out_float.type(tensor_dtype)

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl