PyPI - mindspore - Versions diffs - 2.2.0__cp37-cp37m-manylinux1_x86_64.whl → 2.2.11__cp37-cp37m-manylinux1_x86_64.whl - Mend

mindspore 2.2.0__cp37-cp37m-manylinux1_x86_64.whl → 2.2.11__cp37-cp37m-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (170) hide show

mindspore/.commit_id +1 -1
mindspore/_akg/akg/composite/build_module.py +104 -20
mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
mindspore/_akg/akg/utils/kernel_exec.py +41 -15
mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
mindspore/_akg/akg/utils/util.py +56 -1
mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
mindspore/_checkparam.py +3 -3
mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
mindspore/_extends/graph_kernel/splitter.py +3 -2
mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
mindspore/_extends/parse/__init__.py +3 -2
mindspore/_extends/parse/parser.py +6 -1
mindspore/_extends/parse/standard_method.py +14 -11
mindspore/_extends/remote/kernel_build_server.py +2 -1
mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
mindspore/bin/cache_admin +0 -0
mindspore/bin/cache_server +0 -0
mindspore/common/_utils.py +16 -0
mindspore/common/api.py +1 -1
mindspore/common/auto_dynamic_shape.py +81 -85
mindspore/common/dump.py +1 -1
mindspore/common/tensor.py +3 -20
mindspore/config/op_info.config +1 -1
mindspore/context.py +11 -4
mindspore/dataset/engine/cache_client.py +8 -5
mindspore/dataset/engine/datasets_standard_format.py +5 -0
mindspore/dataset/vision/transforms.py +21 -21
mindspore/experimental/optim/adam.py +1 -1
mindspore/gen_ops.py +1 -1
mindspore/include/api/model.h +17 -0
mindspore/include/api/status.h +8 -3
mindspore/lib/libdnnl.so.2 +0 -0
mindspore/lib/libmindspore.so +0 -0
mindspore/lib/libmindspore_backend.so +0 -0
mindspore/lib/libmindspore_common.so +0 -0
mindspore/lib/libmindspore_core.so +0 -0
mindspore/lib/libmindspore_glog.so.0 +0 -0
mindspore/lib/libmindspore_gpr.so.15 +0 -0
mindspore/lib/libmindspore_grpc++.so.1 +0 -0
mindspore/lib/libmindspore_grpc.so.15 +0 -0
mindspore/lib/libmindspore_shared_lib.so +0 -0
mindspore/lib/libnnacl.so +0 -0
mindspore/lib/libopencv_core.so.4.5 +0 -0
mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
mindspore/lib/plugin/ascend/libakg.so +0 -0
mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
mindspore/lib/plugin/cpu/libakg.so +0 -0
mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
mindspore/nn/cell.py +0 -3
mindspore/nn/layer/activation.py +4 -5
mindspore/nn/layer/conv.py +39 -23
mindspore/nn/layer/flash_attention.py +54 -129
mindspore/nn/layer/math.py +3 -7
mindspore/nn/layer/rnn_cells.py +5 -5
mindspore/nn/wrap/__init__.py +4 -2
mindspore/nn/wrap/cell_wrapper.py +12 -3
mindspore/numpy/utils_const.py +5 -5
mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
mindspore/ops/_op_impl/aicpu/add.py +3 -3
mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
mindspore/ops/_utils/utils.py +2 -0
mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
mindspore/ops/function/array_func.py +10 -7
mindspore/ops/function/grad/grad_func.py +0 -1
mindspore/ops/function/nn_func.py +98 -9
mindspore/ops/function/random_func.py +2 -1
mindspore/ops/op_info_register.py +24 -21
mindspore/ops/operations/__init__.py +6 -2
mindspore/ops/operations/_grad_ops.py +25 -6
mindspore/ops/operations/_inner_ops.py +155 -23
mindspore/ops/operations/array_ops.py +9 -7
mindspore/ops/operations/comm_ops.py +2 -2
mindspore/ops/operations/custom_ops.py +85 -68
mindspore/ops/operations/inner_ops.py +26 -3
mindspore/ops/operations/math_ops.py +7 -6
mindspore/ops/operations/nn_ops.py +193 -49
mindspore/parallel/_parallel_serialization.py +10 -3
mindspore/parallel/_tensor.py +4 -1
mindspore/parallel/checkpoint_transform.py +13 -2
mindspore/parallel/shard.py +17 -10
mindspore/profiler/common/util.py +1 -0
mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
mindspore/profiler/parser/ascend_op_generator.py +1 -1
mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
mindspore/profiler/parser/base_timeline_generator.py +1 -1
mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
mindspore/profiler/parser/framework_parser.py +1 -1
mindspore/profiler/parser/profiler_info.py +19 -0
mindspore/profiler/profiling.py +46 -24
mindspore/rewrite/api/pattern_engine.py +1 -1
mindspore/rewrite/parsers/for_parser.py +7 -7
mindspore/rewrite/parsers/module_parser.py +4 -4
mindspore/rewrite/symbol_tree.py +1 -4
mindspore/run_check/_check_version.py +5 -3
mindspore/safeguard/rewrite_obfuscation.py +52 -28
mindspore/scipy/ops.py +55 -5
mindspore/scipy/optimize/__init__.py +3 -2
mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
mindspore/train/callback/_summary_collector.py +1 -1
mindspore/train/dataset_helper.py +1 -0
mindspore/train/model.py +2 -2
mindspore/train/serialization.py +97 -11
mindspore/train/summary/_summary_adapter.py +1 -1
mindspore/train/summary/summary_record.py +23 -7
mindspore/version.py +1 -1
{mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
{mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
/mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
{mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
{mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
{mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0

mindspore/.commit_id CHANGED Viewed

	@@ -1 +1 @@
1	- __commit_id__ = '[sha1]:9390851d,[branch]:(HEAD,origin/r2.2,r2.2)'
1	+ __commit_id__ = '[sha1]:8c390933,[branch]:(HEAD,origin/r2.2,r2.2)'

mindspore/_akg/akg/composite/build_module.py CHANGED Viewed

@@ -19,16 +19,17 @@ import os
 import json
 from collections.abc import Iterable
 import akg
+import math
 from akg import tvm
 from tvm.autotvm.env import AutotvmGlobalScope
+from akg.utils.util import parse_workspace_map
 from akg.utils.tbe_codegen_utils import build_tbe_codegen
 from akg.utils.kernel_exec import ReturnType, is_symbolic_tiling
 from .split_stitch import split_stitch_attr
 from .construct_args import ConstructType, ConstructKey
 from .construct_args import get_construct_args, get_tune_construct_args, \
     should_enable_attr, get_stmt_for_tune, add_attrs_in_segment_infos
+from utils.util import get_ascend_type
 def generate_trait(desc):
     """
     generate trait of kernel description
@@ -314,12 +315,12 @@ def merge_attrs(attrs_a, attrs_b):
     return attrs
-def read_repo_file(repo_file):
+def read_repo_file(repo_file, is_json_load=True):
     if not os.path.exists(repo_file):
         return {}
     with open(repo_file, 'r') as f:
-        repo = json.loads(f.read())
-    return repo
+        repo = f.read()
+    return json.loads(repo) if is_json_load else repo
 def _get_default_repository_file(process):
@@ -523,17 +524,6 @@ def _build_to_module(desc_s, desc_d, attrs=None, poly=True):
     return _cpp_build(attrs, process, poly, segment_tree, segment_infos)
-def _get_ascend_type(desc):
-    if "target_info" not in desc.keys():
-        return None
-    target_info_type = desc["target_info"]
-    if target_info_type.get("arch"):
-        return target_info_type.get("arch")
-    return None
 def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
     """
     build kernel with compute description in json format
@@ -650,7 +640,8 @@ def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
         ConstructType.NORMAL: _normal_postprocess,
     }
     process = desc_d_in["process"]
-    ascend_type = _get_ascend_type(desc_d_in)
+    kernel_name = desc_d_in['op']
+    ascend_type = get_ascend_type(desc_d_in)
     ascend_type_to_section = {"Ascend910A": "1.6", "Ascend310P3": "1.7",
                               "Ascend910B1": "2.1", "Ascend910B2": "2.2", "Ascend910B3": "2.3", "Ascend910B4": "2.4"}
     if ascend_type is not None:
@@ -659,22 +650,115 @@ def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
         config_func(section)
         if section >= "2.1":
             attr["is_tbe_codegen"] = True
+            attr["pragma_modshift"] = True
     segment_tree, segment_infos = get_construct_args(desc_s_in, attr, post_funcs)
+    if desc_d_in.get("enable_cce_lib"):
+        attr["enable_cce_lib"] = True
+        return _build_to_module_ascend_lib(desc_s_in, kernel_name)
     poly = True
     res = _cpp_build(attr, process, poly, segment_tree, segment_infos)
     if attr.get("is_tbe_codegen"):
-        kernel_name = desc_d_in['op']
         stmt_json = akg.tvm.save_json(res[0], "0.8.0")
         args_json = []
         for buf in res[1]:
             args_json.append(akg.tvm.save_json(buf, "0.8.0"))
+        workspace_dict = parse_workspace_map(res[2])
+        if workspace_dict is not None:
+            attr["workspace"] = workspace_dict
-        is_success = build_tbe_codegen(kernel_name, stmt_json, args_json, ascend_type, attr.get("dynamic", False))
+        is_success = build_tbe_codegen(kernel_name, stmt_json, args_json, attr, ascend_type)
         if not is_success:
             raise TypeError("npu_inference codegen failed.")
         return kernel_name
     return res
+def _build_to_module_ascend_lib(desc_s_in, kernel_name):
+    def _get_all_shape(shapes):
+        shape_split = shapes.split(".")
+        shape_list = []
+        for shape in shape_split:
+            if "-" in shape:
+                tmp_shape = shape.split("-")[0]
+                for _ in range(shape.count("-") + 1):
+                    shape_list.append(tmp_shape)
+            else:
+                shape_list.append(shape)
+        return shape_list
+    def _get_tiling_info(desc_s):
+        compute, shape, dtype = generate_trait(desc_s)
+        tiling_info = {}
+        if "MatMul" in compute:
+            trans_a = compute.split("_")[1]
+            trans_b = compute.split("_")[-1].split(".")[0]
+            shape_list = _get_all_shape(shape)
+            bias_flag = int(len(shape_list) > 3)
+            tensor_A = shape_list[0]
+            tensor_B = shape_list[1]
+            tensor_A_split = tensor_A.split("_")
+            if len(tensor_A_split) > 2:
+                batch_size = int(tensor_A.split("_")[0])
+            else:
+                batch_size = 1
+            if trans_a == "1":
+                M = int(tensor_A_split[-1])
+                K = int(tensor_A_split[-2])
+            else:
+                M = int(tensor_A_split[-2])
+                K = int(tensor_A_split[-1])
+            if trans_b == "1":
+                N = int(tensor_B.split("_")[-2])
+            else:
+                N = int(tensor_B.split("_")[-1])
+            tensor_A_type = str(dtype.split("-")[0])
+            tiling_info = {"batch_size":batch_size, "M": M, "N": N, "K": K, "trans_a": int(trans_a), "trans_b": int(trans_b),
+                           "tensor_A_type": tensor_A_type, "bias_flag": bias_flag, "op_type": "MatMul"}
+        elif "PagedAttention" in compute or "PagedAttentionMask" in compute:
+            shape_list = _get_all_shape(shape)
+            query = shape_list[0]
+            key_cache = shape_list[1]
+            table_shape = shape_list[3]
+            num_tokens = int(query.split("_")[0])
+            num_heads = int(query.split("_")[1])
+            embedding_size = int(query.split("_")[2])
+            num_blocks = int(key_cache.split("_")[0])
+            block_size = int(key_cache.split("_")[1])
+            kv_heads = int(key_cache.split("_")[2])
+            max_num_blocks_per_query = int(table_shape.split("_")[1])
+            tor = float(1.0 / math.sqrt(1.0 * embedding_size))
+            tiling_info = {"num_tokens": num_tokens, "num_heads": num_heads, "embedding_size": embedding_size,
+                           "num_blocks": num_blocks, "block_size": block_size, "max_num_blocks_per_query": max_num_blocks_per_query,
+                           "tor": tor, "kv_heads": kv_heads, "op_type": "PagedAttention"}
+            if "PagedAttentionMask" in compute:
+                mask_shape = shape_list[5]
+                tiling_info["mask"] = list(map(int, mask_shape.split("_")))
+                tiling_info["op_type"] = "PagedAttentionMask"
+        elif "ReshapeAndCache" in compute:
+            shape_list = _get_all_shape(shape)
+            kv = shape_list[0]
+            num_tokens = int(kv.split("_")[0])
+            num_heads = int(kv.split("_")[1])
+            head_size = int(kv.split("_")[2])
+            tiling_info = {"num_tokens": num_tokens, "num_heads": num_heads, "head_size": head_size,
+                           "op_type": "ReshapeAndCache"}
+        return tiling_info
+    func = tvm.get_global_func("build_cce_lib")
+    tiling_info = _get_tiling_info(json.loads(desc_s_in))
+    func(kernel_name, tiling_info, None)
+    return kernel_name
 def _set_backend(desc_d):
     desc_d_process = desc_d
     for i, op in enumerate(desc_d.get("op_desc")):
@@ -772,4 +856,4 @@ def get_tiling_space(kernel_desc, level=1, attr=None):
         spaces['c0_mod'] = ret.c0_tile_mod_table.asnumpy().tolist()
         if level >= 2:
             spaces['tuning_space'] = ret.tiling_candidate.asnumpy().tolist()
-    return spaces
+    return spaces

mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py ADDED Viewed

@@ -0,0 +1,76 @@
+import os
+import re
+import subprocess
+from enum import Enum
+from .file_manager import FileManager
+from .path_manager import PathManager
+class CANNDataEnum(Enum):
+    OP_SUMMARY = 0
+    NPU_MEMORY = 1
+    MSPROF_TIMELINE = 2
+    STEP_TRACE = 3
+    GE_MEMORY_RECORD = 4
+    GE_OPERATOR_MEMORY = 5
+    L2_CACHE = 6
+    AI_CPU = 7
+    COMMUNICATION = 8
+    MATRIX = 9
+class CANNFileParser:
+    COMMAND_SUCCESS = 0
+    ACL_TO_NPU = "acl_to_npu"
+    START_FLOW = "s"
+    END_FLOW = "f"
+    SUMMARY = "summary"
+    TIMELINE = "timeline"
+    ANALYZE = "analyze"
+    CANN_DATA_MATCH = {
+        CANNDataEnum.OP_SUMMARY: [r"^op_summary_\d+_\d+\.csv", r"^op_summary_\d+_\d+_\d+\.csv",
+                                  r"^op_summary_\d+_\d+_\d+_\d+\.csv"],
+    }
+    def __init__(self, profiler_path: str):
+        self._cann_path = PathManager.get_cann_path(profiler_path)
+        self._file_dict = {}
+        self._file_dispatch()
+    def export_cann_profiling(self):
+        if not os.path.isdir(self._cann_path):
+            return
+        self._del_summary_and_timeline_data()
+        completed_process = subprocess.run(["msprof", "--export=on", f"--output={self._cann_path}"],
+                                           capture_output=True)
+        if completed_process.returncode != self.COMMAND_SUCCESS:
+            raise RuntimeError(
+                f"Export CANN Profiling data failed, please verify that the ascend-toolkit is installed and set-env.sh "
+                f"is sourced. or you can execute the command to confirm the CANN Profiling export result: "
+                f"msprof --export=on --output={self._cann_path}")
+    def get_file_list_by_type(self, file_type: CANNDataEnum) -> set:
+        return self._file_dict.get(file_type, set())
+    def _file_dispatch(self):
+        all_file_list = PathManager.get_device_all_file_list_by_type(self._cann_path, self.SUMMARY)
+        all_file_list += PathManager.get_device_all_file_list_by_type(self._cann_path, self.TIMELINE)
+        all_file_list += PathManager.get_analyze_all_file(self._cann_path, self.ANALYZE)
+        for file_path in all_file_list:
+            if not os.path.isfile(file_path):
+                continue
+            for data_type, re_match_exp_list in self.CANN_DATA_MATCH.items():
+                for re_match_exp in re_match_exp_list:
+                    if re.match(re_match_exp, os.path.basename(file_path)):
+                        self._file_dict.setdefault(data_type, set()).add(file_path)
+    def _del_summary_and_timeline_data(self):
+        device_path = PathManager.get_device_path(self._cann_path)
+        if not device_path:
+            return
+        summary_path = os.path.join(device_path, "summary")
+        timeline_path = os.path.join(device_path, "timeline")
+        FileManager.remove_file_safety(summary_path)
+        FileManager.remove_file_safety(timeline_path)

mindspore/_akg/akg/utils/ascend_profilier/file_manager.py ADDED Viewed

@@ -0,0 +1,56 @@
+import csv
+import json
+import os.path
+import shutil
+from warnings import warn
+MAX_FILE_SIZE = 1024 * 1024 * 1024 * 10
+MAX_CSV_SIZE = 1024 * 1024 * 1024 * 5
+class FileManager:
+    @classmethod
+    def file_read_all(cls, file_path: str, mode: str = "r") -> any:
+        if not os.path.isfile(file_path):
+            return ''
+        file_size = os.path.getsize(file_path)
+        if file_size <= 0:
+            return ''
+        if file_size > MAX_FILE_SIZE:
+            warn(f"The file size exceeds the preset value {MAX_FILE_SIZE / 1024 / 1024}MB, "
+                 f"please check the file: {file_path}")
+            return ''
+        try:
+            with open(file_path, mode) as file:
+                return file.read()
+        except Exception:
+            raise RuntimeError(f"Can't read file: {file_path}")
+    @classmethod
+    def read_csv_file(cls, file_path: str, class_bean: any) -> list:
+        if not os.path.isfile(file_path):
+            return []
+        file_size = os.path.getsize(file_path)
+        if file_size <= 0:
+            return []
+        if file_size > MAX_CSV_SIZE:
+            warn(f"The file size exceeds the preset value {MAX_CSV_SIZE / 1024 / 1024}MB, "
+                 f"please check the file: {file_path}")
+            return []
+        result_data = []
+        try:
+            with open(file_path, newline="") as csv_file:
+                reader = csv.DictReader(csv_file)
+                for row in reader:
+                    result_data.append(class_bean(row))
+        except Exception:
+            raise RuntimeError(f"Failed to read the file: {file_path}")
+        return result_data
+    @classmethod
+    def remove_file_safety(cls, path: str):
+        if os.path.exists(path):
+            try:
+                shutil.rmtree(path)
+            except Exception:
+                print(f"[WARNING] [{os.getpid()}] profiler.py: Can't remove the directory: {path}")

mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py ADDED Viewed

@@ -0,0 +1,23 @@
+from .op_summary_headers import OpSummaryHeaders
+class OpSummaryBean:
+    headers = []
+    def __init__(self, data: list):
+        self._data = data
+    @property
+    def row(self) -> list:
+        row = []
+        read_headers = OpSummaryBean.headers if OpSummaryBean.headers else self._data.keys()
+        for field_name in read_headers:
+            row.append(self._data.get(field_name, ""))
+        return row
+    @property
+    def ts(self) -> float:
+        return float(self._data.get(OpSummaryHeaders.TASK_START_TIME, 0))
+    @property
+    def all_headers(self) -> list:
+        return list(self._data.keys())

mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py ADDED Viewed

@@ -0,0 +1,8 @@
+class OpSummaryHeaders(object):
+    # op_summary
+    TASK_START_TIME = "Task Start Time(us)"
+    AIC_TOTAL_CYCLES = "aic_total_cycles"
+    AIV_TOTAL_CYCLES = "aiv_total_cycles"
+    TASK_DURATION = "Task Duration(us)"
+    OP_SUMMARY_SHOW_HEADERS = ["Op Name", "OP Type", "Task Type", TASK_START_TIME, TASK_DURATION,
+                               "Task Wait Time(us)", "Block Dim" ,AIC_TOTAL_CYCLES, AIV_TOTAL_CYCLES]

mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py ADDED Viewed

@@ -0,0 +1,42 @@
+from .op_summary_headers import OpSummaryHeaders
+from .file_manager import FileManager
+from .op_summary_bean import OpSummaryBean
+from .cann_file_parser import CANNFileParser, CANNDataEnum
+class OpSummaryParser():
+    def __init__(self, profiler_path: str):
+        self._profiler_path = profiler_path
+    @classmethod
+    def _project_map_for_headers(cls, input_headers: list):
+        project_map_dict = {OpSummaryHeaders.OP_SUMMARY_SHOW_HEADERS[i]: OpSummaryHeaders.OP_SUMMARY_KERNEL_BASE_HEADERS[i] for i in
+                            range(len(OpSummaryHeaders.OP_SUMMARY_SHOW_HEADERS))}
+        output_headers = []
+        for header in input_headers:
+            if header in project_map_dict:
+                output_headers.append(project_map_dict.get(header))
+            else:
+                output_headers.append(header)
+        return output_headers
+    def generate_op_summary_data(self) -> dict:
+        op_summary_file_set = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.OP_SUMMARY)
+        summary_data = []
+        for file_path in op_summary_file_set:
+            all_data = FileManager.read_csv_file(file_path, OpSummaryBean)
+            if all_data:
+                OpSummaryBean.headers = OpSummaryHeaders.OP_SUMMARY_SHOW_HEADERS
+                output_headers = OpSummaryBean.headers
+                summary_data.extend([data.row for data in all_data])
+            else:
+                raise RuntimeError("parse op summary csv failed.")
+        return self.create_dict(summary_data[0],output_headers)
+    def create_dict(self,summary_data,headers):
+        summary_dict={}
+        for i in range(len(summary_data)):
+            summary_dict[headers[i]] = summary_data[i]
+        return summary_dict

mindspore/_akg/akg/utils/ascend_profilier/path_manager.py ADDED Viewed

@@ -0,0 +1,65 @@
+import os
+import re
+class PathManager:
+    @classmethod
+    def get_cann_path(cls, profiler_path: str) -> str:
+        sub_dirs = os.listdir(os.path.realpath(profiler_path))
+        for sub_dir in sub_dirs:
+            sub_path = os.path.join(profiler_path, sub_dir)
+            if os.path.isdir(sub_path) and re.match(r"^PROF_\d+_\d+_[a-zA-Z]+", sub_dir):
+                return sub_path
+        return ""
+    @classmethod
+    def get_device_path(cls, cann_path: str) -> str:
+        sub_dirs = os.listdir(os.path.realpath(cann_path))
+        for sub_dir in sub_dirs:
+            sub_path = os.path.join(cann_path, sub_dir)
+            if os.path.isdir(sub_path) and re.match(r"^device_\d", sub_dir):
+                return sub_path
+        return ""
+    @classmethod
+    def get_start_info_path(cls, cann_path: str) -> str:
+        start_info_path = os.path.join(cann_path, "host", "start_info")
+        if os.path.exists(start_info_path):
+            return start_info_path
+        device_path = cls.get_device_path(cann_path)
+        if not device_path:
+            return ""
+        device_path_split = os.path.basename(device_path).split("_")
+        if len(device_path_split) != 2:
+            return ""
+        start_info_file = f"start_info.{device_path_split[1]}"
+        start_info_path = os.path.join(device_path, start_info_file)
+        if os.path.exists(start_info_path):
+            return start_info_path
+        return ""
+    @classmethod
+    def get_device_all_file_list_by_type(cls, profiler_path: str, summary_or_timeline: str) -> list:
+        file_list = []
+        _path = os.path.join(cls.get_device_path(profiler_path), summary_or_timeline)
+        if not os.path.isdir(_path):
+            return file_list
+        sub_files = os.listdir(os.path.realpath(_path))
+        if not sub_files:
+            return file_list
+        for sub_file in sub_files:
+            file_list.append(os.path.join(_path, sub_file))
+        return file_list
+    @classmethod
+    def get_analyze_all_file(cls, profiler_path: str, analyze: str) -> list:
+        file_list = []
+        _path = os.path.join(profiler_path, analyze)
+        if not os.path.isdir(_path):
+            return file_list
+        sub_files = os.listdir(os.path.realpath(_path))
+        if not sub_files:
+            return file_list
+        for sub_file in sub_files:
+            file_list.append(os.path.join(_path, sub_file))
+        return file_list

mindspore/_akg/akg/utils/composite_op_helper.py CHANGED Viewed

@@ -519,8 +519,13 @@ def _update_workspace_data(kernel_name, input_for_mod, output_indexes):
             kernel_desc = json.loads(kernel_json)
             if "workspace" in kernel_desc:
                 workspace_bytes = kernel_desc["workspace"]["size"]
-                item = np.full(workspace_bytes, np.nan, np.int8)
-                workspace_tensors.append(item)
+                workspace_num = kernel_desc["workspace"]["num"]
+                if len(workspace_bytes) != workspace_num:
+                    raise ValueError("workspace num %s and size shape %s are not equal!"
+                                     % (len(workspace_bytes), workspace_num))
+                for i in range(kernel_desc["workspace"]["num"]):
+                    item = np.full(workspace_bytes[i], np.nan, np.int8)
+                    workspace_tensors.append(item)
     else:
         logging.warning("Kernel json file %s not found", json_file)

mindspore/_akg/akg/utils/dump_ascend_meta.py CHANGED Viewed

@@ -24,9 +24,23 @@ import akg.tvm
 from akg.global_configs import get_kernel_meta_path
 from akg.utils.util import parse_workspace, write_code
+def set_ascend910b(code, core_type, title_dict):
+    if len(core_type) == 0:
+        return
+    if core_type == "MIX":
+        title_dict["magic"] = "RT_DEV_BINARY_MAGIC_ELF"
+        title_dict["coreType"] = "MIX"
+        title_dict["intercoreSync"] = 1
+        title_dict["taskRation"] = "1:2"
+    elif core_type == "AIC":
+        title_dict["coreType"] = "AiCore"
+        title_dict["magic"] = "RT_DEV_BINARY_MAGIC_ELF_AICUBE"
+    elif core_type == "AIV":
+        title_dict["coreType"] = "VectorCore"
+        title_dict["magic"] = "RT_DEV_BINARY_MAGIC_ELF_AIVEC"
 @akg.tvm.register_func
-def tvm_callback_cce_postproc(code, block_dim=1, workspace=None):
+def tvm_callback_cce_postproc(code, block_dim=1, workspace=None, core_type=""):
     """Function for dumping ascend meta."""
     if "__aicore__" in code:
         title_dict = {"magic": "RT_DEV_BINARY_MAGIC_ELF"}
@@ -35,8 +49,13 @@ def tvm_callback_cce_postproc(code, block_dim=1, workspace=None):
         title_dict = dict()
     # kernel name
-    kernel_name = code.split("_kernel")[0].split(" ")[-1]
-    title_dict["kernelName"] = kernel_name + "_kernel0"
+    if "_kernel" in code:
+        kernel_name = code.split("_kernel")[0].split(" ")[-1]
+        title_dict["kernelName"] = kernel_name + "_kernel0"
+    elif "_mix_aic" in code:
+        kernel_name = code.split("_mix_aic")[0].split(" ")[-1]
+        title_dict["kernelName"] = kernel_name
+    set_ascend910b(code, core_type, title_dict)
     # thread info
     title_dict["blockDim"] = block_dim

mindspore/_akg/akg/utils/kernel_exec.py CHANGED Viewed

@@ -43,10 +43,12 @@ from akg.utils import custom_tiling as ct_util
 from akg.utils import validation_check as vc_util
 from akg.utils.dsl_create import TensorUtils
 from akg.utils.util import parse_kwargs
-from akg.backend.parsing_profiling_data import HWTSLogParser
+from akg.backend.parsing_profiling_data import HWTSLogParser, max_time_consume
 from akg.backend.parsing_profiling_data import validate_and_normalize_path
 from akg.backend import aic_model
+from .ascend_profilier.cann_file_parser import CANNFileParser
+from .ascend_profilier.op_summary_parser import OpSummaryParser
+from .ascend_profilier.op_summary_headers import OpSummaryHeaders
 sh = logging.StreamHandler(sys.stdout)
 logging.getLogger().addHandler(sh)
 logging.getLogger().setLevel(logging.INFO)
@@ -169,7 +171,7 @@ def gen_name_kernel(kernel, dtype, shapes):
     return res
-def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
+def profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=None):
     """
     Function for collecting cycle data from device.
@@ -180,21 +182,45 @@ def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
         tuning: tuning model.
         device_id: device_id on device.
     """
-    akg.tvm.get_global_func("ascend_start_profiling")(device_id)
+    akg.tvm.get_global_func("ascend_start_profiling")(kernel_name)
     time_before_launch = time.time()
     output_data = ascend_run(kernel_name, args, outputs, device_id)
     akg.tvm.get_global_func("ascend_stop_profiling")()
-    cycle = profiling_analyse(device_id, time_before_launch)
-    logging.info('=====parsing cycles==============================')
+    cycle = 0
+    if arch is not None and "910B" in arch:
+        # for ascend910B profiling
+        cycle = profiling_analyse_910B(time_before_launch)
+    else:
+        cycle = profiling_analyse(device_id, time_before_launch)
+    logging.info('=====Task Duration(us)==============================')
     if cycle != PROF_ERROR_CODE:
         logging.info(cycle)
     else:
-        logging.error("OOPS, can't correctly parsing cycles!")
+        logging.error("OOPS, can't correctly Task Duration!")
     TestUtils.record_cycle(cycle)
-    logging.info('=====parsing cycles==============================')
+    logging.info('=====Task Duration(us)==============================')
     return output_data, {'run_time': cycle}
+def  profiling_analyse_910B(time_before_launch):
+    public_path = os.getenv('PROFILING_DIR')
+    if public_path is None:
+        raise RuntimeError("Environment PROFILING_DIR not set!")
+    public_path = validate_and_normalize_path(public_path)
+    CANNFileParser(public_path).export_cann_profiling()
+    cann_file_parser = OpSummaryParser(public_path)
+    profiler_file = cann_file_parser._profiler_path
+    logging.debug("prof file is: %s", os.path.basename(profiler_file))
+    file_create_time = os.path.getctime(profiler_file)
+    if file_create_time < time_before_launch:
+        raise RuntimeError("The PROF file is too old")
+    datas:dict = cann_file_parser.generate_op_summary_data()
+    task_duration = float(datas.get(OpSummaryHeaders.TASK_DURATION,max_time_consume))
+    # # aic_total_cycles means ai core cycle
+    # # aiv_total_cycles means ai vector cycle
+    # aiv_total_cycle = int(datas.get(OpSummaryHeaders.AIV_TOTAL_CYCLES,max_time_consume))
+    # aic_total_cycle = int(datas.get(OpSummaryHeaders.AIC_TOTAL_CYCLES,max_time_consume))
+    # return aiv_total_cycle+aic_total_cycle
+    return task_duration
 def profiling_analyse(device_id, time_before_launch):
     """analyse profiling."""
@@ -348,12 +374,12 @@ def get_kernel_name_from_mod(mod):
     return kernel_name
-def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1):
+def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1, arch=None):
     gc.collect()
     if device_id == -1:
         device_id = int(os.environ.get("DEVICE_ID", 0))
     kernel_name = get_kernel_name_from_mod(mod)
-    return profiling_mode_run(kernel_name, args, outputs, tuning, device_id)
+    return profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=arch)
 def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, device_id=-1, repeat_time=400):
@@ -387,7 +413,7 @@ def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, devi
 @func_time_required
-def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400):
+def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400, arch=None):
     """
     unified run CCE kernel api.
@@ -398,7 +424,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
         tuning (bool): tuning model.
         device_id: device_id on device.
         expect: when mode in ["compile_cloud", "compile_mini"], return it.
+        arch: Ascend arch type
     Returns:
         output numpy array, or tuple of numpy array if multi-output.
     """
@@ -414,7 +440,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
         run_args = [kernel_name, args, outputs, device_id]
         if os.environ.get("PROFILING_MODE") == "true":
             run_func = profiling_mode_run
-            run_args = [kernel_name, args, outputs, tuning, device_id]
+            run_args = [kernel_name, args, outputs, tuning, device_id, arch]
             if os.environ.get("PROFILING_DIR", None) is None:
                 os.environ["PROFILING_DIR"] = "."
                 logging.info("[RUNTIME_WARNING] In profiling mode, while profiling dir is not set!Set to current dir by default.")
@@ -1031,7 +1057,7 @@ def npu_op_build(s, op_var, shape_var, kernel_name="", binds=None, attrs=None,
         for buf in enumerate(arg_list):
             args_json.append(akg.tvm.save_json(buf, "0.8.0"))
-        is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs.get("dynamic", False))
+        is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs)
         if not is_success:
             raise TypeError("npu_inference codegen failed.")
         return kernel_name