mindspore 2.2.0__cp38-cp38-manylinux1_x86_64.whl → 2.2.11__cp38-cp38-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/_akg/akg/composite/build_module.py +104 -20
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
- mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
- mindspore/_akg/akg/utils/kernel_exec.py +41 -15
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
- mindspore/_akg/akg/utils/util.py +56 -1
- mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +3 -3
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/splitter.py +3 -2
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
- mindspore/_extends/parse/__init__.py +3 -2
- mindspore/_extends/parse/parser.py +6 -1
- mindspore/_extends/parse/standard_method.py +14 -11
- mindspore/_extends/remote/kernel_build_server.py +2 -1
- mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/_utils.py +16 -0
- mindspore/common/api.py +1 -1
- mindspore/common/auto_dynamic_shape.py +81 -85
- mindspore/common/dump.py +1 -1
- mindspore/common/tensor.py +3 -20
- mindspore/config/op_info.config +1 -1
- mindspore/context.py +11 -4
- mindspore/dataset/engine/cache_client.py +8 -5
- mindspore/dataset/engine/datasets_standard_format.py +5 -0
- mindspore/dataset/vision/transforms.py +21 -21
- mindspore/experimental/optim/adam.py +1 -1
- mindspore/gen_ops.py +1 -1
- mindspore/include/api/model.h +17 -0
- mindspore/include/api/status.h +8 -3
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/nn/cell.py +0 -3
- mindspore/nn/layer/activation.py +4 -5
- mindspore/nn/layer/conv.py +39 -23
- mindspore/nn/layer/flash_attention.py +54 -129
- mindspore/nn/layer/math.py +3 -7
- mindspore/nn/layer/rnn_cells.py +5 -5
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +12 -3
- mindspore/numpy/utils_const.py +5 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
- mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_utils/utils.py +2 -0
- mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
- mindspore/ops/function/array_func.py +10 -7
- mindspore/ops/function/grad/grad_func.py +0 -1
- mindspore/ops/function/nn_func.py +98 -9
- mindspore/ops/function/random_func.py +2 -1
- mindspore/ops/op_info_register.py +24 -21
- mindspore/ops/operations/__init__.py +6 -2
- mindspore/ops/operations/_grad_ops.py +25 -6
- mindspore/ops/operations/_inner_ops.py +155 -23
- mindspore/ops/operations/array_ops.py +9 -7
- mindspore/ops/operations/comm_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +85 -68
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +7 -6
- mindspore/ops/operations/nn_ops.py +193 -49
- mindspore/parallel/_parallel_serialization.py +10 -3
- mindspore/parallel/_tensor.py +4 -1
- mindspore/parallel/checkpoint_transform.py +13 -2
- mindspore/parallel/shard.py +17 -10
- mindspore/profiler/common/util.py +1 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
- mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
- mindspore/profiler/parser/ascend_op_generator.py +1 -1
- mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
- mindspore/profiler/parser/base_timeline_generator.py +1 -1
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
- mindspore/profiler/parser/framework_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +19 -0
- mindspore/profiler/profiling.py +46 -24
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/parsers/for_parser.py +7 -7
- mindspore/rewrite/parsers/module_parser.py +4 -4
- mindspore/rewrite/symbol_tree.py +1 -4
- mindspore/run_check/_check_version.py +5 -3
- mindspore/safeguard/rewrite_obfuscation.py +52 -28
- mindspore/scipy/ops.py +55 -5
- mindspore/scipy/optimize/__init__.py +3 -2
- mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
- mindspore/train/callback/_summary_collector.py +1 -1
- mindspore/train/dataset_helper.py +1 -0
- mindspore/train/model.py +2 -2
- mindspore/train/serialization.py +97 -11
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +23 -7
- mindspore/version.py +1 -1
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
- mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/.commit_id
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__commit_id__ = '[sha1]:
|
|
1
|
+
__commit_id__ = '[sha1]:8c390933,[branch]:(HEAD,origin/r2.2,r2.2)'
|
|
@@ -19,16 +19,17 @@ import os
|
|
|
19
19
|
import json
|
|
20
20
|
from collections.abc import Iterable
|
|
21
21
|
import akg
|
|
22
|
+
import math
|
|
22
23
|
from akg import tvm
|
|
23
24
|
from tvm.autotvm.env import AutotvmGlobalScope
|
|
25
|
+
from akg.utils.util import parse_workspace_map
|
|
24
26
|
from akg.utils.tbe_codegen_utils import build_tbe_codegen
|
|
25
27
|
from akg.utils.kernel_exec import ReturnType, is_symbolic_tiling
|
|
26
28
|
from .split_stitch import split_stitch_attr
|
|
27
29
|
from .construct_args import ConstructType, ConstructKey
|
|
28
30
|
from .construct_args import get_construct_args, get_tune_construct_args, \
|
|
29
31
|
should_enable_attr, get_stmt_for_tune, add_attrs_in_segment_infos
|
|
30
|
-
|
|
31
|
-
|
|
32
|
+
from utils.util import get_ascend_type
|
|
32
33
|
def generate_trait(desc):
|
|
33
34
|
"""
|
|
34
35
|
generate trait of kernel description
|
|
@@ -314,12 +315,12 @@ def merge_attrs(attrs_a, attrs_b):
|
|
|
314
315
|
return attrs
|
|
315
316
|
|
|
316
317
|
|
|
317
|
-
def read_repo_file(repo_file):
|
|
318
|
+
def read_repo_file(repo_file, is_json_load=True):
|
|
318
319
|
if not os.path.exists(repo_file):
|
|
319
320
|
return {}
|
|
320
321
|
with open(repo_file, 'r') as f:
|
|
321
|
-
repo =
|
|
322
|
-
return repo
|
|
322
|
+
repo = f.read()
|
|
323
|
+
return json.loads(repo) if is_json_load else repo
|
|
323
324
|
|
|
324
325
|
|
|
325
326
|
def _get_default_repository_file(process):
|
|
@@ -523,17 +524,6 @@ def _build_to_module(desc_s, desc_d, attrs=None, poly=True):
|
|
|
523
524
|
|
|
524
525
|
return _cpp_build(attrs, process, poly, segment_tree, segment_infos)
|
|
525
526
|
|
|
526
|
-
|
|
527
|
-
def _get_ascend_type(desc):
|
|
528
|
-
if "target_info" not in desc.keys():
|
|
529
|
-
return None
|
|
530
|
-
|
|
531
|
-
target_info_type = desc["target_info"]
|
|
532
|
-
if target_info_type.get("arch"):
|
|
533
|
-
return target_info_type.get("arch")
|
|
534
|
-
return None
|
|
535
|
-
|
|
536
|
-
|
|
537
527
|
def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
|
|
538
528
|
"""
|
|
539
529
|
build kernel with compute description in json format
|
|
@@ -650,7 +640,8 @@ def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
|
|
|
650
640
|
ConstructType.NORMAL: _normal_postprocess,
|
|
651
641
|
}
|
|
652
642
|
process = desc_d_in["process"]
|
|
653
|
-
|
|
643
|
+
kernel_name = desc_d_in['op']
|
|
644
|
+
ascend_type = get_ascend_type(desc_d_in)
|
|
654
645
|
ascend_type_to_section = {"Ascend910A": "1.6", "Ascend310P3": "1.7",
|
|
655
646
|
"Ascend910B1": "2.1", "Ascend910B2": "2.2", "Ascend910B3": "2.3", "Ascend910B4": "2.4"}
|
|
656
647
|
if ascend_type is not None:
|
|
@@ -659,22 +650,115 @@ def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
|
|
|
659
650
|
config_func(section)
|
|
660
651
|
if section >= "2.1":
|
|
661
652
|
attr["is_tbe_codegen"] = True
|
|
653
|
+
attr["pragma_modshift"] = True
|
|
662
654
|
segment_tree, segment_infos = get_construct_args(desc_s_in, attr, post_funcs)
|
|
655
|
+
|
|
656
|
+
if desc_d_in.get("enable_cce_lib"):
|
|
657
|
+
attr["enable_cce_lib"] = True
|
|
658
|
+
return _build_to_module_ascend_lib(desc_s_in, kernel_name)
|
|
659
|
+
|
|
663
660
|
poly = True
|
|
664
661
|
res = _cpp_build(attr, process, poly, segment_tree, segment_infos)
|
|
665
662
|
if attr.get("is_tbe_codegen"):
|
|
666
|
-
kernel_name = desc_d_in['op']
|
|
667
663
|
stmt_json = akg.tvm.save_json(res[0], "0.8.0")
|
|
668
664
|
args_json = []
|
|
669
665
|
for buf in res[1]:
|
|
670
666
|
args_json.append(akg.tvm.save_json(buf, "0.8.0"))
|
|
667
|
+
|
|
668
|
+
workspace_dict = parse_workspace_map(res[2])
|
|
669
|
+
if workspace_dict is not None:
|
|
670
|
+
attr["workspace"] = workspace_dict
|
|
671
671
|
|
|
672
|
-
is_success = build_tbe_codegen(kernel_name, stmt_json, args_json,
|
|
672
|
+
is_success = build_tbe_codegen(kernel_name, stmt_json, args_json, attr, ascend_type)
|
|
673
673
|
if not is_success:
|
|
674
674
|
raise TypeError("npu_inference codegen failed.")
|
|
675
675
|
return kernel_name
|
|
676
676
|
return res
|
|
677
677
|
|
|
678
|
+
def _build_to_module_ascend_lib(desc_s_in, kernel_name):
|
|
679
|
+
def _get_all_shape(shapes):
|
|
680
|
+
shape_split = shapes.split(".")
|
|
681
|
+
shape_list = []
|
|
682
|
+
for shape in shape_split:
|
|
683
|
+
if "-" in shape:
|
|
684
|
+
tmp_shape = shape.split("-")[0]
|
|
685
|
+
for _ in range(shape.count("-") + 1):
|
|
686
|
+
shape_list.append(tmp_shape)
|
|
687
|
+
else:
|
|
688
|
+
shape_list.append(shape)
|
|
689
|
+
return shape_list
|
|
690
|
+
|
|
691
|
+
def _get_tiling_info(desc_s):
|
|
692
|
+
compute, shape, dtype = generate_trait(desc_s)
|
|
693
|
+
tiling_info = {}
|
|
694
|
+
if "MatMul" in compute:
|
|
695
|
+
trans_a = compute.split("_")[1]
|
|
696
|
+
trans_b = compute.split("_")[-1].split(".")[0]
|
|
697
|
+
|
|
698
|
+
shape_list = _get_all_shape(shape)
|
|
699
|
+
bias_flag = int(len(shape_list) > 3)
|
|
700
|
+
tensor_A = shape_list[0]
|
|
701
|
+
tensor_B = shape_list[1]
|
|
702
|
+
|
|
703
|
+
tensor_A_split = tensor_A.split("_")
|
|
704
|
+
if len(tensor_A_split) > 2:
|
|
705
|
+
batch_size = int(tensor_A.split("_")[0])
|
|
706
|
+
else:
|
|
707
|
+
batch_size = 1
|
|
708
|
+
if trans_a == "1":
|
|
709
|
+
M = int(tensor_A_split[-1])
|
|
710
|
+
K = int(tensor_A_split[-2])
|
|
711
|
+
else:
|
|
712
|
+
M = int(tensor_A_split[-2])
|
|
713
|
+
K = int(tensor_A_split[-1])
|
|
714
|
+
|
|
715
|
+
if trans_b == "1":
|
|
716
|
+
N = int(tensor_B.split("_")[-2])
|
|
717
|
+
else:
|
|
718
|
+
N = int(tensor_B.split("_")[-1])
|
|
719
|
+
tensor_A_type = str(dtype.split("-")[0])
|
|
720
|
+
tiling_info = {"batch_size":batch_size, "M": M, "N": N, "K": K, "trans_a": int(trans_a), "trans_b": int(trans_b),
|
|
721
|
+
"tensor_A_type": tensor_A_type, "bias_flag": bias_flag, "op_type": "MatMul"}
|
|
722
|
+
elif "PagedAttention" in compute or "PagedAttentionMask" in compute:
|
|
723
|
+
shape_list = _get_all_shape(shape)
|
|
724
|
+
query = shape_list[0]
|
|
725
|
+
key_cache = shape_list[1]
|
|
726
|
+
table_shape = shape_list[3]
|
|
727
|
+
|
|
728
|
+
num_tokens = int(query.split("_")[0])
|
|
729
|
+
num_heads = int(query.split("_")[1])
|
|
730
|
+
embedding_size = int(query.split("_")[2])
|
|
731
|
+
num_blocks = int(key_cache.split("_")[0])
|
|
732
|
+
block_size = int(key_cache.split("_")[1])
|
|
733
|
+
kv_heads = int(key_cache.split("_")[2])
|
|
734
|
+
|
|
735
|
+
max_num_blocks_per_query = int(table_shape.split("_")[1])
|
|
736
|
+
tor = float(1.0 / math.sqrt(1.0 * embedding_size))
|
|
737
|
+
|
|
738
|
+
tiling_info = {"num_tokens": num_tokens, "num_heads": num_heads, "embedding_size": embedding_size,
|
|
739
|
+
"num_blocks": num_blocks, "block_size": block_size, "max_num_blocks_per_query": max_num_blocks_per_query,
|
|
740
|
+
"tor": tor, "kv_heads": kv_heads, "op_type": "PagedAttention"}
|
|
741
|
+
if "PagedAttentionMask" in compute:
|
|
742
|
+
mask_shape = shape_list[5]
|
|
743
|
+
tiling_info["mask"] = list(map(int, mask_shape.split("_")))
|
|
744
|
+
tiling_info["op_type"] = "PagedAttentionMask"
|
|
745
|
+
elif "ReshapeAndCache" in compute:
|
|
746
|
+
shape_list = _get_all_shape(shape)
|
|
747
|
+
kv = shape_list[0]
|
|
748
|
+
|
|
749
|
+
num_tokens = int(kv.split("_")[0])
|
|
750
|
+
num_heads = int(kv.split("_")[1])
|
|
751
|
+
head_size = int(kv.split("_")[2])
|
|
752
|
+
|
|
753
|
+
tiling_info = {"num_tokens": num_tokens, "num_heads": num_heads, "head_size": head_size,
|
|
754
|
+
"op_type": "ReshapeAndCache"}
|
|
755
|
+
return tiling_info
|
|
756
|
+
|
|
757
|
+
func = tvm.get_global_func("build_cce_lib")
|
|
758
|
+
tiling_info = _get_tiling_info(json.loads(desc_s_in))
|
|
759
|
+
func(kernel_name, tiling_info, None)
|
|
760
|
+
return kernel_name
|
|
761
|
+
|
|
678
762
|
def _set_backend(desc_d):
|
|
679
763
|
desc_d_process = desc_d
|
|
680
764
|
for i, op in enumerate(desc_d.get("op_desc")):
|
|
@@ -772,4 +856,4 @@ def get_tiling_space(kernel_desc, level=1, attr=None):
|
|
|
772
856
|
spaces['c0_mod'] = ret.c0_tile_mod_table.asnumpy().tolist()
|
|
773
857
|
if level >= 2:
|
|
774
858
|
spaces['tuning_space'] = ret.tiling_candidate.asnumpy().tolist()
|
|
775
|
-
return spaces
|
|
859
|
+
return spaces
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import subprocess
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
from .file_manager import FileManager
|
|
7
|
+
from .path_manager import PathManager
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CANNDataEnum(Enum):
|
|
11
|
+
OP_SUMMARY = 0
|
|
12
|
+
NPU_MEMORY = 1
|
|
13
|
+
MSPROF_TIMELINE = 2
|
|
14
|
+
STEP_TRACE = 3
|
|
15
|
+
GE_MEMORY_RECORD = 4
|
|
16
|
+
GE_OPERATOR_MEMORY = 5
|
|
17
|
+
L2_CACHE = 6
|
|
18
|
+
AI_CPU = 7
|
|
19
|
+
COMMUNICATION = 8
|
|
20
|
+
MATRIX = 9
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CANNFileParser:
|
|
24
|
+
COMMAND_SUCCESS = 0
|
|
25
|
+
ACL_TO_NPU = "acl_to_npu"
|
|
26
|
+
START_FLOW = "s"
|
|
27
|
+
END_FLOW = "f"
|
|
28
|
+
SUMMARY = "summary"
|
|
29
|
+
TIMELINE = "timeline"
|
|
30
|
+
ANALYZE = "analyze"
|
|
31
|
+
CANN_DATA_MATCH = {
|
|
32
|
+
CANNDataEnum.OP_SUMMARY: [r"^op_summary_\d+_\d+\.csv", r"^op_summary_\d+_\d+_\d+\.csv",
|
|
33
|
+
r"^op_summary_\d+_\d+_\d+_\d+\.csv"],
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
def __init__(self, profiler_path: str):
|
|
37
|
+
self._cann_path = PathManager.get_cann_path(profiler_path)
|
|
38
|
+
self._file_dict = {}
|
|
39
|
+
self._file_dispatch()
|
|
40
|
+
|
|
41
|
+
def export_cann_profiling(self):
|
|
42
|
+
if not os.path.isdir(self._cann_path):
|
|
43
|
+
return
|
|
44
|
+
self._del_summary_and_timeline_data()
|
|
45
|
+
completed_process = subprocess.run(["msprof", "--export=on", f"--output={self._cann_path}"],
|
|
46
|
+
capture_output=True)
|
|
47
|
+
if completed_process.returncode != self.COMMAND_SUCCESS:
|
|
48
|
+
raise RuntimeError(
|
|
49
|
+
f"Export CANN Profiling data failed, please verify that the ascend-toolkit is installed and set-env.sh "
|
|
50
|
+
f"is sourced. or you can execute the command to confirm the CANN Profiling export result: "
|
|
51
|
+
f"msprof --export=on --output={self._cann_path}")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_file_list_by_type(self, file_type: CANNDataEnum) -> set:
|
|
55
|
+
return self._file_dict.get(file_type, set())
|
|
56
|
+
|
|
57
|
+
def _file_dispatch(self):
|
|
58
|
+
all_file_list = PathManager.get_device_all_file_list_by_type(self._cann_path, self.SUMMARY)
|
|
59
|
+
all_file_list += PathManager.get_device_all_file_list_by_type(self._cann_path, self.TIMELINE)
|
|
60
|
+
all_file_list += PathManager.get_analyze_all_file(self._cann_path, self.ANALYZE)
|
|
61
|
+
for file_path in all_file_list:
|
|
62
|
+
if not os.path.isfile(file_path):
|
|
63
|
+
continue
|
|
64
|
+
for data_type, re_match_exp_list in self.CANN_DATA_MATCH.items():
|
|
65
|
+
for re_match_exp in re_match_exp_list:
|
|
66
|
+
if re.match(re_match_exp, os.path.basename(file_path)):
|
|
67
|
+
self._file_dict.setdefault(data_type, set()).add(file_path)
|
|
68
|
+
|
|
69
|
+
def _del_summary_and_timeline_data(self):
|
|
70
|
+
device_path = PathManager.get_device_path(self._cann_path)
|
|
71
|
+
if not device_path:
|
|
72
|
+
return
|
|
73
|
+
summary_path = os.path.join(device_path, "summary")
|
|
74
|
+
timeline_path = os.path.join(device_path, "timeline")
|
|
75
|
+
FileManager.remove_file_safety(summary_path)
|
|
76
|
+
FileManager.remove_file_safety(timeline_path)
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import json
|
|
3
|
+
import os.path
|
|
4
|
+
import shutil
|
|
5
|
+
from warnings import warn
|
|
6
|
+
|
|
7
|
+
MAX_FILE_SIZE = 1024 * 1024 * 1024 * 10
|
|
8
|
+
MAX_CSV_SIZE = 1024 * 1024 * 1024 * 5
|
|
9
|
+
|
|
10
|
+
class FileManager:
|
|
11
|
+
@classmethod
|
|
12
|
+
def file_read_all(cls, file_path: str, mode: str = "r") -> any:
|
|
13
|
+
if not os.path.isfile(file_path):
|
|
14
|
+
return ''
|
|
15
|
+
file_size = os.path.getsize(file_path)
|
|
16
|
+
if file_size <= 0:
|
|
17
|
+
return ''
|
|
18
|
+
if file_size > MAX_FILE_SIZE:
|
|
19
|
+
warn(f"The file size exceeds the preset value {MAX_FILE_SIZE / 1024 / 1024}MB, "
|
|
20
|
+
f"please check the file: {file_path}")
|
|
21
|
+
return ''
|
|
22
|
+
try:
|
|
23
|
+
with open(file_path, mode) as file:
|
|
24
|
+
return file.read()
|
|
25
|
+
except Exception:
|
|
26
|
+
raise RuntimeError(f"Can't read file: {file_path}")
|
|
27
|
+
|
|
28
|
+
@classmethod
|
|
29
|
+
def read_csv_file(cls, file_path: str, class_bean: any) -> list:
|
|
30
|
+
if not os.path.isfile(file_path):
|
|
31
|
+
return []
|
|
32
|
+
file_size = os.path.getsize(file_path)
|
|
33
|
+
if file_size <= 0:
|
|
34
|
+
return []
|
|
35
|
+
if file_size > MAX_CSV_SIZE:
|
|
36
|
+
warn(f"The file size exceeds the preset value {MAX_CSV_SIZE / 1024 / 1024}MB, "
|
|
37
|
+
f"please check the file: {file_path}")
|
|
38
|
+
return []
|
|
39
|
+
result_data = []
|
|
40
|
+
try:
|
|
41
|
+
with open(file_path, newline="") as csv_file:
|
|
42
|
+
reader = csv.DictReader(csv_file)
|
|
43
|
+
for row in reader:
|
|
44
|
+
result_data.append(class_bean(row))
|
|
45
|
+
except Exception:
|
|
46
|
+
raise RuntimeError(f"Failed to read the file: {file_path}")
|
|
47
|
+
return result_data
|
|
48
|
+
|
|
49
|
+
@classmethod
|
|
50
|
+
def remove_file_safety(cls, path: str):
|
|
51
|
+
if os.path.exists(path):
|
|
52
|
+
try:
|
|
53
|
+
shutil.rmtree(path)
|
|
54
|
+
except Exception:
|
|
55
|
+
print(f"[WARNING] [{os.getpid()}] profiler.py: Can't remove the directory: {path}")
|
|
56
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from .op_summary_headers import OpSummaryHeaders
|
|
2
|
+
|
|
3
|
+
class OpSummaryBean:
|
|
4
|
+
headers = []
|
|
5
|
+
|
|
6
|
+
def __init__(self, data: list):
|
|
7
|
+
self._data = data
|
|
8
|
+
|
|
9
|
+
@property
|
|
10
|
+
def row(self) -> list:
|
|
11
|
+
row = []
|
|
12
|
+
read_headers = OpSummaryBean.headers if OpSummaryBean.headers else self._data.keys()
|
|
13
|
+
for field_name in read_headers:
|
|
14
|
+
row.append(self._data.get(field_name, ""))
|
|
15
|
+
return row
|
|
16
|
+
|
|
17
|
+
@property
|
|
18
|
+
def ts(self) -> float:
|
|
19
|
+
return float(self._data.get(OpSummaryHeaders.TASK_START_TIME, 0))
|
|
20
|
+
|
|
21
|
+
@property
|
|
22
|
+
def all_headers(self) -> list:
|
|
23
|
+
return list(self._data.keys())
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
class OpSummaryHeaders(object):
|
|
2
|
+
# op_summary
|
|
3
|
+
TASK_START_TIME = "Task Start Time(us)"
|
|
4
|
+
AIC_TOTAL_CYCLES = "aic_total_cycles"
|
|
5
|
+
AIV_TOTAL_CYCLES = "aiv_total_cycles"
|
|
6
|
+
TASK_DURATION = "Task Duration(us)"
|
|
7
|
+
OP_SUMMARY_SHOW_HEADERS = ["Op Name", "OP Type", "Task Type", TASK_START_TIME, TASK_DURATION,
|
|
8
|
+
"Task Wait Time(us)", "Block Dim" ,AIC_TOTAL_CYCLES, AIV_TOTAL_CYCLES]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from .op_summary_headers import OpSummaryHeaders
|
|
2
|
+
from .file_manager import FileManager
|
|
3
|
+
from .op_summary_bean import OpSummaryBean
|
|
4
|
+
from .cann_file_parser import CANNFileParser, CANNDataEnum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class OpSummaryParser():
|
|
8
|
+
|
|
9
|
+
def __init__(self, profiler_path: str):
|
|
10
|
+
self._profiler_path = profiler_path
|
|
11
|
+
|
|
12
|
+
@classmethod
|
|
13
|
+
def _project_map_for_headers(cls, input_headers: list):
|
|
14
|
+
project_map_dict = {OpSummaryHeaders.OP_SUMMARY_SHOW_HEADERS[i]: OpSummaryHeaders.OP_SUMMARY_KERNEL_BASE_HEADERS[i] for i in
|
|
15
|
+
range(len(OpSummaryHeaders.OP_SUMMARY_SHOW_HEADERS))}
|
|
16
|
+
output_headers = []
|
|
17
|
+
for header in input_headers:
|
|
18
|
+
if header in project_map_dict:
|
|
19
|
+
output_headers.append(project_map_dict.get(header))
|
|
20
|
+
else:
|
|
21
|
+
output_headers.append(header)
|
|
22
|
+
return output_headers
|
|
23
|
+
|
|
24
|
+
def generate_op_summary_data(self) -> dict:
|
|
25
|
+
op_summary_file_set = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.OP_SUMMARY)
|
|
26
|
+
summary_data = []
|
|
27
|
+
for file_path in op_summary_file_set:
|
|
28
|
+
all_data = FileManager.read_csv_file(file_path, OpSummaryBean)
|
|
29
|
+
if all_data:
|
|
30
|
+
OpSummaryBean.headers = OpSummaryHeaders.OP_SUMMARY_SHOW_HEADERS
|
|
31
|
+
output_headers = OpSummaryBean.headers
|
|
32
|
+
summary_data.extend([data.row for data in all_data])
|
|
33
|
+
else:
|
|
34
|
+
raise RuntimeError("parse op summary csv failed.")
|
|
35
|
+
|
|
36
|
+
return self.create_dict(summary_data[0],output_headers)
|
|
37
|
+
|
|
38
|
+
def create_dict(self,summary_data,headers):
|
|
39
|
+
summary_dict={}
|
|
40
|
+
for i in range(len(summary_data)):
|
|
41
|
+
summary_dict[headers[i]] = summary_data[i]
|
|
42
|
+
return summary_dict
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
class PathManager:
|
|
5
|
+
|
|
6
|
+
@classmethod
|
|
7
|
+
def get_cann_path(cls, profiler_path: str) -> str:
|
|
8
|
+
sub_dirs = os.listdir(os.path.realpath(profiler_path))
|
|
9
|
+
for sub_dir in sub_dirs:
|
|
10
|
+
sub_path = os.path.join(profiler_path, sub_dir)
|
|
11
|
+
if os.path.isdir(sub_path) and re.match(r"^PROF_\d+_\d+_[a-zA-Z]+", sub_dir):
|
|
12
|
+
return sub_path
|
|
13
|
+
return ""
|
|
14
|
+
|
|
15
|
+
@classmethod
|
|
16
|
+
def get_device_path(cls, cann_path: str) -> str:
|
|
17
|
+
sub_dirs = os.listdir(os.path.realpath(cann_path))
|
|
18
|
+
for sub_dir in sub_dirs:
|
|
19
|
+
sub_path = os.path.join(cann_path, sub_dir)
|
|
20
|
+
if os.path.isdir(sub_path) and re.match(r"^device_\d", sub_dir):
|
|
21
|
+
return sub_path
|
|
22
|
+
return ""
|
|
23
|
+
|
|
24
|
+
@classmethod
|
|
25
|
+
def get_start_info_path(cls, cann_path: str) -> str:
|
|
26
|
+
start_info_path = os.path.join(cann_path, "host", "start_info")
|
|
27
|
+
if os.path.exists(start_info_path):
|
|
28
|
+
return start_info_path
|
|
29
|
+
device_path = cls.get_device_path(cann_path)
|
|
30
|
+
if not device_path:
|
|
31
|
+
return ""
|
|
32
|
+
device_path_split = os.path.basename(device_path).split("_")
|
|
33
|
+
if len(device_path_split) != 2:
|
|
34
|
+
return ""
|
|
35
|
+
start_info_file = f"start_info.{device_path_split[1]}"
|
|
36
|
+
start_info_path = os.path.join(device_path, start_info_file)
|
|
37
|
+
if os.path.exists(start_info_path):
|
|
38
|
+
return start_info_path
|
|
39
|
+
return ""
|
|
40
|
+
|
|
41
|
+
@classmethod
|
|
42
|
+
def get_device_all_file_list_by_type(cls, profiler_path: str, summary_or_timeline: str) -> list:
|
|
43
|
+
file_list = []
|
|
44
|
+
_path = os.path.join(cls.get_device_path(profiler_path), summary_or_timeline)
|
|
45
|
+
if not os.path.isdir(_path):
|
|
46
|
+
return file_list
|
|
47
|
+
sub_files = os.listdir(os.path.realpath(_path))
|
|
48
|
+
if not sub_files:
|
|
49
|
+
return file_list
|
|
50
|
+
for sub_file in sub_files:
|
|
51
|
+
file_list.append(os.path.join(_path, sub_file))
|
|
52
|
+
return file_list
|
|
53
|
+
|
|
54
|
+
@classmethod
|
|
55
|
+
def get_analyze_all_file(cls, profiler_path: str, analyze: str) -> list:
|
|
56
|
+
file_list = []
|
|
57
|
+
_path = os.path.join(profiler_path, analyze)
|
|
58
|
+
if not os.path.isdir(_path):
|
|
59
|
+
return file_list
|
|
60
|
+
sub_files = os.listdir(os.path.realpath(_path))
|
|
61
|
+
if not sub_files:
|
|
62
|
+
return file_list
|
|
63
|
+
for sub_file in sub_files:
|
|
64
|
+
file_list.append(os.path.join(_path, sub_file))
|
|
65
|
+
return file_list
|
|
@@ -519,8 +519,13 @@ def _update_workspace_data(kernel_name, input_for_mod, output_indexes):
|
|
|
519
519
|
kernel_desc = json.loads(kernel_json)
|
|
520
520
|
if "workspace" in kernel_desc:
|
|
521
521
|
workspace_bytes = kernel_desc["workspace"]["size"]
|
|
522
|
-
|
|
523
|
-
|
|
522
|
+
workspace_num = kernel_desc["workspace"]["num"]
|
|
523
|
+
if len(workspace_bytes) != workspace_num:
|
|
524
|
+
raise ValueError("workspace num %s and size shape %s are not equal!"
|
|
525
|
+
% (len(workspace_bytes), workspace_num))
|
|
526
|
+
for i in range(kernel_desc["workspace"]["num"]):
|
|
527
|
+
item = np.full(workspace_bytes[i], np.nan, np.int8)
|
|
528
|
+
workspace_tensors.append(item)
|
|
524
529
|
else:
|
|
525
530
|
logging.warning("Kernel json file %s not found", json_file)
|
|
526
531
|
|
|
@@ -24,9 +24,23 @@ import akg.tvm
|
|
|
24
24
|
from akg.global_configs import get_kernel_meta_path
|
|
25
25
|
from akg.utils.util import parse_workspace, write_code
|
|
26
26
|
|
|
27
|
+
def set_ascend910b(code, core_type, title_dict):
|
|
28
|
+
if len(core_type) == 0:
|
|
29
|
+
return
|
|
30
|
+
if core_type == "MIX":
|
|
31
|
+
title_dict["magic"] = "RT_DEV_BINARY_MAGIC_ELF"
|
|
32
|
+
title_dict["coreType"] = "MIX"
|
|
33
|
+
title_dict["intercoreSync"] = 1
|
|
34
|
+
title_dict["taskRation"] = "1:2"
|
|
35
|
+
elif core_type == "AIC":
|
|
36
|
+
title_dict["coreType"] = "AiCore"
|
|
37
|
+
title_dict["magic"] = "RT_DEV_BINARY_MAGIC_ELF_AICUBE"
|
|
38
|
+
elif core_type == "AIV":
|
|
39
|
+
title_dict["coreType"] = "VectorCore"
|
|
40
|
+
title_dict["magic"] = "RT_DEV_BINARY_MAGIC_ELF_AIVEC"
|
|
27
41
|
|
|
28
42
|
@akg.tvm.register_func
|
|
29
|
-
def tvm_callback_cce_postproc(code, block_dim=1, workspace=None):
|
|
43
|
+
def tvm_callback_cce_postproc(code, block_dim=1, workspace=None, core_type=""):
|
|
30
44
|
"""Function for dumping ascend meta."""
|
|
31
45
|
if "__aicore__" in code:
|
|
32
46
|
title_dict = {"magic": "RT_DEV_BINARY_MAGIC_ELF"}
|
|
@@ -35,8 +49,13 @@ def tvm_callback_cce_postproc(code, block_dim=1, workspace=None):
|
|
|
35
49
|
title_dict = dict()
|
|
36
50
|
|
|
37
51
|
# kernel name
|
|
38
|
-
|
|
39
|
-
|
|
52
|
+
if "_kernel" in code:
|
|
53
|
+
kernel_name = code.split("_kernel")[0].split(" ")[-1]
|
|
54
|
+
title_dict["kernelName"] = kernel_name + "_kernel0"
|
|
55
|
+
elif "_mix_aic" in code:
|
|
56
|
+
kernel_name = code.split("_mix_aic")[0].split(" ")[-1]
|
|
57
|
+
title_dict["kernelName"] = kernel_name
|
|
58
|
+
set_ascend910b(code, core_type, title_dict)
|
|
40
59
|
|
|
41
60
|
# thread info
|
|
42
61
|
title_dict["blockDim"] = block_dim
|
|
@@ -43,10 +43,12 @@ from akg.utils import custom_tiling as ct_util
|
|
|
43
43
|
from akg.utils import validation_check as vc_util
|
|
44
44
|
from akg.utils.dsl_create import TensorUtils
|
|
45
45
|
from akg.utils.util import parse_kwargs
|
|
46
|
-
from akg.backend.parsing_profiling_data import HWTSLogParser
|
|
46
|
+
from akg.backend.parsing_profiling_data import HWTSLogParser, max_time_consume
|
|
47
47
|
from akg.backend.parsing_profiling_data import validate_and_normalize_path
|
|
48
48
|
from akg.backend import aic_model
|
|
49
|
-
|
|
49
|
+
from .ascend_profilier.cann_file_parser import CANNFileParser
|
|
50
|
+
from .ascend_profilier.op_summary_parser import OpSummaryParser
|
|
51
|
+
from .ascend_profilier.op_summary_headers import OpSummaryHeaders
|
|
50
52
|
sh = logging.StreamHandler(sys.stdout)
|
|
51
53
|
logging.getLogger().addHandler(sh)
|
|
52
54
|
logging.getLogger().setLevel(logging.INFO)
|
|
@@ -169,7 +171,7 @@ def gen_name_kernel(kernel, dtype, shapes):
|
|
|
169
171
|
return res
|
|
170
172
|
|
|
171
173
|
|
|
172
|
-
def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
|
|
174
|
+
def profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=None):
|
|
173
175
|
"""
|
|
174
176
|
Function for collecting cycle data from device.
|
|
175
177
|
|
|
@@ -180,21 +182,45 @@ def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
|
|
|
180
182
|
tuning: tuning model.
|
|
181
183
|
device_id: device_id on device.
|
|
182
184
|
"""
|
|
183
|
-
akg.tvm.get_global_func("ascend_start_profiling")(
|
|
185
|
+
akg.tvm.get_global_func("ascend_start_profiling")(kernel_name)
|
|
184
186
|
time_before_launch = time.time()
|
|
185
187
|
output_data = ascend_run(kernel_name, args, outputs, device_id)
|
|
186
188
|
akg.tvm.get_global_func("ascend_stop_profiling")()
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
189
|
+
cycle = 0
|
|
190
|
+
if arch is not None and "910B" in arch:
|
|
191
|
+
# for ascend910B profiling
|
|
192
|
+
cycle = profiling_analyse_910B(time_before_launch)
|
|
193
|
+
else:
|
|
194
|
+
cycle = profiling_analyse(device_id, time_before_launch)
|
|
195
|
+
logging.info('=====Task Duration(us)==============================')
|
|
190
196
|
if cycle != PROF_ERROR_CODE:
|
|
191
197
|
logging.info(cycle)
|
|
192
198
|
else:
|
|
193
|
-
logging.error("OOPS, can't correctly
|
|
199
|
+
logging.error("OOPS, can't correctly Task Duration!")
|
|
194
200
|
TestUtils.record_cycle(cycle)
|
|
195
|
-
logging.info('=====
|
|
201
|
+
logging.info('=====Task Duration(us)==============================')
|
|
196
202
|
return output_data, {'run_time': cycle}
|
|
197
203
|
|
|
204
|
+
def profiling_analyse_910B(time_before_launch):
    """
    Export the CANN profiling data and return the recorded task duration.

    Args:
        time_before_launch: timestamp taken before the kernel launch; the
            profiling file must not be older than this.

    Returns:
        float, the value stored under OpSummaryHeaders.TASK_DURATION in the op
        summary, or max_time_consume when that entry is absent.

    Raises:
        RuntimeError: if the PROFILING_DIR environment variable is not set, or
            the profiling file predates time_before_launch.
    """
    profiling_dir = os.getenv('PROFILING_DIR')
    if profiling_dir is None:
        raise RuntimeError("Environment PROFILING_DIR not set!")
    profiling_dir = validate_and_normalize_path(profiling_dir)

    # Export first, then build the summary parser on the same directory.
    CANNFileParser(profiling_dir).export_cann_profiling()
    summary_parser = OpSummaryParser(profiling_dir)

    # NOTE(review): reads a private attribute of OpSummaryParser; a public
    # accessor would be preferable if one exists.
    prof_file = summary_parser._profiler_path
    logging.debug("prof file is: %s", os.path.basename(prof_file))

    # A file older than the launch cannot belong to this run.
    if os.path.getctime(prof_file) < time_before_launch:
        raise RuntimeError("The PROF file is too old")

    summary = summary_parser.generate_op_summary_data()
    return float(summary.get(OpSummaryHeaders.TASK_DURATION, max_time_consume))
|
|
198
224
|
|
|
199
225
|
def profiling_analyse(device_id, time_before_launch):
|
|
200
226
|
"""analyse profiling."""
|
|
@@ -348,12 +374,12 @@ def get_kernel_name_from_mod(mod):
|
|
|
348
374
|
return kernel_name
|
|
349
375
|
|
|
350
376
|
|
|
351
|
-
def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1):
|
|
377
|
+
def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1, arch=None):
    """
    Run a built module through profiling_mode_run.

    Args:
        mod: module whose kernel name is looked up with get_kernel_name_from_mod.
        args: forwarded unchanged to profiling_mode_run.
        outputs: forwarded unchanged to profiling_mode_run.
        tuning: forwarded unchanged to profiling_mode_run.
        device_id: device to run on; -1 means "take it from the DEVICE_ID
            environment variable, defaulting to 0".
        arch: forwarded to profiling_mode_run as the keyword argument arch.

    Returns:
        Whatever profiling_mode_run returns.
    """
    gc.collect()
    use_env_device = device_id == -1
    target_device = int(os.environ.get("DEVICE_ID", 0)) if use_env_device else device_id
    name = get_kernel_name_from_mod(mod)
    return profiling_mode_run(name, args, outputs, tuning, target_device, arch=arch)
|
|
357
383
|
|
|
358
384
|
|
|
359
385
|
def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, device_id=-1, repeat_time=400):
|
|
@@ -387,7 +413,7 @@ def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, devi
|
|
|
387
413
|
|
|
388
414
|
|
|
389
415
|
@func_time_required
|
|
390
|
-
def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400):
|
|
416
|
+
def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400, arch=None):
|
|
391
417
|
"""
|
|
392
418
|
unified run CCE kernel api.
|
|
393
419
|
|
|
@@ -398,7 +424,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
|
|
|
398
424
|
tuning (bool): tuning model.
|
|
399
425
|
device_id: device_id on device.
|
|
400
426
|
expect: when mode in ["compile_cloud", "compile_mini"], return it.
|
|
401
|
-
|
|
427
|
+
arch: Ascend arch type
|
|
402
428
|
Returns:
|
|
403
429
|
output numpy array, or tuple of numpy array if multi-output.
|
|
404
430
|
"""
|
|
@@ -414,7 +440,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
|
|
|
414
440
|
run_args = [kernel_name, args, outputs, device_id]
|
|
415
441
|
if os.environ.get("PROFILING_MODE") == "true":
|
|
416
442
|
run_func = profiling_mode_run
|
|
417
|
-
run_args = [kernel_name, args, outputs, tuning, device_id]
|
|
443
|
+
run_args = [kernel_name, args, outputs, tuning, device_id, arch]
|
|
418
444
|
if os.environ.get("PROFILING_DIR", None) is None:
|
|
419
445
|
os.environ["PROFILING_DIR"] = "."
|
|
420
446
|
logging.info("[RUNTIME_WARNING] In profiling mode, while profiling dir is not set!Set to current dir by default.")
|
|
@@ -1031,7 +1057,7 @@ def npu_op_build(s, op_var, shape_var, kernel_name="", binds=None, attrs=None,
|
|
|
1031
1057
|
for buf in enumerate(arg_list):
|
|
1032
1058
|
args_json.append(akg.tvm.save_json(buf, "0.8.0"))
|
|
1033
1059
|
|
|
1034
|
-
is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs
|
|
1060
|
+
is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs)
|
|
1035
1061
|
if not is_success:
|
|
1036
1062
|
raise TypeError("npu_inference codegen failed.")
|
|
1037
1063
|
return kernel_name
|