mindspore 2.2.0__cp37-cp37m-manylinux1_x86_64.whl → 2.2.11__cp37-cp37m-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_akg/akg/composite/build_module.py +104 -20
  3. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  4. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  5. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  6. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  7. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  8. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  9. mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
  10. mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
  11. mindspore/_akg/akg/utils/kernel_exec.py +41 -15
  12. mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
  13. mindspore/_akg/akg/utils/util.py +56 -1
  14. mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
  15. mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
  16. mindspore/_checkparam.py +3 -3
  17. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  18. mindspore/_extends/graph_kernel/splitter.py +3 -2
  19. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
  20. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
  21. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  22. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
  23. mindspore/_extends/parse/__init__.py +3 -2
  24. mindspore/_extends/parse/parser.py +6 -1
  25. mindspore/_extends/parse/standard_method.py +14 -11
  26. mindspore/_extends/remote/kernel_build_server.py +2 -1
  27. mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
  28. mindspore/bin/cache_admin +0 -0
  29. mindspore/bin/cache_server +0 -0
  30. mindspore/common/_utils.py +16 -0
  31. mindspore/common/api.py +1 -1
  32. mindspore/common/auto_dynamic_shape.py +81 -85
  33. mindspore/common/dump.py +1 -1
  34. mindspore/common/tensor.py +3 -20
  35. mindspore/config/op_info.config +1 -1
  36. mindspore/context.py +11 -4
  37. mindspore/dataset/engine/cache_client.py +8 -5
  38. mindspore/dataset/engine/datasets_standard_format.py +5 -0
  39. mindspore/dataset/vision/transforms.py +21 -21
  40. mindspore/experimental/optim/adam.py +1 -1
  41. mindspore/gen_ops.py +1 -1
  42. mindspore/include/api/model.h +17 -0
  43. mindspore/include/api/status.h +8 -3
  44. mindspore/lib/libdnnl.so.2 +0 -0
  45. mindspore/lib/libmindspore.so +0 -0
  46. mindspore/lib/libmindspore_backend.so +0 -0
  47. mindspore/lib/libmindspore_common.so +0 -0
  48. mindspore/lib/libmindspore_core.so +0 -0
  49. mindspore/lib/libmindspore_glog.so.0 +0 -0
  50. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  51. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  52. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  53. mindspore/lib/libmindspore_shared_lib.so +0 -0
  54. mindspore/lib/libnnacl.so +0 -0
  55. mindspore/lib/libopencv_core.so.4.5 +0 -0
  56. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  57. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  58. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  59. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  60. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  61. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  62. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  63. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  64. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  65. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  66. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  67. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  68. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  69. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  70. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  71. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  72. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  73. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
  74. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  75. mindspore/lib/plugin/ascend/libakg.so +0 -0
  76. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  77. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  78. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  79. mindspore/lib/plugin/cpu/libakg.so +0 -0
  80. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  81. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  82. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  83. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  84. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  85. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  86. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  87. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  88. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  89. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  90. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  91. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  92. mindspore/nn/cell.py +0 -3
  93. mindspore/nn/layer/activation.py +4 -5
  94. mindspore/nn/layer/conv.py +39 -23
  95. mindspore/nn/layer/flash_attention.py +54 -129
  96. mindspore/nn/layer/math.py +3 -7
  97. mindspore/nn/layer/rnn_cells.py +5 -5
  98. mindspore/nn/wrap/__init__.py +4 -2
  99. mindspore/nn/wrap/cell_wrapper.py +12 -3
  100. mindspore/numpy/utils_const.py +5 -5
  101. mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
  102. mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
  103. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
  104. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  105. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  106. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  107. mindspore/ops/_utils/utils.py +2 -0
  108. mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
  109. mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
  110. mindspore/ops/function/array_func.py +10 -7
  111. mindspore/ops/function/grad/grad_func.py +0 -1
  112. mindspore/ops/function/nn_func.py +98 -9
  113. mindspore/ops/function/random_func.py +2 -1
  114. mindspore/ops/op_info_register.py +24 -21
  115. mindspore/ops/operations/__init__.py +6 -2
  116. mindspore/ops/operations/_grad_ops.py +25 -6
  117. mindspore/ops/operations/_inner_ops.py +155 -23
  118. mindspore/ops/operations/array_ops.py +9 -7
  119. mindspore/ops/operations/comm_ops.py +2 -2
  120. mindspore/ops/operations/custom_ops.py +85 -68
  121. mindspore/ops/operations/inner_ops.py +26 -3
  122. mindspore/ops/operations/math_ops.py +7 -6
  123. mindspore/ops/operations/nn_ops.py +193 -49
  124. mindspore/parallel/_parallel_serialization.py +10 -3
  125. mindspore/parallel/_tensor.py +4 -1
  126. mindspore/parallel/checkpoint_transform.py +13 -2
  127. mindspore/parallel/shard.py +17 -10
  128. mindspore/profiler/common/util.py +1 -0
  129. mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
  130. mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
  131. mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
  132. mindspore/profiler/parser/ascend_op_generator.py +1 -1
  133. mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
  134. mindspore/profiler/parser/base_timeline_generator.py +1 -1
  135. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
  136. mindspore/profiler/parser/framework_parser.py +1 -1
  137. mindspore/profiler/parser/profiler_info.py +19 -0
  138. mindspore/profiler/profiling.py +46 -24
  139. mindspore/rewrite/api/pattern_engine.py +1 -1
  140. mindspore/rewrite/parsers/for_parser.py +7 -7
  141. mindspore/rewrite/parsers/module_parser.py +4 -4
  142. mindspore/rewrite/symbol_tree.py +1 -4
  143. mindspore/run_check/_check_version.py +5 -3
  144. mindspore/safeguard/rewrite_obfuscation.py +52 -28
  145. mindspore/scipy/ops.py +55 -5
  146. mindspore/scipy/optimize/__init__.py +3 -2
  147. mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
  148. mindspore/train/callback/_summary_collector.py +1 -1
  149. mindspore/train/dataset_helper.py +1 -0
  150. mindspore/train/model.py +2 -2
  151. mindspore/train/serialization.py +97 -11
  152. mindspore/train/summary/_summary_adapter.py +1 -1
  153. mindspore/train/summary/summary_record.py +23 -7
  154. mindspore/version.py +1 -1
  155. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
  156. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
  157. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
  158. mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
  159. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
  160. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
  161. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
  162. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
  163. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  164. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  165. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  166. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  167. /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
  168. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
  169. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
  170. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/.commit_id CHANGED
@@ -1 +1 @@
1
- __commit_id__ = '[sha1]:9390851d,[branch]:(HEAD,origin/r2.2,r2.2)'
1
+ __commit_id__ = '[sha1]:8c390933,[branch]:(HEAD,origin/r2.2,r2.2)'
@@ -19,16 +19,17 @@ import os
19
19
  import json
20
20
  from collections.abc import Iterable
21
21
  import akg
22
+ import math
22
23
  from akg import tvm
23
24
  from tvm.autotvm.env import AutotvmGlobalScope
25
+ from akg.utils.util import parse_workspace_map
24
26
  from akg.utils.tbe_codegen_utils import build_tbe_codegen
25
27
  from akg.utils.kernel_exec import ReturnType, is_symbolic_tiling
26
28
  from .split_stitch import split_stitch_attr
27
29
  from .construct_args import ConstructType, ConstructKey
28
30
  from .construct_args import get_construct_args, get_tune_construct_args, \
29
31
  should_enable_attr, get_stmt_for_tune, add_attrs_in_segment_infos
30
-
31
-
32
+ from utils.util import get_ascend_type
32
33
  def generate_trait(desc):
33
34
  """
34
35
  generate trait of kernel description
@@ -314,12 +315,12 @@ def merge_attrs(attrs_a, attrs_b):
314
315
  return attrs
315
316
 
316
317
 
317
- def read_repo_file(repo_file):
318
+ def read_repo_file(repo_file, is_json_load=True):
318
319
  if not os.path.exists(repo_file):
319
320
  return {}
320
321
  with open(repo_file, 'r') as f:
321
- repo = json.loads(f.read())
322
- return repo
322
+ repo = f.read()
323
+ return json.loads(repo) if is_json_load else repo
323
324
 
324
325
 
325
326
  def _get_default_repository_file(process):
@@ -523,17 +524,6 @@ def _build_to_module(desc_s, desc_d, attrs=None, poly=True):
523
524
 
524
525
  return _cpp_build(attrs, process, poly, segment_tree, segment_infos)
525
526
 
526
-
527
- def _get_ascend_type(desc):
528
- if "target_info" not in desc.keys():
529
- return None
530
-
531
- target_info_type = desc["target_info"]
532
- if target_info_type.get("arch"):
533
- return target_info_type.get("arch")
534
- return None
535
-
536
-
537
527
  def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
538
528
  """
539
529
  build kernel with compute description in json format
@@ -650,7 +640,8 @@ def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
650
640
  ConstructType.NORMAL: _normal_postprocess,
651
641
  }
652
642
  process = desc_d_in["process"]
653
- ascend_type = _get_ascend_type(desc_d_in)
643
+ kernel_name = desc_d_in['op']
644
+ ascend_type = get_ascend_type(desc_d_in)
654
645
  ascend_type_to_section = {"Ascend910A": "1.6", "Ascend310P3": "1.7",
655
646
  "Ascend910B1": "2.1", "Ascend910B2": "2.2", "Ascend910B3": "2.3", "Ascend910B4": "2.4"}
656
647
  if ascend_type is not None:
@@ -659,22 +650,115 @@ def _build_to_module_ascend(desc_s_in, desc_d_in, attr, use_repo=True):
659
650
  config_func(section)
660
651
  if section >= "2.1":
661
652
  attr["is_tbe_codegen"] = True
653
+ attr["pragma_modshift"] = True
662
654
  segment_tree, segment_infos = get_construct_args(desc_s_in, attr, post_funcs)
655
+
656
+ if desc_d_in.get("enable_cce_lib"):
657
+ attr["enable_cce_lib"] = True
658
+ return _build_to_module_ascend_lib(desc_s_in, kernel_name)
659
+
663
660
  poly = True
664
661
  res = _cpp_build(attr, process, poly, segment_tree, segment_infos)
665
662
  if attr.get("is_tbe_codegen"):
666
- kernel_name = desc_d_in['op']
667
663
  stmt_json = akg.tvm.save_json(res[0], "0.8.0")
668
664
  args_json = []
669
665
  for buf in res[1]:
670
666
  args_json.append(akg.tvm.save_json(buf, "0.8.0"))
667
+
668
+ workspace_dict = parse_workspace_map(res[2])
669
+ if workspace_dict is not None:
670
+ attr["workspace"] = workspace_dict
671
671
 
672
- is_success = build_tbe_codegen(kernel_name, stmt_json, args_json, ascend_type, attr.get("dynamic", False))
672
+ is_success = build_tbe_codegen(kernel_name, stmt_json, args_json, attr, ascend_type)
673
673
  if not is_success:
674
674
  raise TypeError("npu_inference codegen failed.")
675
675
  return kernel_name
676
676
  return res
677
677
 
678
def _build_to_module_ascend_lib(desc_s_in, kernel_name):
    """
    Build a kernel through the "build_cce_lib" global function.

    The JSON kernel description is decoded, reduced to a tiling-info dict by
    inspecting the traits returned from generate_trait, and handed to the
    registered "build_cce_lib" function together with the kernel name.

    Args:
        desc_s_in: kernel description as a JSON string.
        kernel_name: name passed to "build_cce_lib" and returned to the caller.

    Returns:
        kernel_name, unchanged.
    """

    def _get_all_shape(shapes):
        """Split the "."-separated shape trait into one entry per item."""
        expanded = []
        for group in shapes.split("."):
            if "-" not in group:
                expanded.append(group)
                continue
            # A "-"-joined group contributes its first member once per member.
            expanded.extend([group.split("-")[0]] * (group.count("-") + 1))
        return expanded

    def _matmul_tiling(compute, shape, dtype):
        """Tiling info for a compute trait containing "MatMul"."""
        # NOTE(review): assumes the compute trait carries the transpose flags
        # as its 2nd "_" field and its last "_" field -- confirm against
        # generate_trait.
        compute_fields = compute.split("_")
        trans_a = compute_fields[1]
        trans_b = compute_fields[-1].split(".")[0]

        shapes = _get_all_shape(shape)
        bias_flag = int(len(shapes) > 3)
        dims_a = shapes[0].split("_")
        dims_b = shapes[1].split("_")

        batch_size = int(dims_a[0]) if len(dims_a) > 2 else 1
        # The last two dims of A are (M, K), swapped when A is transposed.
        m_index, k_index = (-1, -2) if trans_a == "1" else (-2, -1)
        M = int(dims_a[m_index])
        K = int(dims_a[k_index])
        N = int(dims_b[-2] if trans_b == "1" else dims_b[-1])
        tensor_A_type = str(dtype.split("-")[0])
        return {"batch_size": batch_size, "M": M, "N": N, "K": K, "trans_a": int(trans_a), "trans_b": int(trans_b),
                "tensor_A_type": tensor_A_type, "bias_flag": bias_flag, "op_type": "MatMul"}

    def _paged_attention_tiling(compute, shape):
        """Tiling info for "PagedAttention" / "PagedAttentionMask" traits."""
        shapes = _get_all_shape(shape)
        query_dims = shapes[0].split("_")
        cache_dims = shapes[1].split("_")
        table_dims = shapes[3].split("_")

        num_tokens = int(query_dims[0])
        num_heads = int(query_dims[1])
        embedding_size = int(query_dims[2])
        num_blocks = int(cache_dims[0])
        block_size = int(cache_dims[1])
        kv_heads = int(cache_dims[2])
        max_num_blocks_per_query = int(table_dims[1])
        tor = 1.0 / math.sqrt(embedding_size)

        info = {"num_tokens": num_tokens, "num_heads": num_heads, "embedding_size": embedding_size,
                "num_blocks": num_blocks, "block_size": block_size,
                "max_num_blocks_per_query": max_num_blocks_per_query,
                "tor": tor, "kv_heads": kv_heads, "op_type": "PagedAttention"}
        if "PagedAttentionMask" in compute:
            info["mask"] = list(map(int, shapes[5].split("_")))
            info["op_type"] = "PagedAttentionMask"
        return info

    def _reshape_and_cache_tiling(shape):
        """Tiling info for a compute trait containing "ReshapeAndCache"."""
        kv_dims = _get_all_shape(shape)[0].split("_")
        num_tokens = int(kv_dims[0])
        num_heads = int(kv_dims[1])
        head_size = int(kv_dims[2])
        return {"num_tokens": num_tokens, "num_heads": num_heads, "head_size": head_size,
                "op_type": "ReshapeAndCache"}

    def _get_tiling_info(desc_s):
        """Pick the tiling builder by substring match on the compute trait."""
        compute, shape, dtype = generate_trait(desc_s)
        if "MatMul" in compute:
            return _matmul_tiling(compute, shape, dtype)
        # "PagedAttentionMask" contains "PagedAttention", so one test covers both.
        if "PagedAttention" in compute:
            return _paged_attention_tiling(compute, shape)
        if "ReshapeAndCache" in compute:
            return _reshape_and_cache_tiling(shape)
        # Any other compute trait yields an empty tiling dict.
        return {}

    build_cce_lib = tvm.get_global_func("build_cce_lib")
    tiling_info = _get_tiling_info(json.loads(desc_s_in))
    build_cce_lib(kernel_name, tiling_info, None)
    return kernel_name
761
+
678
762
  def _set_backend(desc_d):
679
763
  desc_d_process = desc_d
680
764
  for i, op in enumerate(desc_d.get("op_desc")):
@@ -772,4 +856,4 @@ def get_tiling_space(kernel_desc, level=1, attr=None):
772
856
  spaces['c0_mod'] = ret.c0_tile_mod_table.asnumpy().tolist()
773
857
  if level >= 2:
774
858
  spaces['tuning_space'] = ret.tiling_candidate.asnumpy().tolist()
775
- return spaces
859
+ return spaces
@@ -0,0 +1,76 @@
1
+ import os
2
+ import re
3
+ import subprocess
4
+ from enum import Enum
5
+
6
+ from .file_manager import FileManager
7
+ from .path_manager import PathManager
8
+
9
+
10
class CANNDataEnum(Enum):
    """Categories of CANN profiling output files, used as file-index keys."""
    # Only OP_SUMMARY has filename patterns registered in
    # CANNFileParser.CANN_DATA_MATCH in this file; the other members have no
    # visible matching rule here.
    OP_SUMMARY = 0
    NPU_MEMORY = 1
    MSPROF_TIMELINE = 2
    STEP_TRACE = 3
    GE_MEMORY_RECORD = 4
    GE_OPERATOR_MEMORY = 5
    L2_CACHE = 6
    AI_CPU = 7
    COMMUNICATION = 8
    MATRIX = 9
21
+
22
+
23
class CANNFileParser:
    """
    Locate the CANN profiling directory under a profiler path and index its files.

    On construction the first "PROF_*" directory reported by
    PathManager.get_cann_path is remembered and the files found in its
    summary / timeline / analyze directories are grouped by CANNDataEnum
    according to the filename patterns in CANN_DATA_MATCH.
    """
    COMMAND_SUCCESS = 0
    ACL_TO_NPU = "acl_to_npu"
    START_FLOW = "s"
    END_FLOW = "f"
    SUMMARY = "summary"
    TIMELINE = "timeline"
    ANALYZE = "analyze"
    # Basename patterns (anchored at the start) per data type.
    CANN_DATA_MATCH = {
        CANNDataEnum.OP_SUMMARY: [r"^op_summary_\d+_\d+\.csv", r"^op_summary_\d+_\d+_\d+\.csv",
                                  r"^op_summary_\d+_\d+_\d+_\d+\.csv"],
    }

    def __init__(self, profiler_path: str):
        """Resolve the CANN directory below *profiler_path* and index its files."""
        self._cann_path = PathManager.get_cann_path(profiler_path)
        self._file_dict = {}
        self._file_dispatch()

    def export_cann_profiling(self):
        """
        Re-export profiling data by running "msprof --export=on".

        Does nothing when the CANN directory does not exist. Existing summary
        and timeline directories are removed first. After a successful export
        the file index is rebuilt so get_file_list_by_type reflects the newly
        generated files.

        Raises:
            RuntimeError: if msprof cannot be started or exits with a
                non-zero return code.
        """
        if not os.path.isdir(self._cann_path):
            return
        self._del_summary_and_timeline_data()
        failure_message = (
            f"Export CANN Profiling data failed, please verify that the ascend-toolkit is installed and set-env.sh "
            f"is sourced. or you can execute the command to confirm the CANN Profiling export result: "
            f"msprof --export=on --output={self._cann_path}")
        try:
            completed_process = subprocess.run(["msprof", "--export=on", f"--output={self._cann_path}"],
                                               capture_output=True)
        except OSError as err:
            # msprof missing from PATH (or not executable) is exactly the case
            # the message describes; surface it as the documented error type.
            raise RuntimeError(failure_message) from err
        if completed_process.returncode != self.COMMAND_SUCCESS:
            raise RuntimeError(failure_message)
        # The index built in __init__ predates the delete/export cycle.
        self._file_dict = {}
        self._file_dispatch()

    def get_file_list_by_type(self, file_type: CANNDataEnum) -> set:
        """Return the set of indexed file paths for *file_type* (empty set if none)."""
        return self._file_dict.get(file_type, set())

    def _file_dispatch(self):
        """Group files under the CANN directory by data type using CANN_DATA_MATCH."""
        if not self._cann_path:
            # No PROF_* directory was found; an empty path would otherwise be
            # resolved relative to the current working directory.
            return
        all_file_list = PathManager.get_device_all_file_list_by_type(self._cann_path, self.SUMMARY)
        all_file_list += PathManager.get_device_all_file_list_by_type(self._cann_path, self.TIMELINE)
        all_file_list += PathManager.get_analyze_all_file(self._cann_path, self.ANALYZE)
        for file_path in all_file_list:
            if not os.path.isfile(file_path):
                continue
            file_name = os.path.basename(file_path)
            for data_type, re_match_exp_list in self.CANN_DATA_MATCH.items():
                if any(re.match(re_match_exp, file_name) for re_match_exp in re_match_exp_list):
                    self._file_dict.setdefault(data_type, set()).add(file_path)

    def _del_summary_and_timeline_data(self):
        """Remove the device's "summary" and "timeline" directories, if a device directory exists."""
        device_path = PathManager.get_device_path(self._cann_path)
        if not device_path:
            return
        summary_path = os.path.join(device_path, "summary")
        timeline_path = os.path.join(device_path, "timeline")
        FileManager.remove_file_safety(summary_path)
        FileManager.remove_file_safety(timeline_path)
@@ -0,0 +1,56 @@
1
+ import csv
2
+ import json
3
+ import os.path
4
+ import shutil
5
+ from warnings import warn
6
+
7
# Upper bounds (in bytes) above which files are skipped with a warning.
MAX_FILE_SIZE = 1024 * 1024 * 1024 * 10
MAX_CSV_SIZE = 1024 * 1024 * 1024 * 5


class FileManager:
    """Size-guarded file reading helpers and best-effort removal."""

    @classmethod
    def file_read_all(cls, file_path: str, mode: str = "r") -> any:
        """
        Read a whole file.

        Args:
            file_path: path of the file to read.
            mode: mode passed to open().

        Returns:
            The file content, or '' when the path is not a regular file, the
            file is empty, or it is larger than MAX_FILE_SIZE (a warning is
            issued in the last case).

        Raises:
            RuntimeError: if opening or reading fails; the original exception
                is attached as the cause.
        """
        if not os.path.isfile(file_path):
            return ''
        file_size = os.path.getsize(file_path)
        if file_size <= 0:
            return ''
        if file_size > MAX_FILE_SIZE:
            warn(f"The file size exceeds the preset value {MAX_FILE_SIZE / 1024 / 1024}MB, "
                 f"please check the file: {file_path}")
            return ''
        try:
            with open(file_path, mode) as file:
                return file.read()
        except Exception as err:
            raise RuntimeError(f"Can't read file: {file_path}") from err

    @classmethod
    def read_csv_file(cls, file_path: str, class_bean: any) -> list:
        """
        Read a CSV file with a header row and wrap each row.

        Args:
            file_path: path of the CSV file.
            class_bean: callable invoked with each row mapping produced by
                csv.DictReader; its results are collected.

        Returns:
            List of class_bean(row) results, or [] when the path is not a
            regular file, the file is empty, or it is larger than
            MAX_CSV_SIZE (a warning is issued in the last case).

        Raises:
            RuntimeError: if reading, parsing or wrapping a row fails; the
                original exception is attached as the cause.
        """
        if not os.path.isfile(file_path):
            return []
        file_size = os.path.getsize(file_path)
        if file_size <= 0:
            return []
        if file_size > MAX_CSV_SIZE:
            warn(f"The file size exceeds the preset value {MAX_CSV_SIZE / 1024 / 1024}MB, "
                 f"please check the file: {file_path}")
            return []
        try:
            with open(file_path, newline="") as csv_file:
                return [class_bean(row) for row in csv.DictReader(csv_file)]
        except Exception as err:
            raise RuntimeError(f"Failed to read the file: {file_path}") from err

    @classmethod
    def remove_file_safety(cls, path: str):
        """
        Remove *path* if it exists, never raising on filesystem errors.

        Directories are removed recursively; regular files and symlinks are
        unlinked (shutil.rmtree alone cannot remove those). A failure is
        reported with a printed warning instead of an exception.
        """
        if not os.path.exists(path):
            return
        try:
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            else:
                os.remove(path)
        except OSError:
            print(f"[WARNING] [{os.getpid()}] profiler.py: Can't remove the directory: {path}")
+
@@ -0,0 +1,23 @@
1
+ from .op_summary_headers import OpSummaryHeaders
2
+
3
class OpSummaryBean:
    """Wrap one op_summary row given as a header -> value mapping."""

    # Class-wide column selection for `row`. While empty, `row` falls back to
    # every key of the wrapped mapping. It is shared by all instances and is
    # reassigned by callers rather than set per instance.
    headers = []

    def __init__(self, data: dict):
        """Store *data*; it must support .keys() and .get() (e.g. a csv.DictReader row)."""
        self._data = data

    @property
    def row(self) -> list:
        """Values for the selected headers, with "" for headers missing from the row."""
        read_headers = OpSummaryBean.headers if OpSummaryBean.headers else self._data.keys()
        return [self._data.get(field_name, "") for field_name in read_headers]

    @property
    def ts(self) -> float:
        """Task start time as a float; 0.0 when the column is absent."""
        return float(self._data.get(OpSummaryHeaders.TASK_START_TIME, 0))

    @property
    def all_headers(self) -> list:
        """All keys of the wrapped mapping, in the mapping's own order."""
        return list(self._data.keys())
@@ -0,0 +1,8 @@
1
class OpSummaryHeaders:
    """Column names of the op_summary CSV that the profiler helpers read."""

    # op_summary
    TASK_START_TIME = "Task Start Time(us)"
    AIC_TOTAL_CYCLES = "aic_total_cycles"
    AIV_TOTAL_CYCLES = "aiv_total_cycles"
    TASK_DURATION = "Task Duration(us)"

    # Columns, in output order, selected when op summary rows are reported.
    OP_SUMMARY_SHOW_HEADERS = [
        "Op Name",
        "OP Type",
        "Task Type",
        TASK_START_TIME,
        TASK_DURATION,
        "Task Wait Time(us)",
        "Block Dim",
        AIC_TOTAL_CYCLES,
        AIV_TOTAL_CYCLES,
    ]
@@ -0,0 +1,42 @@
1
+ from .op_summary_headers import OpSummaryHeaders
2
+ from .file_manager import FileManager
3
+ from .op_summary_bean import OpSummaryBean
4
+ from .cann_file_parser import CANNFileParser, CANNDataEnum
5
+
6
+
7
class OpSummaryParser():
    """Read op_summary CSV files found below a profiler path."""

    def __init__(self, profiler_path: str):
        """Remember *profiler_path*; nothing is read until data is requested."""
        self._profiler_path = profiler_path

    @classmethod
    def _project_map_for_headers(cls, input_headers: list):
        """
        Translate show headers to their kernel-base counterparts.

        Headers without a mapping are passed through unchanged. The mapping
        pairs OP_SUMMARY_SHOW_HEADERS with OP_SUMMARY_KERNEL_BASE_HEADERS
        position by position; when OpSummaryHeaders does not define the
        latter, every header maps to itself instead of raising
        AttributeError.
        """
        show_headers = OpSummaryHeaders.OP_SUMMARY_SHOW_HEADERS
        kernel_headers = getattr(OpSummaryHeaders, "OP_SUMMARY_KERNEL_BASE_HEADERS", show_headers)
        project_map_dict = dict(zip(show_headers, kernel_headers))
        return [project_map_dict.get(header, header) for header in input_headers]

    def generate_op_summary_data(self) -> dict:
        """
        Return the first op summary row as a header -> value dict.

        Every op_summary CSV indexed by CANNFileParser is read, in sorted
        path order so the result is reproducible. As a side effect
        OpSummaryBean.headers is set to OP_SUMMARY_SHOW_HEADERS, which
        restricts each row to those columns.

        Raises:
            RuntimeError: if no op_summary file is found or a file yields no
                rows.
        """
        op_summary_file_set = CANNFileParser(self._profiler_path).get_file_list_by_type(CANNDataEnum.OP_SUMMARY)
        output_headers = OpSummaryHeaders.OP_SUMMARY_SHOW_HEADERS
        summary_data = []
        for file_path in sorted(op_summary_file_set):
            all_data = FileManager.read_csv_file(file_path, OpSummaryBean)
            if not all_data:
                raise RuntimeError("parse op summary csv failed.")
            OpSummaryBean.headers = output_headers
            summary_data.extend(data.row for data in all_data)

        if not summary_data:
            # No op_summary file at all: fail with the same error as an
            # unreadable one instead of an UnboundLocalError / IndexError.
            raise RuntimeError("parse op summary csv failed.")
        return self.create_dict(summary_data[0], output_headers)

    def create_dict(self, summary_data, headers):
        """Pair *headers* with the values of *summary_data* by position."""
        return dict(zip(headers, summary_data))
@@ -0,0 +1,65 @@
1
+ import os
2
+ import re
3
+
4
class PathManager:
    """Resolve directories and files inside a CANN profiling output tree.

    Lookups that find nothing return "" (for paths) or [] (for listings);
    empty or nonexistent input directories are treated as "nothing found"
    rather than being resolved against the current working directory.
    """

    @classmethod
    def _first_matching_subdir(cls, parent: str, pattern: str) -> str:
        """Return the first sub-directory of *parent* whose name matches *pattern*, else ""."""
        if not parent or not os.path.isdir(parent):
            return ""
        # NOTE: os.listdir order is arbitrary, so with several matches the
        # chosen directory is not guaranteed to be the newest.
        for sub_dir in os.listdir(os.path.realpath(parent)):
            sub_path = os.path.join(parent, sub_dir)
            if os.path.isdir(sub_path) and re.match(pattern, sub_dir):
                return sub_path
        return ""

    @classmethod
    def _list_dir_entries(cls, dir_path: str) -> list:
        """Return dir_path joined with each of its entries, or [] if it is not a directory."""
        if not os.path.isdir(dir_path):
            return []
        return [os.path.join(dir_path, sub_file) for sub_file in os.listdir(os.path.realpath(dir_path))]

    @classmethod
    def get_cann_path(cls, profiler_path: str) -> str:
        """Return the first "PROF_<digits>_<digits>_<letters>..." directory under *profiler_path*, or ""."""
        return cls._first_matching_subdir(profiler_path, r"^PROF_\d+_\d+_[a-zA-Z]+")

    @classmethod
    def get_device_path(cls, cann_path: str) -> str:
        """Return the first "device_<digit>..." directory under *cann_path*, or ""."""
        return cls._first_matching_subdir(cann_path, r"^device_\d")

    @classmethod
    def get_start_info_path(cls, cann_path: str) -> str:
        """
        Locate the start_info file of a CANN profiling directory.

        "<cann_path>/host/start_info" is preferred; otherwise
        "<device dir>/start_info.<id>" is tried, where <id> is the part after
        the underscore in a device directory named exactly "device_<id>".
        Returns "" when neither exists.
        """
        start_info_path = os.path.join(cann_path, "host", "start_info")
        if os.path.exists(start_info_path):
            return start_info_path
        device_path = cls.get_device_path(cann_path)
        if not device_path:
            return ""
        device_path_split = os.path.basename(device_path).split("_")
        if len(device_path_split) != 2:
            return ""
        start_info_file = f"start_info.{device_path_split[1]}"
        start_info_path = os.path.join(device_path, start_info_file)
        if os.path.exists(start_info_path):
            return start_info_path
        return ""

    @classmethod
    def get_device_all_file_list_by_type(cls, profiler_path: str, summary_or_timeline: str) -> list:
        """List the entries of "<device dir>/<summary_or_timeline>"; [] if the directory is absent."""
        device_path = cls.get_device_path(profiler_path)
        if not device_path:
            # Joining "" with the sub-directory name would yield a path
            # relative to the current working directory.
            return []
        return cls._list_dir_entries(os.path.join(device_path, summary_or_timeline))

    @classmethod
    def get_analyze_all_file(cls, profiler_path: str, analyze: str) -> list:
        """List the entries of "<profiler_path>/<analyze>"; [] if the directory is absent."""
        if not profiler_path:
            return []
        return cls._list_dir_entries(os.path.join(profiler_path, analyze))
@@ -519,8 +519,13 @@ def _update_workspace_data(kernel_name, input_for_mod, output_indexes):
519
519
  kernel_desc = json.loads(kernel_json)
520
520
  if "workspace" in kernel_desc:
521
521
  workspace_bytes = kernel_desc["workspace"]["size"]
522
- item = np.full(workspace_bytes, np.nan, np.int8)
523
- workspace_tensors.append(item)
522
+ workspace_num = kernel_desc["workspace"]["num"]
523
+ if len(workspace_bytes) != workspace_num:
524
+ raise ValueError("workspace num %s and size shape %s are not equal!"
525
+ % (len(workspace_bytes), workspace_num))
526
+ for i in range(kernel_desc["workspace"]["num"]):
527
+ item = np.full(workspace_bytes[i], np.nan, np.int8)
528
+ workspace_tensors.append(item)
524
529
  else:
525
530
  logging.warning("Kernel json file %s not found", json_file)
526
531
 
@@ -24,9 +24,23 @@ import akg.tvm
24
24
  from akg.global_configs import get_kernel_meta_path
25
25
  from akg.utils.util import parse_workspace, write_code
26
26
 
27
def set_ascend910b(code, core_type, title_dict):
    """
    Write core-type specific entries into the kernel meta dict.

    Args:
        code: kernel source text; not inspected by this function.
        core_type: "MIX", "AIC" or "AIV". An empty value or any other value
            leaves title_dict untouched.
        title_dict: dict updated in place.
    """
    if len(core_type) == 0:
        return
    # Entries per core type, listed in the order they are written.
    entries_by_core_type = {
        "MIX": (
            ("magic", "RT_DEV_BINARY_MAGIC_ELF"),
            ("coreType", "MIX"),
            ("intercoreSync", 1),
            ("taskRation", "1:2"),
        ),
        "AIC": (
            ("coreType", "AiCore"),
            ("magic", "RT_DEV_BINARY_MAGIC_ELF_AICUBE"),
        ),
        "AIV": (
            ("coreType", "VectorCore"),
            ("magic", "RT_DEV_BINARY_MAGIC_ELF_AIVEC"),
        ),
    }
    for key, value in entries_by_core_type.get(core_type, ()):
        title_dict[key] = value
27
41
 
28
42
  @akg.tvm.register_func
29
- def tvm_callback_cce_postproc(code, block_dim=1, workspace=None):
43
+ def tvm_callback_cce_postproc(code, block_dim=1, workspace=None, core_type=""):
30
44
  """Function for dumping ascend meta."""
31
45
  if "__aicore__" in code:
32
46
  title_dict = {"magic": "RT_DEV_BINARY_MAGIC_ELF"}
@@ -35,8 +49,13 @@ def tvm_callback_cce_postproc(code, block_dim=1, workspace=None):
35
49
  title_dict = dict()
36
50
 
37
51
  # kernel name
38
- kernel_name = code.split("_kernel")[0].split(" ")[-1]
39
- title_dict["kernelName"] = kernel_name + "_kernel0"
52
+ if "_kernel" in code:
53
+ kernel_name = code.split("_kernel")[0].split(" ")[-1]
54
+ title_dict["kernelName"] = kernel_name + "_kernel0"
55
+ elif "_mix_aic" in code:
56
+ kernel_name = code.split("_mix_aic")[0].split(" ")[-1]
57
+ title_dict["kernelName"] = kernel_name
58
+ set_ascend910b(code, core_type, title_dict)
40
59
 
41
60
  # thread info
42
61
  title_dict["blockDim"] = block_dim
@@ -43,10 +43,12 @@ from akg.utils import custom_tiling as ct_util
43
43
  from akg.utils import validation_check as vc_util
44
44
  from akg.utils.dsl_create import TensorUtils
45
45
  from akg.utils.util import parse_kwargs
46
- from akg.backend.parsing_profiling_data import HWTSLogParser
46
+ from akg.backend.parsing_profiling_data import HWTSLogParser, max_time_consume
47
47
  from akg.backend.parsing_profiling_data import validate_and_normalize_path
48
48
  from akg.backend import aic_model
49
-
49
+ from .ascend_profilier.cann_file_parser import CANNFileParser
50
+ from .ascend_profilier.op_summary_parser import OpSummaryParser
51
+ from .ascend_profilier.op_summary_headers import OpSummaryHeaders
50
52
  sh = logging.StreamHandler(sys.stdout)
51
53
  logging.getLogger().addHandler(sh)
52
54
  logging.getLogger().setLevel(logging.INFO)
@@ -169,7 +171,7 @@ def gen_name_kernel(kernel, dtype, shapes):
169
171
  return res
170
172
 
171
173
 
172
- def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
174
+ def profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=None):
173
175
  """
174
176
  Function for collecting cycle data from device.
175
177
 
@@ -180,21 +182,45 @@ def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
180
182
  tuning: tuning model.
181
183
  device_id: device_id on device.
182
184
  """
183
- akg.tvm.get_global_func("ascend_start_profiling")(device_id)
185
+ akg.tvm.get_global_func("ascend_start_profiling")(kernel_name)
184
186
  time_before_launch = time.time()
185
187
  output_data = ascend_run(kernel_name, args, outputs, device_id)
186
188
  akg.tvm.get_global_func("ascend_stop_profiling")()
187
-
188
- cycle = profiling_analyse(device_id, time_before_launch)
189
- logging.info('=====parsing cycles==============================')
189
+ cycle = 0
190
+ if arch is not None and "910B" in arch:
191
+ # for ascend910B profiling
192
+ cycle = profiling_analyse_910B(time_before_launch)
193
+ else:
194
+ cycle = profiling_analyse(device_id, time_before_launch)
195
+ logging.info('=====Task Duration(us)==============================')
190
196
  if cycle != PROF_ERROR_CODE:
191
197
  logging.info(cycle)
192
198
  else:
193
- logging.error("OOPS, can't correctly parsing cycles!")
199
+ logging.error("OOPS, can't correctly Task Duration!")
194
200
  TestUtils.record_cycle(cycle)
195
- logging.info('=====parsing cycles==============================')
201
+ logging.info('=====Task Duration(us)==============================')
196
202
  return output_data, {'run_time': cycle}
197
203
 
204
+ def profiling_analyse_910B(time_before_launch):
205
+ public_path = os.getenv('PROFILING_DIR')
206
+ if public_path is None:
207
+ raise RuntimeError("Environment PROFILING_DIR not set!")
208
+ public_path = validate_and_normalize_path(public_path)
209
+ CANNFileParser(public_path).export_cann_profiling()
210
+ cann_file_parser = OpSummaryParser(public_path)
211
+ profiler_file = cann_file_parser._profiler_path
212
+ logging.debug("prof file is: %s", os.path.basename(profiler_file))
213
+ file_create_time = os.path.getctime(profiler_file)
214
+ if file_create_time < time_before_launch:
215
+ raise RuntimeError("The PROF file is too old")
216
+ datas:dict = cann_file_parser.generate_op_summary_data()
217
+ task_duration = float(datas.get(OpSummaryHeaders.TASK_DURATION,max_time_consume))
218
+ # # aic_total_cycles means ai core cycle
219
+ # # aiv_total_cycles means ai vector cycle
220
+ # aiv_total_cycle = int(datas.get(OpSummaryHeaders.AIV_TOTAL_CYCLES,max_time_consume))
221
+ # aic_total_cycle = int(datas.get(OpSummaryHeaders.AIC_TOTAL_CYCLES,max_time_consume))
222
+ # return aiv_total_cycle+aic_total_cycle
223
+ return task_duration
198
224
 
199
225
  def profiling_analyse(device_id, time_before_launch):
200
226
  """analyse profiling."""
@@ -348,12 +374,12 @@ def get_kernel_name_from_mod(mod):
348
374
  return kernel_name
349
375
 
350
376
 
351
- def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1):
377
+ def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1, arch=None):
352
378
  gc.collect()
353
379
  if device_id == -1:
354
380
  device_id = int(os.environ.get("DEVICE_ID", 0))
355
381
  kernel_name = get_kernel_name_from_mod(mod)
356
- return profiling_mode_run(kernel_name, args, outputs, tuning, device_id)
382
+ return profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=arch)
357
383
 
358
384
 
359
385
  def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, device_id=-1, repeat_time=400):
@@ -387,7 +413,7 @@ def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, devi
387
413
 
388
414
 
389
415
  @func_time_required
390
- def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400):
416
+ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400, arch=None):
391
417
  """
392
418
  unified run CCE kernel api.
393
419
 
@@ -398,7 +424,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
398
424
  tuning (bool): tuning model.
399
425
  device_id: device_id on device.
400
426
  expect: when mode in ["compile_cloud", "compile_mini"], return it.
401
-
427
+ arch: Ascend arch type
402
428
  Returns:
403
429
  output numpy array, or tuple of numpy array if multi-output.
404
430
  """
@@ -414,7 +440,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
414
440
  run_args = [kernel_name, args, outputs, device_id]
415
441
  if os.environ.get("PROFILING_MODE") == "true":
416
442
  run_func = profiling_mode_run
417
- run_args = [kernel_name, args, outputs, tuning, device_id]
443
+ run_args = [kernel_name, args, outputs, tuning, device_id, arch]
418
444
  if os.environ.get("PROFILING_DIR", None) is None:
419
445
  os.environ["PROFILING_DIR"] = "."
420
446
  logging.info("[RUNTIME_WARNING] In profiling mode, while profiling dir is not set!Set to current dir by default.")
@@ -1031,7 +1057,7 @@ def npu_op_build(s, op_var, shape_var, kernel_name="", binds=None, attrs=None,
1031
1057
  for buf in enumerate(arg_list):
1032
1058
  args_json.append(akg.tvm.save_json(buf, "0.8.0"))
1033
1059
 
1034
- is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs.get("dynamic", False))
1060
+ is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs)
1035
1061
  if not is_success:
1036
1062
  raise TypeError("npu_inference codegen failed.")
1037
1063
  return kernel_name