mindspore 2.3.0-cp39-cp39-win_amd64.whl → 2.4.0-cp39-cp39-win_amd64.whl
This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +3 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +50 -9
- mindspore/_extends/parse/compile_config.py +41 -0
- mindspore/_extends/parse/parser.py +9 -7
- mindspore/_extends/parse/standard_method.py +52 -14
- mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
- mindspore/amp.py +24 -10
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/__init__.py +6 -4
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_tensor.py +2 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/api.py +102 -87
- mindspore/common/dump.py +5 -6
- mindspore/common/generator.py +1 -7
- mindspore/common/hook_handle.py +14 -26
- mindspore/common/mindir_util.py +2 -2
- mindspore/common/parameter.py +46 -13
- mindspore/common/recompute.py +39 -9
- mindspore/common/sparse_tensor.py +7 -3
- mindspore/common/tensor.py +209 -29
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +38 -3
- mindspore/communication/comm_func.py +310 -55
- mindspore/communication/management.py +14 -14
- mindspore/context.py +123 -22
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/__init__.py +1 -1
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +72 -44
- mindspore/dataset/engine/datasets_audio.py +7 -7
- mindspore/dataset/engine/datasets_standard_format.py +53 -3
- mindspore/dataset/engine/datasets_text.py +20 -20
- mindspore/dataset/engine/datasets_user_defined.py +174 -104
- mindspore/dataset/engine/datasets_vision.py +33 -33
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/queue.py +114 -60
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +34 -14
- mindspore/dataset/text/__init__.py +1 -4
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +1 -4
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/dataset/vision/validators.py +2 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/adamw.py +85 -0
- mindspore/experimental/optim/optimizer.py +3 -0
- mindspore/hal/__init__.py +3 -3
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/stream.py +18 -0
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/types.h +10 -10
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +2 -2
- mindspore/include/dataset/vision.h +4 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filewriter.py +68 -51
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +495 -46
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/mint/nn/__init__.py +266 -21
- mindspore/mint/nn/functional.py +125 -19
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/adamw.py +28 -7
- mindspore/mint/special/__init__.py +63 -0
- mindspore/multiprocessing/__init__.py +2 -1
- mindspore/nn/__init__.py +0 -1
- mindspore/nn/cell.py +275 -93
- mindspore/nn/layer/activation.py +211 -44
- mindspore/nn/layer/basic.py +113 -3
- mindspore/nn/layer/embedding.py +120 -2
- mindspore/nn/layer/normalization.py +101 -5
- mindspore/nn/layer/padding.py +34 -48
- mindspore/nn/layer/pooling.py +161 -7
- mindspore/nn/layer/transformer.py +3 -3
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +84 -6
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +127 -0
- mindspore/nn/wrap/cell_wrapper.py +12 -23
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/nn/wrap/loss_scale.py +17 -3
- mindspore/numpy/__init__.py +1 -1
- mindspore/numpy/array_creations.py +65 -68
- mindspore/numpy/array_ops.py +64 -60
- mindspore/numpy/fft.py +610 -75
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +85 -84
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -4
- mindspore/ops/_grad_experimental/grad_comm_ops.py +47 -3
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
- mindspore/ops/_vmap/vmap_array_ops.py +2 -4
- mindspore/ops/_vmap/vmap_math_ops.py +17 -1
- mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +85 -7
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
- mindspore/ops/auto_generate/gen_extend_func.py +734 -13
- mindspore/ops/auto_generate/gen_ops_def.py +2420 -381
- mindspore/ops/auto_generate/gen_ops_prim.py +5196 -1659
- mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
- mindspore/ops/composite/base.py +85 -48
- mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
- mindspore/ops/function/__init__.py +22 -0
- mindspore/ops/function/array_func.py +490 -153
- mindspore/ops/function/debug_func.py +113 -1
- mindspore/ops/function/fft_func.py +15 -2
- mindspore/ops/function/grad/grad_func.py +3 -2
- mindspore/ops/function/math_func.py +558 -207
- mindspore/ops/function/nn_func.py +817 -383
- mindspore/ops/function/other_func.py +3 -2
- mindspore/ops/function/random_func.py +184 -8
- mindspore/ops/function/reshard_func.py +13 -11
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +3 -2
- mindspore/ops/functional.py +24 -14
- mindspore/ops/op_info_register.py +3 -3
- mindspore/ops/operations/__init__.py +6 -1
- mindspore/ops/operations/_grad_ops.py +2 -76
- mindspore/ops/operations/_infer_ops.py +1 -1
- mindspore/ops/operations/_inner_ops.py +71 -94
- mindspore/ops/operations/array_ops.py +12 -146
- mindspore/ops/operations/comm_ops.py +42 -53
- mindspore/ops/operations/custom_ops.py +83 -19
- mindspore/ops/operations/debug_ops.py +42 -10
- mindspore/ops/operations/manually_defined/_inner.py +12 -0
- mindspore/ops/operations/manually_defined/ops_def.py +265 -10
- mindspore/ops/operations/math_ops.py +12 -223
- mindspore/ops/operations/nn_ops.py +20 -114
- mindspore/ops/operations/other_ops.py +7 -4
- mindspore/ops/operations/random_ops.py +46 -1
- mindspore/ops/primitive.py +18 -6
- mindspore/ops_generate/arg_dtype_cast.py +2 -0
- mindspore/ops_generate/gen_aclnn_implement.py +11 -11
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +67 -52
- mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
- mindspore/ops_generate/gen_pyboost_func.py +131 -47
- mindspore/ops_generate/op_proto.py +10 -3
- mindspore/ops_generate/pyboost_utils.py +14 -1
- mindspore/ops_generate/template.py +43 -21
- mindspore/parallel/__init__.py +3 -1
- mindspore/parallel/_auto_parallel_context.py +28 -8
- mindspore/parallel/_cell_wrapper.py +83 -0
- mindspore/parallel/_parallel_serialization.py +47 -19
- mindspore/parallel/_tensor.py +81 -11
- mindspore/parallel/_utils.py +13 -1
- mindspore/parallel/algo_parameter_config.py +5 -5
- mindspore/parallel/checkpoint_transform.py +46 -39
- mindspore/parallel/cluster/process_entity/__init__.py +1 -1
- mindspore/parallel/cluster/process_entity/_api.py +31 -23
- mindspore/parallel/cluster/process_entity/_utils.py +2 -27
- mindspore/parallel/parameter_broadcast.py +3 -4
- mindspore/parallel/shard.py +162 -31
- mindspore/parallel/transform_safetensors.py +993 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/util.py +28 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +17 -19
- mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
- mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
- mindspore/profiler/parser/base_timeline_generator.py +19 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +1 -391
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/profiler_info.py +78 -6
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +280 -412
- mindspore/rewrite/__init__.py +1 -2
- mindspore/rewrite/common/namespace.py +4 -4
- mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
- mindspore/run_check/_check_version.py +36 -103
- mindspore/safeguard/rewrite_obfuscation.py +591 -247
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +4 -3
- mindspore/train/_utils.py +28 -2
- mindspore/train/amp.py +171 -53
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +85 -22
- mindspore/train/callback/_cluster_monitor.py +1 -1
- mindspore/train/callback/_flops_collector.py +1 -0
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +134 -31
- mindspore/train/callback/_summary_collector.py +5 -5
- mindspore/train/callback/_tft_register.py +352 -0
- mindspore/train/dataset_helper.py +7 -3
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/mind_ir_pb2.py +44 -39
- mindspore/train/model.py +134 -58
- mindspore/train/serialization.py +336 -112
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/METADATA +6 -2
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/RECORD +258 -252
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -283
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/extend/basic.py +0 -140
- mindspore/nn/extend/embedding.py +0 -143
- mindspore/nn/extend/layer/normalization.py +0 -109
- mindspore/nn/extend/pooling.py +0 -117
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/extend/__init__.py +0 -53
- mindspore/ops/extend/array_func.py +0 -218
- mindspore/ops/extend/math_func.py +0 -76
- mindspore/ops/extend/nn_func.py +0 -308
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiling.py
CHANGED
@@ -14,19 +14,16 @@
 # ============================================================================
 """Profiling api file."""
 import os
-import re
-import shutil
 import stat
 import time
 import json
 from json import JSONDecodeError
 import glob
-import subprocess
-import csv
 import socket
+import multiprocessing
 from enum import Enum
-from multiprocessing import Process
 from typing import List
+from sys import getsizeof
 import numpy as np
 
 from mindspore import log as logger, context
@@ -47,13 +44,11 @@ from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
 from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
 from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
 from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
-from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
 from mindspore.profiler.parser.minddata_parser import MinddataParser
 from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
 from mindspore.profiler.parser.minddata_pipeline_parser import \
     MinddataPipelineParser
-from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
-from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
+from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
 from mindspore.profiler.parser.profiler_info import ProfilerInfo
 from mindspore.common.api import _pynative_executor
 from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
@@ -67,6 +62,11 @@ from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
 from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
 from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
 from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
+from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
+from mindspore.profiler.parser.ascend_analysis.path_manager import PathManager
+from mindspore.profiler.parser.ascend_analysis.constant import Constant
+from mindspore.profiler.common.util import timeit
+
 
 INIT_OP_NAME = 'Default/InitDataSetQueue'
 
@@ -105,7 +105,7 @@ class DeviceSupportParam(Enum):
     ASCEND = [
         'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
        'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
-        'ascend_job_id', 'profile_framework', '
+        'ascend_job_id', 'profile_framework', 'with_stack', 'profiler_level', 'data_simplification'
     ]
 
 
@@ -114,7 +114,6 @@ ALWAYS_VALID_PARAM = [
     'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
 ]
 
-
 ANALYSIS_ASYNC_MODE = 'async'
 ANALYSIS_SYNC_MODE = 'sync'
 DEFAULT_MODEL_ID = 4294967295
@@ -164,147 +163,6 @@ def _calculate_dataset_item(row, execution_time_map, ts_map):
         logger.warning("Can not map the start time for item: %s.", row)
 
 
-def _calculate_dataset_execution_time(input_file, output_file):
-    r"""
-    Parse the host info into timeline file, so as to show on UI.
-
-    Args:
-        input_file: the original host_info file, in csv format.
-        output_file: the output file, in csv format.
-    """
-    input_file = validate_and_normalize_path(input_file)
-    # execution_time_map is used to store the ExecutionCalculator for each stage.
-    execution_time_map = {}
-    # ts_map is used to store the start time of each event_stage_tid_pid.
-    ts_map = {}
-    with open(input_file, 'r') as f:
-        for row in csv.DictReader(f):
-            try:
-                module_name = row['module_name']
-                if module_name != 'Dataset':
-                    continue
-                _calculate_dataset_item(row, execution_time_map, ts_map)
-            except KeyError as e:
-                logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
-                continue
-    if ts_map:
-        logger.warning("Only start time is record for these items:")
-        for k, v in ts_map.items():
-            logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
-    output_file = validate_and_normalize_path(output_file)
-    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
-    modes = stat.S_IWUSR | stat.S_IRUSR
-    with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
-        csv_writer = csv.writer(f)
-        csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
-        for _, v in execution_time_map.items():
-            csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
-    os.chmod(output_file, modes)
-    logger.info('Successfully calculate the execution time and write it to file: %s.', output_file)
-
-
-def _extract_timeline_item(row, time_line, ts_map):
-    """Process one row, try to extract a timeline item."""
-    start_end = row['start_end']
-    event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
-    # map start and end, put the mapped event into timeline.
-    if start_end == '1' and event_stage_tid_pid in ts_map:
-        title = row['event'] + '::' + row['stage']
-        event = {'name': title, 'cat': row['module_name']}
-        ts_end = int(row['time_stamp(us)'])
-        ts = ts_map[event_stage_tid_pid]
-        event['ts'] = ts
-        event['dur'] = ts_end - ts
-        event['ph'] = 'X'
-        event['pid'] = row['pid']
-        event['tid'] = row['tid']
-        event['args'] = {'parent_pid': row['parent_pid']}
-        time_line.append(event)
-        del ts_map[event_stage_tid_pid]
-    elif start_end == '0':
-        ts = int(row['time_stamp(us)'])
-        ts_map[event_stage_tid_pid] = ts
-    # Put the instance event into timeline.
-    elif start_end == '2':
-        title = row['event'] + '::' + row['stage']
-        event = {
-            'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
-            'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
-        }
-        time_line.append(event)
-    else:
-        logger.warning("Can not map the start time for item: %s.", row)
-
-
-def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
-    r"""
-    Parse the host info into timeline file, so as to show on UI.
-
-    Args:
-        input_file: the original host_info file, in csv format.
-        output_timeline_file: the output timeline file, in json format.
-        output_memory_file: the output memory_usage file, in csv format.
-        is_develop_user: some data only shown to develop users, other users no need to analyse it.
-    """
-    input_file = validate_and_normalize_path(input_file)
-    time_line = []
-    # ts_map is used to store the start time of each event_stage_tid_pid
-    ts_map = {}
-    memory_header = [
-        'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
-        'memory_usage(kB)', 'time_stamp(us)'
-    ]
-    memory_info = []
-    with open(input_file, 'r') as f:
-        for row in csv.DictReader(f):
-            try:
-                level = row['level']
-                if level == '0' and not is_develop_user:
-                    continue
-                if int(row['time_stamp(us)']) > 0:
-                    _extract_timeline_item(row, time_line, ts_map)
-                if int(row['memory_usage(kB)']) > 0:
-                    memory_info.append(row)
-            except KeyError as e:
-                logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
-                continue
-    if memory_info:
-        with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
-            csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
-            csv_writer.writeheader()
-            for item in memory_info:
-                csv_writer.writerow(item)
-        os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
-    else:
-        logger.warning("No memory_usage is record in file: %s", input_file)
-
-    if ts_map:
-        logger.warning("Only start time is record for these items:")
-        for k, v in ts_map.items():
-            logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
-            last_dash = k.rfind('_')
-            if last_dash == -1:
-                logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
-                continue
-            second_last_dash = k.rfind('_', 0, last_dash - 1)
-            if second_last_dash == -1:
-                logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
-                continue
-            pid = k[last_dash + 1:]
-            tid = k[second_last_dash + 1: last_dash]
-            title = k[:second_last_dash]
-            unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
-            time_line.append(unfinished_timeline)
-
-    if time_line:
-        timeline_file = validate_and_normalize_path(output_timeline_file)
-        with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
-            json.dump(time_line, json_file)
-        os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
-    else:
-        logger.warning("No valid time_stamp is record in file: %s", input_file)
-
-
 def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
     """Executing the msprof export mode."""
     try:
@@ -351,20 +209,21 @@ class Profiler:
         output_path (str, optional): Output data path. Default: ``"./data"`` .
         profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
 
-            -
+            - ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
              time of the computational operators on the NPU and communication large operator information.
-            -
-              mini operator information based on Level0.
-            -
+            - ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
+              communication mini operator information based on Level0.
+            - ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
 
         op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
         profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
            a multi devices training,collect when True. Setting this parameter has no effect during single card
            training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
         profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
-            When using this parameter, `op_time` must be set to True.
+            When using this parameter, `op_time` must be set to True. Collecting operator memory data when the graph
+            compilation level is O2 requires collecting from the first step. Default: ``False`` .
         parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
-            Default value: ``
+            Default value: ``False`` .
         start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
             data collection based on conditions. Default: ``True`` .
         aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
@@ -380,11 +239,12 @@ class Profiler:
             - 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
            - 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
            - 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
+              This function only support Atlas A2 training series products.
 
         l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
             Default: ``False`` .
-        hbm_ddr (bool, optional): (Ascend only) Whether to collect
-            Default: ``False`` .
+        hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
+            collect when True. Default: ``False`` .
         pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
             Default: ``False`` .
         sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
@@ -396,25 +256,32 @@ class Profiler:
             - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
              This method can reduce the impact of adding profiler on overall training time.
         data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
-            Default value: ``
+            Default value: ``False`` .
         timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
             When using this parameter, `op_time` must be set to True. Default value: ``500`` .
         profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
-            ["all", "time",
-
-            Default:
+            ["all", "time", None], When is not set to None, it would collect the host profiler data. When using this
+            parameter, the op_time parameter must be enabled.
+            Default: None.
 
-            - "all": Record
-            - "time":
-            - "memory": Only record host memory usage.
+            - "all": Record host timestamp.
+            - "time": The same as "all".
            - None: Not record host information.
         data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
            If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
            directory are retained to save disk space.
            Default value: ``True`` .
-
-
-
+        with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
+            data is presented in the form of a flame graph in the timeline. When using this parameter, the op_time and
+            profile_framework parameters must be enabled. Default value: ``False`` .
+        analyse_only (bool, optional): (Ascend/GPU) Whether to parse only performance data and not collect performance
+            data. This parameter is experimental parameter and does not need to be set by the user.
+            Default value: ``False`` .
+        rank_id (int, optional): (Ascend/GPU) Set the rank id during parsing. This parameter is
+            experimental parameter and does not need to be set by the user. Default value: ``0`` .
+        env_enable (bool, optional): (Ascend/GPU) Whether to enable the collection of environment variables.
+            This parameter is experimental parameter and does not need to be set by the user.
+            Default value: ``False`` .
     Raises:
        RuntimeError: When the version of CANN does not match the version of MindSpore,
            MindSpore cannot parse the generated ascend_job_id directory structure.
@@ -428,6 +295,7 @@ class Profiler:
         >>> from mindspore import nn
         >>> import mindspore.dataset as ds
         >>> from mindspore import Profiler
+        >>> from mindspore.profiler import ProfilerLevel
         >>>
         >>> class Net(nn.Cell):
         ...     def __init__(self):
@@ -453,7 +321,7 @@ class Profiler:
         ...
         ...     # Init Profiler
         ...     # Note that the Profiler should be initialized before model.train
-        ...     profiler = Profiler()
+        ...     profiler = Profiler(profiler_level=ProfilerLevel.Level0)
         ...
         ...     # Train Model
         ...     net = Net()
@@ -462,11 +330,6 @@ class Profiler:
         ...     # Profiler end
         ...     profiler.analyse()
     """
-
-    _hwts_output_filename_target = "output_format_data_hwts_"
-    _opcompute_output_filename_target = "output_op_compute_time_"
-    _aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
-    _has_analysed = False
     _has_initialized = False
    _ascend_profiling_options = ""
    _ascend_job_id = ""
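
The docstring hunks above spell out the dependency rules among the 2.4.0 parameters. A minimal configuration sketch derived only from those rules (`with_stack` requires `op_time` and `profile_framework` to be enabled; the level name and values are as documented above), assuming an Ascend target:

```python
from mindspore import Profiler
from mindspore.profiler import ProfilerLevel

# Sketch of a 2.4.0 configuration; dependency rules come from the docstring above.
profiler = Profiler(
    profiler_level=ProfilerLevel.Level1,  # Level0 + CANN AscendCL data and AICore metrics
    op_time=True,                         # prerequisite for with_stack and profile_framework
    profile_framework="time",             # host timestamps; "time" behaves the same as "all"
    with_stack=True,                      # Python-side call stacks, rendered as a flame graph
)
# ... training happens here ...
profiler.analyse()
```
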
@@ -492,6 +355,9 @@ class Profiler:
         self._rank_size = 1
         self._rank_id = 0
         self._ascend_profiler = None
+        self.metadata = {}
+        self.max_str_len = 4096
+        self.max_meta_size = 50 * 1024
         self._timeline_size_limit_byte = 500 * 1024 * 1024  # 500MB
         self._parallel_strategy = True
         self._model_iteration_dict = None
@@ -512,13 +378,13 @@ class Profiler:
         self._sync_enable = True
         self._stop_time = 0
         self._dynamic_status = False
-        self._profile_framework =
+        self._profile_framework = None
         self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
         self.profiler_level = None
         self._pretty_json = False
         self._analyse_only = kwargs.get("analyse_only", False)
         self._data_simplification = kwargs.get("data_simplification", True)
-        self.
+        self._with_stack = False
         if self._msprof_enable:
             return
         self._start_time = int(time.time() * 1e6)  # us
@@ -540,20 +406,6 @@ class Profiler:
         if self.start_profile:
             self.start()
 
-    @staticmethod
-    def _get_prof_rank(prof_path: str):
-        """get rank id."""
-        sub_dirs = os.listdir(os.path.realpath(prof_path))
-        info_json_path = ""
-        for sub_dir in sub_dirs:
-            if sub_dir.startswith("device_"):
-                device_id = sub_dir.split("_")[-1]
-                info_json_path = os.path.join(prof_path, sub_dir, f"info.json.{device_id}")
-        if not os.path.exists(info_json_path):
-            return -1
-        rank_id, _ = Profiler._parse_info_json(info_json_path)
-        return rank_id
-
     @staticmethod
     def _check_output_path(output_path):
         """Checking path validity."""
@@ -602,30 +454,8 @@ class Profiler:
             logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
             return None
 
-    @staticmethod
-    def _parse_info_json(info_file):
-        """
-        Parse info log file, get the rank id and device id of the job.
-        Args:
-            input_file (str): The file path of the parse info log file.
-
-        Returns:
-            rank id, device id
-        """
-        with open(info_file, "r") as f:
-            info_dict = json.load(f)
-
-        rank_id = info_dict.get("rank_id", 0)
-        dev_info = info_dict.get("DeviceInfo", [])
-        dev_id = dev_info[0].get("id", -1)
-
-        if int(rank_id) < 0:
-            rank_id = 0
-
-        return str(rank_id), str(dev_id)
-
     @classmethod
-    def offline_analyse(cls, path: str, pretty=False, step_list=None):
+    def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True):
         """
         Analyze training performance data offline, which is invoked after performance data collection is completed.
 
@@ -633,37 +463,50 @@ class Profiler:
             path (str): The profiling data path which need to be analyzed offline.
                 There needs to be a profiler directory in this path.
             pretty (bool, optional): Whether to pretty json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed
-                By default, all steps will be analyzed.
+            step_list (list, optional): A list of steps that need to be analyzed, the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
+            data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
 
         Examples:
            >>> from mindspore import Profiler
            >>> Profiler.offline_analyse("./profiling_path")
        """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        real_path = os.path.realpath(path)
+        PathManager.check_input_directory_path(real_path)
+        profiler_parent_path_list = PathManager.get_profiler_parent_path_list(real_path)
+        if not isinstance(data_simplification, bool):
+            logger.warning(f"For offline_analyse, the parameter data_simplification must be bool, "
+                           f"but got type {type(data_simplification)}, it will be set to True.")
+            data_simplification = True
+        if not profiler_parent_path_list:
+            raise ProfilerPathErrorException(f'The provided path "{path}" must have a "profiler" directory for '
+                                             f'single-device profiler data, or multiple subdirectories each containing '
+                                             f'a "profiler" directory for multi-device profiler data. ')
+        # get rank id
+        rank_list = []
+        for parent_path in profiler_parent_path_list:
+            profiler_path = os.path.join(parent_path, Constant.PROFILER_DIR)
+            rank_id = ProfilerInfo.get_rank_id(profiler_path)
+            if int(rank_id) < 0:
+                logger.error(f"Unable to get a valid rank ID in the profiler directory: {profiler_path}")
+            rank_list.append(rank_id)
+        # start offline analyse
+        if len(profiler_parent_path_list) == 1:
+            PathManager.check_directory_path_writeable(profiler_parent_path_list[0])
+            profiler = cls(analyse_only=True, rank_id=rank_list[0], data_simplification=data_simplification)
+            profiler.analyse(profiler_parent_path_list[0], pretty, step_list)
+        else:
+            # Multiprocess Parsing
+            multiprocessing.set_start_method("fork", force=True)
+            process_number = min(Constant.DEFAULT_PROCESS_NUMBER, len(profiler_parent_path_list))
+            pool = multiprocessing.Pool(processes=process_number)
+            for idx, profiler_parent_path in enumerate(profiler_parent_path_list):
+                PathManager.check_directory_path_writeable(profiler_parent_path)
+                profiling_parser = cls(analyse_only=True, rank_id=rank_list[idx],
+                                       data_simplification=data_simplification)
+                pool.apply_async(profiling_parser.analyse, args=(profiler_parent_path, pretty, step_list))
+            pool.close()
+            pool.join()
 
     def op_analyse(self, op_name, device_id=None):
         """
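
The rewritten `offline_analyse` above now validates the input path through `PathManager`, resolves one rank id per `profiler` directory, and fans multi-device parsing out to a `multiprocessing.Pool`. A usage sketch based on its new signature (the directory name is illustrative):

```python
from mindspore import Profiler

# Offline parsing after a run has already produced profiling data.
# The path must contain a "profiler" directory (single device), or one
# subdirectory per device, each with its own "profiler" directory.
Profiler.offline_analyse(
    "./profiling_path",        # illustrative path
    pretty=False,
    step_list=[2, 3, 4],       # steps must be consecutive integers
    data_simplification=True,  # new parameter in 2.4.0
)
```
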
@@ -739,14 +582,38 @@ class Profiler:
                 Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
                for online mode. Default: ``None``.
            pretty (bool, optional): Whether to pretty json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed
-                By default, all steps will be analyzed.
+            step_list (list, optional): A list of steps that need to be analyzed, the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
            mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
 
                - sync: analyse data in current process, it will block the current process.
-                - async: analyse data in subprocess, it will not the current process.Since the parsing process
+                - async: analyse data in subprocess, it will not block the current process. Since the parsing process
                  will take up extra CPU resources, please enable this mode according to the actual resource situation.
 
+        Examples:
+            >>> from mindspore.train import Callback
+            >>> from mindspore import Profiler
+            >>> class StopAtStep(Callback):
+            ...     def __init__(self, start_step=1, stop_step=5):
+            ...         super(StopAtStep, self).__init__()
+            ...         self.start_step = start_step
+            ...         self.stop_step = stop_step
+            ...         self.profiler = Profiler(start_profile=False)
+            ...
+            ...     def step_begin(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.start_step:
+            ...             self.profiler.start()
+            ...
+            ...     def step_end(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.stop_step:
+            ...             self.profiler.stop()
+            ...
+            ...     def end(self, run_context):
+            ...         self.profiler.analyse(step_list=[2,3,4], mode="sync")
         """
         try:
             if isinstance(pretty, bool):
@@ -793,11 +660,12 @@ class Profiler:
 
         ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
         if offline_path:
+            # Loads the ProfilerInfo data, avoid overwriting the data collection prof_info_x.json.
+            ProfilerInfo.load_profiler_info_dict(os.path.join(offline_path, "profiler"))
             ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
             self._ascend_graph_analyse(offline_path=offline_path)
             ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
             ProfilerInfo.save(self._output_path)
-            _offline_parse(offline_path)
             return
         if self._msprof_enable:
             return
@@ -817,18 +685,16 @@ class Profiler:
         ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         if self._device_target and self._device_target == DeviceTarget.CPU.value:
             self._cpu_analyse()
+            if self._profile_framework:
+                logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host profiler "
+                               "data.")
 
         if self._device_target and self._device_target == DeviceTarget.GPU.value:
             self._gpu_analyse()
 
         elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
             self._ascend_analyse()
-
-        if self._device_target != DeviceTarget.CPU.value:
-            self._host_info_analyse()
-        else:
-            logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
-                           " directory in the output path.")
+
         logger.info("Profiling: all the data have been analyzed.")
         ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         ProfilerInfo.save(self._output_path)
@@ -895,8 +761,13 @@ class Profiler:
             self._ascend_graph_start()
         ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
-        ProfilerInfo.set_system_time(int(c_expression.get_clock_time()
-
+        ProfilerInfo.set_system_time(int(c_expression.get_clock_time()))  # ns
+        if context.get_context("mode") == context.GRAPH_MODE:
+            jit_config = context.get_jit_config()
+            jit_level = jit_config.get("jit_level", "")
+            ProfilerInfo.set_jit_level(jit_level)
+        if self._profile_framework:
+            _framework_profiler_enable_mi()
 
     def stop(self):
         """
@@ -957,8 +828,88 @@ class Profiler:
         self._init_profiler_info()
         ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
         ProfilerInfo.save(self._output_path)
+        self._dump_metadata()
         logger.info("Profiling: stop time: %d", self._stop_time)
 
+    def add_metadata(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The value to the metadata.
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata
+            >>> profiler.add_metadata("test_key", "test_value")
+            >>> # Profiler end
+            >>> profiler.analyse()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be string. Skip this metadata.")
+            return
+        if not self._check_str_valid(key) or not self._check_str_valid(value):
+            logger.warning("Invalid input key or value. Skip this metadata.")
+            return
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self.metadata) + add_size < self.max_meta_size:
+            if key in self.metadata:
+                logger.warning(f"{key} is already saved as metadata, override it.")
+            self.metadata[key] = value
+        else:
+            logger.warning("Too many metadata added. Skip this metadata")
+
+    def add_metadata_json(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data with the value as a JSON string data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The json str format value to the metadata.
+
+        Examples:
+            >>> import json
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata_json
+            >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
+            >>> # Profiler end, metadata will be saved in profiler_metadata.json
+            >>> profiler.analyse()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be string. Skip this metadata.")
+            return
+        if not self._check_str_valid(key) or not self._check_str_valid(value):
+            logger.warning("Invalid input key or value. Skip this metadata.")
+            return
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self.metadata) + add_size < self.max_meta_size:
+            try:
+                if key in self.metadata:
+                    logger.warning(f"{key} is already saved as metadata, override it.")
+                self.metadata[key] = json.loads(value)
+            except ValueError:
+                logger.warning("The metadata value must be json format string. Skip this metadata")
+        else:
+            logger.warning("Too many metadata added. Skip this metadata")
+
+    def _dump_metadata(self):
+        """Dump metadata to file."""
+        if not self.metadata:
+            return
+        FileManager.create_json_file(self._output_path, self.metadata, "profiler_metadata.json", indent=4)
+        self.metadata.clear()
+
+    def _check_str_valid(self, input_str: str):
+        """Check str length"""
+        if len(input_str) > self.max_str_len:
+            return False
+        return True
+
     def _set_ascend_job_id(self, ascend_job_id):
         """Set output_path for offline parsing performance data."""
         if not ascend_job_id:
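
The new metadata API above bounds each string at `max_str_len` (4096) characters and the whole store at `max_meta_size` (50 KB, measured via `sys.getsizeof`); anything outside those bounds is skipped with a warning rather than raised. A short sketch of the accepted and rejected cases, following the checks in the code above:

```python
import json
from mindspore import Profiler

profiler = Profiler()
profiler.add_metadata("run_tag", "baseline-lr0.01")    # stored as a plain string
profiler.add_metadata("too_long", "x" * 5000)          # skipped: longer than max_str_len
profiler.add_metadata_json("hparams", json.dumps({"lr": 0.01, "bs": 32}))  # stored parsed
profiler.add_metadata_json("broken", "{not json")      # skipped: json.loads() fails
profiler.analyse()  # metadata is flushed to profiler_metadata.json when profiling stops
```
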
@@ -983,7 +934,7 @@ class Profiler:
|
|
|
983
934
|
self._profile_communication = options.get('profile_communication')
|
|
984
935
|
self._op_time = options.get('op_time')
|
|
985
936
|
self._device_target = context.get_context("device_target").lower()
|
|
986
|
-
self._profile_framework = options.get('profile_framework',
|
|
937
|
+
self._profile_framework = options.get('profile_framework', None)
|
|
987
938
|
self._profiler_manager = c_expression.ProfilerManager.get_instance()
|
|
988
939
|
self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
|
|
989
940
|
if self._data_process:
|
|
@@ -1034,32 +985,32 @@ class Profiler:
|
|
|
1034
985
|
|
|
1035
986
|
def _gpu_profiler_init(self, kwargs):
|
|
1036
987
|
"""Gpu profiler init."""
|
|
988
|
+
self._parse_parameter_for_gpu(kwargs)
|
|
1037
989
|
# Setup and start MindData Profiling
|
|
1038
990
|
if self._data_process:
|
|
1039
991
|
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
1040
992
|
self._md_profiler.init()
|
|
1041
|
-
self._parse_parameter_for_gpu(kwargs)
|
|
1042
993
|
|
|
1043
994
|
gpu_profiler = c_expression.Profiler
|
|
1044
995
|
self._gpu_profiler = gpu_profiler.get_instance("GPU")
|
|
1045
|
-
self._gpu_profiler.init(self._output_path)
|
|
1046
|
-
self._gpu_profiler.sync_enable(self._sync_enable)
|
|
1047
996
|
if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
|
|
1048
997
|
self._dev_id = str(get_rank())
|
|
1049
998
|
os.environ['DEVICE_ID'] = self._dev_id
|
|
1050
999
|
self._rank_id = self._dev_id
|
|
1000
|
+
self._gpu_profiler.init(self._output_path, int(self._rank_id))
|
|
1001
|
+
self._gpu_profiler.sync_enable(self._sync_enable)
|
|
1051
1002
|
|
|
1052
1003
|
def _ascend_profiler_init(self, kwargs):
|
|
1053
1004
|
"""Ascend profiler init."""
|
|
1005
|
+
self._parse_parameter_for_ascend(kwargs)
|
|
1054
1006
|
# Setup and start MindData Profiling
|
|
1055
1007
|
if self._data_process:
|
|
1056
1008
|
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
1057
1009
|
self._md_profiler.init()
|
|
1058
1010
|
self._init_time = int(time.time() * 10000000)
|
|
1059
1011
|
logger.info("Profiling: profiling init time: %d", self._init_time)
|
|
1060
|
-
self._parse_parameter_for_ascend(kwargs)
|
|
1061
|
-
os.environ['DEVICE_ID'] = self._dev_id
|
|
1062
1012
|
|
|
1013
|
+
os.environ['DEVICE_ID'] = self._dev_id
|
|
1063
1014
|
self._ascend_profiling_options = json.dumps(self._construct_profiling_options())
|
|
1064
1015
|
# Characters longer than 2048 are ignored, resulting in profiling option resolution errors
|
|
1065
1016
|
if len(self._ascend_profiling_options) > 2048:
|
|
@@ -1075,7 +1026,7 @@ class Profiler:
|
|
|
1075
1026
|
data_path = os.path.join(container_path, "data")
|
|
1076
1027
|
data_path = validate_and_normalize_path(data_path)
|
|
1077
1028
|
if not os.path.exists(data_path):
|
|
1078
|
-
os.makedirs(data_path, exist_ok=True)
|
|
1029
|
+
os.makedirs(data_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
1079
1030
|
|
|
1080
1031
|
def _construct_profiling_options(self):
|
|
1081
1032
|
"""
|
|
@@ -1101,9 +1052,9 @@ class Profiler:
|
|
|
1101
1052
|
"op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
1102
1053
|
"profile_framework": self._profile_framework,
|
|
1103
1054
|
"profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
|
|
1104
|
-
"
|
|
1055
|
+
"with_stack": "on" if self._with_stack else "off"
|
|
1105
1056
|
}
|
|
1106
|
-
|
|
1057
|
+
ProfilerInfo.set_profiling_options(profiling_options)
|
|
1107
1058
|
return profiling_options
|
|
1108
1059
|
|
|
1109
1060
|
def _parse_parameter_for_gpu(self, kwargs):
|
|
@@ -1175,11 +1126,11 @@ class Profiler:
|
|
|
1175
1126
|
pcie_enable = False
|
|
1176
1127
|
self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
|
|
1177
1128
|
|
|
1178
|
-
self._parallel_strategy = kwargs.pop("parallel_strategy",
|
|
1129
|
+
self._parallel_strategy = kwargs.pop("parallel_strategy", False)
|
|
1179
1130
|
if not isinstance(self._parallel_strategy, bool):
|
|
1180
1131
|
logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
|
|
1181
|
-
f"but got type {type(self._parallel_strategy)}, it will be set to
|
|
1182
|
-
self._parallel_strategy =
|
|
1132
|
+
f"but got type {type(self._parallel_strategy)}, it will be set to False.")
|
|
1133
|
+
self._parallel_strategy = False
|
|
1183
1134
|
|
|
1184
1135
|
self.profiler_level = kwargs.pop("profiler_level", None)
|
|
1185
1136
|
if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
|
|
@@ -1381,7 +1332,7 @@ class Profiler:
|
|
|
1381
1332
|
finally:
|
|
1382
1333
|
pass
|
|
1383
1334
|
|
|
1384
|
-
def _ascend_graph_memory_analyse(self
|
|
1335
|
+
def _ascend_graph_memory_analyse(self):
|
|
1385
1336
|
"""Analyse memory usage info."""
|
|
1386
1337
|
if not self._profile_memory:
|
|
1387
1338
|
return
|
|
@@ -1390,7 +1341,7 @@ class Profiler:
|
|
|
1390
1341
|
"PyNative mode currently.")
|
|
1391
1342
|
try:
|
|
1392
1343
|
logger.info("Profiling: analyzing the memory usage info.")
|
|
1393
|
-
self._analyse_memory_usage(
|
|
1344
|
+
self._analyse_memory_usage()
|
|
1394
1345
|
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
|
|
1395
1346
|
logger.warning(err.message)
|
|
1396
1347
|
finally:
|
|
@@ -1408,28 +1359,37 @@ class Profiler:
 
         dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
         ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
-
+        PathManager.make_dir_safety(ascend_profiler_output_path)
 
         source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
         target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
-
+        PathManager.copy_file(source_profiler_info_path, target_profiler_info_path)
+
+        source_profiler_metadata_path = os.path.join(self._output_path, f"profiler_metadata.json")
+        target_profiler_metadata_path = os.path.join(ascend_ms_path, f"profiler_metadata.json")
+        PathManager.copy_file(source_profiler_metadata_path, target_profiler_metadata_path)
 
         source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
         target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
-
+        PathManager.copy_file(source_timeline_path, target_timeline_path)
 
         src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
-
-
-        shutil.copy(src_op_mem_file, dst_op_mem_file)
+        dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
+        PathManager.copy_file(src_op_mem_file, dst_op_mem_file)
 
-        ms_output_path = os.path.
+        ms_output_path = os.path.realpath(
             os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
         static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
         src_static_op_mem_path = glob.glob(static_op_mem_path)
         if src_static_op_mem_path:
             dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
-
+            PathManager.copy_file(src_static_op_mem_path[0], dst_static_op_mem_file)
+
+        src_op_statistics_path = os.path.join(ms_output_path, "op_statistic_*.csv")
+        src_op_statistics_path = glob.glob(src_op_statistics_path)
+        if src_op_statistics_path:
+            dst_op_statistics_path = os.path.join(ascend_profiler_output_path, f"op_statistic.csv")
+            PathManager.copy_file(src_op_statistics_path[0], dst_op_statistics_path)
 
         self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
         self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
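The hunk above swaps ad-hoc `shutil.copy` calls and unmanaged output directories for `PathManager.make_dir_safety` and `PathManager.copy_file`. The helper bodies are not part of this diff; the following is a minimal sketch of what such permission-conscious helpers plausibly do (only the two names come from the hunk, the implementations are assumptions):

```python
# Hypothetical sketch of PathManager-style helpers; the real MindSpore
# implementation may validate paths and raise profiler-specific errors.
import os
import shutil
import stat


def make_dir_safety(path: str) -> None:
    # Create the directory with owner-only permissions in a single call,
    # instead of creating it world-readable and tightening it afterwards.
    os.makedirs(path, mode=stat.S_IRWXU, exist_ok=True)


def copy_file(src: str, dst: str) -> None:
    # Tolerate a missing source (profiling artifacts are optional) and
    # restrict the copied file to owner read/write. Assumed behavior.
    if not os.path.isfile(src):
        return
    shutil.copy(src, dst)
    os.chmod(dst, stat.S_IRUSR | stat.S_IWUSR)
```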
@@ -1468,7 +1428,7 @@ class Profiler:
                                                        f"communication_matrix.json")
         communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
 
-        analyze_path = os.path.
+        analyze_path = os.path.realpath(os.path.join(source_path, os.path.pardir, 'analyze'))
         communicate_analyser = AscendCommunicationGenerator(analyze_path)
         communicate_analyser.parse()
         communicate_analyser.write(communication_file_path, communication_matrix_file_path)
@@ -1500,26 +1460,6 @@ class Profiler:
         finally:
             pass
 
-    def _ascend_graph_msadvisor_analyse(self, job_id):
-        """Call MSAdvisor function."""
-        logger.info("MSAdvisor starts running.")
-        msadvisor = Msadvisor(job_id, self._rank_id, self._output_path, pretty=self._pretty_json)
-        try:
-            msadvisor.analyse()
-        except FileNotFoundError as err:
-            logger.warning("MSAdvisor: command not found,"
-                           "please check if installed ascend-toolkit and set environment path correctly. %s", err)
-        except OSError as err:
-            logger.warning("Cannot execute binary file: Exec format error. %s", err)
-        except subprocess.CalledProcessError:
-            logger.warning("MSAdvisor running failed, please check MSAdvisor running log.")
-        except (ValueError, ProfilerFileNotFoundException) as err:
-            logger.warning("MSAdvisor running failed. %s", err)
-        finally:
-            pass
-        if context.get_context("mode") == context.PYNATIVE_MODE:
-            logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
-
     def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
         """Get the mapping between framework operator and device kernel."""
         if not kernels:
@@ -1535,8 +1475,6 @@ class Profiler:
             key = name if name.startswith("hcom_") else (name, ts)
             launch_op = kernel_map.get(key)
             if not launch_op:
-                if context.get_context("mode") == context.GRAPH_MODE or not name.startswith("aclnn"):
-                    logger.warning(f"Failed to get launch operator for {name}!")
                 continue
             launch_ops[index] = launch_op.name
         return launch_ops
@@ -1547,6 +1485,7 @@ class Profiler:
         else:
             MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
 
+    @timeit("Profiler analyse done")
     def _ascend_graph_analyse_inner(self, offline_path=None):
         """Ascend graph mode analyse."""
         job_id = self._get_profiling_job_id(offline_path)
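The newly added `@timeit("Profiler analyse done")` suggests a decorator that logs how long the analysis step takes. Its definition is outside this hunk; a minimal sketch under that assumption:

```python
# Hypothetical timeit decorator matching the usage above; the actual
# MindSpore helper may format its log message differently.
import functools
import logging
import time

logger = logging.getLogger(__name__)


def timeit(message):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            start = time.perf_counter()
            try:
                return func(*args, **kwargs)
            finally:
                logger.info("%s, cost: %.3f s", message, time.perf_counter() - start)
        return wrapper
    return decorator
```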
@@ -1558,7 +1497,7 @@
         source_path = os.path.join(self._output_path, job_id)
         self._minddata_analyse()
         if self._op_time:
-            mindstudio_profiler_output = os.path.
+            mindstudio_profiler_output = os.path.realpath(
                 os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
             flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
             if not flag:
@@ -1567,14 +1506,17 @@
             ProfilerInfo.set_export_flag(flag)
             op_summary, op_statistic, steptrace, steptrace_model \
                 = _ascend_graph_msprof_analyse(mindstudio_profiler_output)
+            kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
+
             if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
                     not isinstance(op_statistic, np.ndarray) and not op_statistic:
+                logger.warning('Op statistic data is empty!')
                 return
-
+
             launch_ops = self._get_kernel_op_map(op_summary, kernels)
             self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
             graph_ids = np.unique(op_summary['Model ID']).tolist()
-
+            self._ascend_fpbp_analyse(op_summary, steptrace)
             if len(graph_ids) == 1:
                 self._ascend_step_trace_analyse(steptrace)
             else:
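The emptiness check kept as context in this hunk leans on Python precedence: `and` binds tighter than `or`, so the condition parses as "(ndarray with zero rows) or (falsy non-ndarray)". A short self-contained check of that reading:

```python
import numpy as np


def op_statistic_is_empty(op_statistic) -> bool:
    # Same predicate as in the hunk, with the implicit grouping made explicit.
    return ((isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0)
            or (not isinstance(op_statistic, np.ndarray) and not op_statistic))


assert op_statistic_is_empty(np.empty((0, 3)))     # ndarray with no rows
assert op_statistic_is_empty([])                   # falsy non-ndarray
assert not op_statistic_is_empty(np.ones((2, 3)))  # populated ndarray
```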
@@ -1582,13 +1524,13 @@
             if self._dynamic_status:
                 self._ascend_dynamic_net_analyse(op_summary)
             self._ascend_flops_analyse(op_summary, launch_ops)
-            self._ascend_graph_memory_analyse(
+            self._ascend_graph_memory_analyse()
             self._ascend_ms_analyze(mindstudio_profiler_output)
             self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
-            self._ascend_graph_msadvisor_analyse(job_id)
             self._minddata_aicpu_analyse(self._output_path, job_id)
             ProfilerInfo.set_graph_ids(graph_ids)
             try:
+                ProfilerInfo.set_data_simplification(self._data_simplification)
                 ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
             except RuntimeError as err:
                 logger.error('Profilier simplify data failed, %s', str(err))
@@ -1690,7 +1632,7 @@
         try:
             timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
             timeline_generator.init_timeline(pretty=self._pretty_json)
-            timeline_generator.write_timeline(
+            timeline_generator.write_timeline()
             timeline_generator.write_timeline_summary()
         except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
             logger.warning('Fail to write timeline data: %s', err)
@@ -1699,15 +1641,13 @@
             raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
                                "data.")
 
-    def _analyse_step_trace(self,
-                            is_gpu_kernel_async_launch_flag=False):
+    def _analyse_step_trace(self, is_training_mode_flag=True, is_gpu_kernel_async_launch_flag=False):
         """
         Analyse step trace data and save the result.
 
         Args:
-            source_path (str): The directory that contains the step trace original data.
-            framework_parser (FrameworkParser): The framework parse instance.
             is_training_mode_flag (bool): Whether in training mode or not.
+            is_gpu_kernel_async_launch_flag (bool): Whether gpu kernel launches are asynchronous
         """
         logger.info("Begin to parse step trace.")
         # construct output path
@@ -1738,56 +1678,31 @@
             logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
             logger.info("The point info is: %s", point_info)
 
-            return point_info, is_training_mode_flag
-        return {}, is_training_mode_flag
-
-        # whether keep the first step
-        skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
-        # recognize inference or training mode
-        is_training_mode_flag = framework_parser.check_op_name("Gradients")
-        # parser the step trace files and save the result to disk
-        source_path = validate_and_normalize_path(source_path)
-        parser = AscendStepTraceParser(input_dir=source_path,
-                                       output_file_path=step_trace_intermediate_file_path,
-                                       skip_first_step=skip_first_step_flag,
-                                       is_training_mode=is_training_mode_flag)
-        parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
-        parser.parse_and_save()
-        point_info = parser.record_point_info(point_info_file_path)
-
-        # print parser result
-        parser.show()
-        logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
-        logger.info("The point info is: %s", point_info)
-
-        return point_info, is_training_mode_flag
-
     def _generate_timeline(self, reduce_op_type):
         """Used for gpu, generate timeline info, write to json format file."""
         try:
             timeline_generator = GpuTimelineGenerator(self._output_path, self._dev_id, self._rank_size,
                                                       context.get_context("mode"))
             timeline_generator.init_timeline(reduce_op_type)
-            self._timeline_meta = timeline_generator.write_timeline(
+            self._timeline_meta = timeline_generator.write_timeline()
             timeline_generator.write_timeline_summary()
+            timeline_generator.parse_fwk_data()
+            timeline_generator.write_fwk_timeline()
             return timeline_generator
         except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
             logger.warning('Fail to write timeline data: %s', err)
             raise RuntimeError('Fail to write timeline data.') from err
 
-    def _analyse_memory_usage(self
+    def _analyse_memory_usage(self):
         """Analyse memory usage data."""
         integrator = Integrator(self._output_path, self._rank_id)
-
-        memory_parser = MemoryUsageParser(self._output_path, self._rank_id, pretty=self._pretty_json)
-        memory_parser.init_memory_usage_info(aicore_detail_data, points)
-        memory_parser.write_memory_files()
+        integrator.get_aicore_detail_data()
 
     def _get_profiling_job_id(self, offline_path):
         """Get profiling job id, which was generated by ada service.
 
         Returns:
-            str, profiling job id
+            str, profiling job id, eg: PROF_XXX/device_*.
         """
 
         if offline_path:
@@ -1816,18 +1731,17 @@
                                    "profiler will ignore this job dir.", job_dir)
                 continue
 
-            prof_rank_id
+            prof_rank_id = ProfilerInfo.get_rank_id(self._output_path)
+            prof_device_id = ProfilerInfo.get_device_id(prof_dir)
             job_start_time = self._parse_job_start_time(prof_dir)
 
             if offline_path:
-                if self._rank_id != prof_rank_id:
-                    continue
                 self._start_time = int(job_start_time)
             else:
                 if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
-                    logger.
-
-
+                    logger.warning("Find profiling find job path %s, but not current training device id. "
+                                   "Current training rank id %s, but job path rank id: %s, "
+                                   "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
                     continue
 
             if job_start_time < self._start_time:
@@ -1936,19 +1850,21 @@
             self._output_path = validate_and_normalize_path(output_path)
         else:
             output_path = kwargs.pop("output_path")
+            if not isinstance(output_path, str):
+                logger.warning(
+                    f"The output_path must be a string, but got type {type(output_path)}, it will be set to 'data'.")
+                output_path = "data"
             self._output_path = validate_and_normalize_path(output_path)
 
         self._output_path = os.path.join(self._output_path, "profiler")
         if not os.path.exists(self._output_path):
-            os.makedirs(self._output_path, exist_ok=True)
-            os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+            os.makedirs(self._output_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
         else:
             logger.warning("The target dir already exists. "
                            "There may be some old profiling data, and they will be rewritten in the end.")
         self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
         if not os.path.exists(self._framework_path):
-            os.makedirs(self._framework_path, exist_ok=True)
-            os.chmod(self._framework_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+            os.makedirs(self._framework_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
 
     def _parser_kwargs(self, kwargs):
         """Parse kwargs vale."""
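Both directory hunks fold the follow-up `os.chmod` into `os.makedirs(..., mode=...)`, so the owner-only bits apply at creation time instead of leaving a brief window where the directory exists with default permissions. One caveat: on POSIX systems `mode` is filtered by the process umask, so the two variants are not always byte-for-byte equivalent:

```python
# Demonstration of the difference between the old two-step pattern and
# the new one-step pattern; the paths under tempfile are illustrative only.
import os
import stat
import tempfile

base = tempfile.mkdtemp()
owner_only = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR

# New pattern: mode is applied at creation, subject to the umask.
one_step = os.path.join(base, "one_step")
os.makedirs(one_step, exist_ok=True, mode=owner_only)

# Old pattern: the directory briefly carries default permissions
# before chmod tightens them, but the final bits ignore the umask.
two_step = os.path.join(base, "two_step")
os.makedirs(two_step, exist_ok=True)
os.chmod(two_step, owner_only)

print(oct(os.stat(one_step).st_mode & 0o777), oct(os.stat(two_step).st_mode & 0o777))
```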
@@ -1969,11 +1885,11 @@
                            f"but got type {type(self._op_time)}, it will be set to True.")
             self._op_time = True
 
-        self._data_process = kwargs.pop("data_process",
+        self._data_process = kwargs.pop("data_process", False)
         if not isinstance(self._data_process, bool):
             logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
-                           f"but got type {type(self._data_process)}, it will be set to
-            self._data_process =
+                           f"but got type {type(self._data_process)}, it will be set to False.")
+            self._data_process = False
 
         timeline_limit = kwargs.pop("timeline_limit", 500)
         if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
@@ -1985,70 +1901,22 @@
                 "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
             timeline_limit = 500
         self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
-        self._profile_framework = kwargs.pop("profile_framework",
-        if self._profile_framework not in ["
-            logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of [
-                           f" 'time', 'all', None], but got {self._profile_framework}, it will be set to
-            self._profile_framework =
-        if not isinstance(self._data_simplification, bool):
-            logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
-                           f"but got type {type(self._data_simplification)}, it will be set to True.")
-            self._data_simplification = True
+        self._profile_framework = kwargs.pop("profile_framework", None)
+        if self._profile_framework not in ["time", "all", None]:
+            logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ["
+                           f" 'time', 'all', None], but got {self._profile_framework}, it will be set to None.")
+            self._profile_framework = None
 
         if not isinstance(self._data_simplification, bool):
             logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
                            f"but got type {type(self._data_simplification)}, it will be set to True.")
             self._data_simplification = True
 
-        self.
-        if not isinstance(self.
-            logger.warning(f"For '{self.__class__.__name__}', the parameter
-                           f"type {type(self.
-            self.
-
-
-
-        Read data from the csv file, and write it into timeline file, so the timeline can be show on tracing tool.
-        """
-        logger.info("Profiling HostInfo start.")
-        host_dir = os.path.join(self._output_path, 'host_info')
-        host_dir = validate_and_normalize_path(host_dir)
-        if not os.path.exists(host_dir):
-            logger.warning("Host info directory: %s not exist.", host_dir)
-            return
-        csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
-        json_file_name = 'timeline_' + str(self._rank_id) + '.json'
-        memory_file_name = 'host_memory_' + str(self._rank_id) + '.csv'
-        dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
-        host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
-        timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
-        memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
-        dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
-        _parse_host_info(host_info_file, timeline_file, memory_file)
-        _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
-        logger.info("Profile HostInfo finished.")
-
-
-def _offline_parse(offline_path):
-    """Parse data in abnormal scenario, only support for host_info at present."""
-    logger.info("Profiling HostInfo offline start.")
-    host_dir = os.path.join(offline_path, 'profiler', 'host_info')
-    host_dir = validate_and_normalize_path(host_dir)
-    if not os.path.exists(host_dir):
-        logger.warning("Host info directory: %s not exist.", host_dir)
-        return
-    files = os.listdir(host_dir)
-    for file in files:
-        if not file.startswith("host_info_") or not file.endswith(".csv"):
-            continue
-        rank_id = file.split('_')[-1].split('.')[0]
-        if not rank_id.isdigit():
-            logger.info("Cannot get rank_id from file: %s, skip it", file)
-            return
-        host_info_file = os.path.join(host_dir, file)
-        timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
-        memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
-        dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
-        _parse_host_info(host_info_file, timeline_file, memory_file)
-        _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
-    logger.info("Profile HostInfo offline finished.")
+        self._with_stack = kwargs.pop("with_stack", False)
+        if not isinstance(self._with_stack, bool):
+            logger.warning(f"For '{self.__class__.__name__}', the parameter with_stack must be bool, but got "
+                           f"type {type(self._with_stack)}, it will be set to False.")
+            self._with_stack = False
+        if self._with_stack and self._profile_framework not in ["time", "all"]:
+            logger.warning("When using the with_stack parameter, the profile_framework parameter must be enabled.")
+            self._with_stack = False
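Per the new validation, `with_stack` is silently reset to `False` unless `profile_framework` is `"time"` or `"all"`. A hedged usage sketch inferred from that logic (consult the mindspore.Profiler documentation for the authoritative parameter set):

```python
# Usage inferred from the kwargs validation above; treat the parameter
# combination as an assumption, not official guidance.
import mindspore as ms

profiler = ms.Profiler(output_path="data", profile_framework="all", with_stack=True)
# ... run the network to be profiled ...
profiler.analyse()
```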