mindspore 2.7.0__cp310-cp310-win_amd64.whl → 2.7.0rc1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +2 -2
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -0
- mindspore/_extends/parse/parser.py +22 -28
- mindspore/_extends/parse/standard_method.py +1 -15
- mindspore/_extends/pijit/pijit_func_white_list.py +5 -2
- mindspore/_extends/remote/kernel_build_server_ascend.py +75 -0
- mindspore/amp.py +18 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/__init__.py +12 -18
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +38 -102
- mindspore/common/_utils.py +1 -9
- mindspore/common/api.py +106 -155
- mindspore/common/{dynamic_shape/auto_dynamic_shape.py → auto_dynamic_shape.py} +23 -17
- mindspore/common/dtype.py +57 -98
- mindspore/common/dump.py +1 -1
- mindspore/common/file_system.py +9 -59
- mindspore/common/hook_handle.py +3 -22
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +20 -4
- mindspore/common/recompute.py +4 -2
- mindspore/common/tensor.py +52 -38
- mindspore/communication/_hccl_management.py +297 -0
- mindspore/context.py +21 -15
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +1 -35
- mindspore/dataset/engine/datasets.py +315 -330
- mindspore/dataset/engine/datasets_user_defined.py +22 -38
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/transforms.py +3 -3
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +5 -17
- mindspore/dataset/vision/utils.py +21 -632
- mindspore/device_context/ascend/op_tuning.py +1 -35
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -3
- mindspore/include/api/cell.h +4 -28
- mindspore/include/api/cfg.h +7 -24
- mindspore/include/api/context.h +0 -1
- mindspore/include/api/delegate.h +2 -0
- mindspore/include/api/dual_abi_helper.h +19 -100
- mindspore/include/api/graph.h +1 -14
- mindspore/include/api/kernel.h +3 -16
- mindspore/include/api/kernel_api.h +1 -9
- mindspore/include/api/metrics/accuracy.h +0 -9
- mindspore/include/api/model.h +1 -5
- mindspore/include/api/model_group.h +0 -4
- mindspore/include/api/model_parallel_runner.h +0 -2
- mindspore/include/api/status.h +10 -48
- mindspore/include/api/types.h +1 -6
- mindspore/include/dataset/constants.h +0 -9
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +2 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -5
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/distributed/__init__.py +0 -4
- mindspore/mint/distributed/distributed.py +14 -217
- mindspore/mint/nn/layer/_functions.py +2 -1
- mindspore/mint/nn/layer/conv.py +6 -6
- mindspore/mint/nn/layer/normalization.py +3 -3
- mindspore/nn/cell.py +174 -216
- mindspore/nn/layer/activation.py +2 -4
- mindspore/nn/layer/basic.py +13 -7
- mindspore/nn/layer/image.py +1 -1
- mindspore/nn/optim/adam.py +3 -1
- mindspore/nn/optim/lamb.py +3 -1
- mindspore/nn/optim/tft_wrapper.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +5 -39
- mindspore/nn/wrap/grad_reducer.py +15 -0
- mindspore/numpy/array_creations.py +2 -2
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_op_impl/cpu/__init__.py +0 -1
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +2 -12
- mindspore/ops/auto_generate/gen_extend_func.py +4 -4
- mindspore/ops/auto_generate/gen_ops_def.py +16 -290
- mindspore/ops/auto_generate/gen_ops_prim.py +76 -563
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/function/__init__.py +0 -1
- mindspore/ops/function/array_func.py +6 -10
- mindspore/ops/function/debug_func.py +2 -4
- mindspore/ops/function/grad/grad_func.py +12 -4
- mindspore/ops/function/math_func.py +32 -44
- mindspore/ops/function/nn_func.py +20 -18
- mindspore/ops/functional.py +1 -2
- mindspore/ops/functional_overload.py +12 -23
- mindspore/ops/operations/_inner_ops.py +12 -11
- mindspore/ops/operations/array_ops.py +50 -4
- mindspore/ops/operations/comm_ops.py +15 -1
- mindspore/ops/operations/custom_ops.py +4 -10
- mindspore/ops/operations/debug_ops.py +6 -6
- mindspore/ops/operations/manually_defined/ops_def.py +12 -12
- mindspore/ops/operations/math_ops.py +5 -5
- mindspore/ops/operations/nn_ops.py +1 -1
- mindspore/ops/primitive.py +10 -3
- mindspore/ops/tensor_method.py +7 -16
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +16 -0
- mindspore/parallel/_auto_parallel_context.py +15 -5
- mindspore/parallel/_parallel_serialization.py +2 -3
- mindspore/parallel/_ps_context.py +2 -2
- mindspore/parallel/_transformer/transformer.py +4 -4
- mindspore/parallel/_utils.py +11 -5
- mindspore/parallel/auto_parallel.py +9 -23
- mindspore/parallel/checkpoint_transform.py +0 -2
- mindspore/parallel/cluster/process_entity/_api.py +1 -4
- mindspore/parallel/cluster/run.py +3 -5
- mindspore/parallel/function/reshard_func.py +5 -6
- mindspore/parallel/nn/parallel_cell_wrapper.py +3 -40
- mindspore/parallel/nn/parallel_grad_reducer.py +8 -0
- mindspore/parallel/shard.py +21 -7
- mindspore/parallel/transform_safetensors.py +4 -10
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +9 -10
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +0 -9
- mindspore/profiler/common/profiler_context.py +2 -25
- mindspore/profiler/common/profiler_meta_data.py +0 -1
- mindspore/profiler/common/profiler_op_analyse.py +6 -10
- mindspore/{ops/_op_impl/cpu/joinedstr_op.py → profiler/common/validator/__init__.py} +1 -15
- mindspore/profiler/common/validator/validate_path.py +84 -0
- mindspore/profiler/dynamic_profiler.py +46 -91
- mindspore/profiler/envprofiler.py +5 -30
- mindspore/profiler/experimental_config.py +1 -16
- mindspore/profiler/platform/cpu_profiler.py +4 -10
- mindspore/profiler/platform/npu_profiler.py +1 -1
- mindspore/profiler/profiler.py +145 -193
- mindspore/profiler/profiler_action_controller.py +1 -1
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/runtime/__init__.py +4 -6
- mindspore/runtime/executor.py +0 -27
- mindspore/runtime/memory.py +0 -1
- mindspore/runtime/thread_bind_core.py +1 -1
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +3 -3
- mindspore/train/amp.py +3 -0
- mindspore/train/callback/_callback.py +1 -2
- mindspore/train/callback/_checkpoint.py +8 -1
- mindspore/train/callback/_flops_collector.py +6 -10
- mindspore/train/callback/_train_fault_tolerance.py +7 -3
- mindspore/train/data_sink.py +4 -4
- mindspore/train/dataset_helper.py +5 -5
- mindspore/train/model.py +20 -4
- mindspore/train/serialization.py +15 -35
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/utils.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +193 -192
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +0 -1109
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/dynamic_shape/enable_dynamic.py +0 -197
- /mindspore/common/{dynamic_shape/_auto_dynamic.py → _auto_dynamic.py} +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiler.py
CHANGED

@@ -16,18 +16,18 @@
 import os
 import json
 import warnings
-from typing import Optional, Dict, Callable, Any
+from typing import Optional, Dict, Callable, Any
 from sys import getsizeof
 from concurrent.futures import ProcessPoolExecutor, as_completed

 from mindspore import log as logger
-from mindspore.profiler.common.constant import ProfilerStepNameConstant, DeviceTarget
+from mindspore.profiler.common.constant import ProfilerStepNameConstant, DeviceTarget
 from mindspore.profiler.common.profiler_context import ProfilerContext
 from mindspore.profiler.platform.npu_profiler import NPUProfilerAnalysis
 from mindspore.profiler.profiler_action_controller import ProfilerActionController
 from mindspore.profiler.experimental_config import _ExperimentalConfig
 from mindspore.profiler.profiler_interface import ProfilerInterface
-from mindspore.profiler.schedule import _default_schedule_fn, ProfilerAction
+from mindspore.profiler.schedule import _default_schedule_fn, ProfilerAction, Schedule
 from mindspore.profiler.common.record_function import RecordFunction
 from mindspore.profiler.common.path_manager import PathManager
 from mindspore.profiler.common.profiler_path_manager import ProfilerPathManager
@@ -111,9 +111,9 @@ def tensorboard_trace_handler(dir_name: str = None, worker_name: str = None,
         logger.warning("async_mode is not bool, set by default.")
         async_mode = False

-    def handler_fn(
+    def handler_fn() -> None:
         if analyse_flag:
-
+            NPUProfilerAnalysis.online_analyse(async_mode=async_mode)

     return handler_fn

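For context, the handler returned by tensorboard_trace_handler is invoked with no arguments once a trace is ready (matching the on_trace_ready() call fixed in profiler_action_controller.py further down). A minimal usage sketch, assuming the public mindspore.profiler API shown in the profile example later in this diff; the "./prof_data" directory is illustrative:

    import mindspore
    from mindspore.profiler import ProfilerActivity

    # The returned handler_fn is stored as on_trace_ready and called after
    # collection; with the default analyse_flag it triggers online analysis.
    with mindspore.profiler.profile(
            activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
            on_trace_ready=mindspore.profiler.tensorboard_trace_handler("./prof_data"),
    ) as prof:
        ...  # run the workload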
@@ -261,10 +261,9 @@ class Profiler:
         ... # Profiler end
         ... profiler.analyse()
     """
+    MAX_META_SIZE = 100 * 1024 * 1024  # 100MB

     def __init__(self, **kwargs) -> None:
-        logger.warning("'mindspore.Profiler' will be deprecated and removed in a future version. Please use the api "
-                       "'mindspore.profiler.profile' instead.")
         self._metadata: Dict[str, str] = {}
         self._prof_context: ProfilerContext = ProfilerContext()
         self._prof_context.set_params(**kwargs)
@@ -565,7 +564,7 @@ class Profiler:
             return

         add_size = getsizeof(key) + getsizeof(value)
-        if getsizeof(self._metadata) + add_size <
+        if getsizeof(self._metadata) + add_size < self.MAX_META_SIZE:
             if key in self._metadata:
                 logger.warning(f"{key} is already saved as metadata, override it.")
             self._metadata[key] = value
@@ -596,7 +595,7 @@ class Profiler:
             return

         add_size = getsizeof(key) + getsizeof(value)
-        if getsizeof(self._metadata) + add_size <
+        if getsizeof(self._metadata) + add_size < self.MAX_META_SIZE:
             try:
                 if key in self._metadata:
                     logger.warning(f"{key} is already saved as metadata, override it.")
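A standalone sketch of the size guard on the + side (the constant, sizes measured with sys.getsizeof, and messages are taken from the hunks above; metadata stands in for self._metadata):

    from sys import getsizeof

    MAX_META_SIZE = 100 * 1024 * 1024  # 100MB, as defined on Profiler above
    metadata = {}

    key, value = "test_key", "test_value"
    add_size = getsizeof(key) + getsizeof(value)
    if getsizeof(metadata) + add_size < MAX_META_SIZE:
        metadata[key] = value  # existing keys are overridden with a warning
    else:
        print("Too many metadata added. Skip this metadata")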
@@ -697,32 +696,38 @@ class Profile:
     correspondence, cluster, etc data analysis.

     Args:
-
+        start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
+            data collection based on conditions. Default: ``True`` .
+        activities (list, optional): The activities to collect.
             Default: ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.

             - ProfilerActivity.CPU: Collect MindSpore framework data.
             - ProfilerActivity.NPU: Collect CANN software stack and NPU data.
             - ProfilerActivity.GPU: Collect GPU data.
-
-
-
-
+        schedule (schedule, optional): Sets the action strategy for the capture, defined by the schedule class,
+            to be used with the step interface. Default: ``None``. Performance data of all steps is collected.
+            For details, see :class:`mindspore.profiler.schedule` .
+        on_trace_ready (Callable, optional): Sets the callback function to be executed when the performance data
+            is collected. Default: ``None``. It indicates that only performance data is collected, but not resolved.
+            For details, see :func:`mindspore.profiler.tensorboard_trace_handler` .
         profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
             When using this parameter, `activities` must set to ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
             Collecting operator memory data when GE backend requires collecting from the first step.
             Default: ``False`` . The operator name currently collected by this parameter is incomplete.
             This issue will be resolved in later versions. It is recommended to use the environment variable
             ``MS_ALLOC_CONF`` instead.
-
-
-
-            Default value: ``False`` .
-        start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
-            data collection based on conditions. Default: ``True`` .
+        with_stack (bool, optional): (Ascend only) Whether to collect frame host call stack data
+            on the Python side. This
+            data is presented in the form of a flame graph in the timeline. When using this parameter, `activities` must
+            include ``ProfilerActivity.CPU``. Default value: ``False`` .
         hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
             collect when True. Default: ``False`` .
         pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
             Default: ``False`` .
+        data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
+            Default value: ``False`` .
+        parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
+            Default value: ``False`` .
         sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
             Default: ``True`` .

@@ -734,12 +739,6 @@ class Profile:
         record_shapes (bool, optional): (Ascend only) Whether to collect operator input tensor shapes data, collect
             when ``True`` . When using this parameter, `activities` must include ``ProfilerActivity.CPU``.
             Default: ``False``.
-        schedule (Callable, optional): Sets the action strategy for the capture, defined by the schedule class,
-            to be used with the step interface. Default: ``None``. Performance data of all steps is collected.
-            For details, see :class:`mindspore.profiler.schedule` .
-        on_trace_ready (Callable, optional): Sets the callback function to be executed when the performance data
-            is collected. Default: ``None``. It indicates that only performance data is collected, but not resolved.
-            For details, see :func:`mindspore.profiler.tensorboard_trace_handler` .
         experimental_config (_ExperimentalConfig, optional): expandable parameters can be configured in this
             configuration item. For details, see :class:`mindspore.profiler._ExperimentalConfig` .
     Raises:
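The schedule and on_trace_ready entries move ahead of the device-specific flags in the docstring. A short sketch of constructing the documented schedule, assuming the wait/warmup/active/repeat/skip_first parameters carry the cycle semantics used in the example later in this diff:

    import mindspore

    # Skip no steps, no wait or warmup phase, record one active step, one cycle.
    sched = mindspore.profiler.schedule(wait=0, warmup=0, active=1, repeat=1, skip_first=0)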
@@ -804,54 +803,70 @@ class Profile:
         ... prof.step()
     """

-    def __init__(
-    [47 more deleted lines not preserved in this view]
+    def __init__(
+            self,
+            activities: list = None,
+            with_stack: bool = False,
+            profile_memory: bool = False,
+            data_process: bool = False,
+            parallel_strategy: bool = False,
+            start_profile: bool = True,
+            hbm_ddr: bool = False,
+            pcie: bool = False,
+            sync_enable: bool = True,
+            record_shapes: bool = False,
+            schedule: Schedule = None,
+            on_trace_ready: Optional[Callable[..., Any]] = None,
+            experimental_config: Optional[_ExperimentalConfig] = None,
+    ):
+        self._activities = activities
+        self._with_stack = with_stack
+        self._profile_memory = profile_memory
+        self._data_process = data_process
+        self._parallel_strategy = parallel_strategy
+        self._start_profile = start_profile
+        self._hbm_ddr = hbm_ddr
+        self._pcie = pcie
+        self._sync_enable = sync_enable
+        self._record_shapes = record_shapes
+        self._schedule = schedule
+        self._on_trace_ready = on_trace_ready
+        self._experimental_config = experimental_config or _ExperimentalConfig()
+        self._profiler = Profiler(
+            profiler_level=self._experimental_config.profiler_level,
+            activities=self._activities,
+            aic_metrics=self._experimental_config.aic_metrics,
+            with_stack=self._with_stack,
+            profile_memory=self._profile_memory,
+            data_process=self._data_process,
+            parallel_strategy=self._parallel_strategy,
+            start_profile=self._start_profile,
+            l2_cache=self._experimental_config.l2_cache,
+            hbm_ddr=self._hbm_ddr,
+            pcie=self._pcie,
+            sync_enable=self._sync_enable,
+            record_shapes=self._record_shapes,
+            data_simplification=self._experimental_config.data_simplification,
+            mstx=self._experimental_config.mstx,
+            mstx_domain_include=self._experimental_config.mstx_domain_include,
+            mstx_domain_exclude=self._experimental_config.mstx_domain_exclude,
+            export_type=self._experimental_config.export_type,
+            sys_io=self._experimental_config.sys_io,
+            sys_interconnection=self._experimental_config.sys_interconnection,
+            host_sys=self._experimental_config.host_sys,
+            schedule=self._schedule,
+            on_trace_ready=self._on_trace_ready,
+        )
+
+    def __enter__(self) -> 'Profile':
+        self._profiler.__enter__()
+        return self
+
+    def __exit__(self, exe_type, exe_val, exc_tb):
+        self._profiler.__exit__(exe_type, exe_val, exc_tb)
+
+    def __del__(self):
+        self._profiler.__del__()

     def start(self) -> None:
         """
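On the + side, Profile becomes a thin facade: every constructor argument is stored and forwarded, together with the expanded _ExperimentalConfig fields, to an internal Profiler, and the context-manager hooks delegate as well. A condensed restatement of that pattern (not the full argument list, which is shown in the hunk above):

    # Condensed sketch of the delegation introduced above.
    class Profile:
        def __init__(self, **kwargs):
            self._profiler = Profiler(**kwargs)  # real code expands experimental_config fields

        def __enter__(self):
            self._profiler.__enter__()
            return self

        def __exit__(self, exe_type, exe_val, exc_tb):
            self._profiler.__exit__(exe_type, exe_val, exc_tb)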
@@ -916,14 +931,7 @@ class Profile:
         ... prof.step()
         ... prof.stop()
     """
-
-            logger.warning("The profile has already started. Do not turn on again in the open state.")
-            return
-        self._has_started = True
-        self.action_controller.transit_action(ProfilerAction.NONE, self.current_action)
-        if self.record_steps:
-            self._step_rec_fn = RecordFunction(ProfilerStepNameConstant.PROFILER_STEP + str(self.step_num))
-            self._step_rec_fn.start()
+        self._profiler.start()

     def stop(self) -> None:
         """
@@ -987,13 +995,7 @@ class Profile:
         ... prof.step()
         ... prof.stop()
     """
-
-            logger.error("The profile has not started. Do not turn off again in the closed state.")
-            return
-        self._has_started = False
-        if self.record_steps and self._step_rec_fn:
-            self._step_rec_fn.stop()
-        self.action_controller.transit_action(self.current_action, None)
+        self._profiler.stop()

     def step(self) -> None:
         """
@@ -1004,71 +1006,59 @@ class Profile:
         RuntimeError: If the `schedule` parameter is not set.

     Examples:
-        [51 deleted example lines not preserved in this view]
-        ... prof.step()
+        >>> import numpy as np
+        >>> import mindspore
+        >>> from mindspore import nn, context
+        >>> import mindspore.dataset as ds
+        >>> from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics, ExportType
+        >>>
+        >>> class Net(nn.Cell):
+        ...     def __init__(self):
+        ...         super(Net, self).__init__()
+        ...         self.fc = nn.Dense(2,2)
+        ...     def construct(self, x):
+        ...         return self.fc(x)
+        >>>
+        >>> def generator():
+        ...     for i in range(2):
+        ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
+        >>>
+        >>> def train(net):
+        ...     optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
+        ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+        ...     data = ds.GeneratorDataset(generator, ["data", "label"])
+        ...     model = mindspore.train.Model(net, loss, optimizer)
+        ...     model.train(1, data)
+        >>>
+        >>> if __name__ == '__main__':
+        ...     # If the device_target is GPU, set the device_target to "GPU"
+        ...     context.set_context(mode=mindspore.GRAPH_MODE)
+        ...     mindspore.set_device("Ascend")
+        ...
+        ...     # Init Profiler
+        ...     experimental_config = mindspore.profiler._ExperimentalConfig(
+        ...         profiler_level=ProfilerLevel.Level0,
+        ...         aic_metrics=AicoreMetrics.AiCoreNone,
+        ...         l2_cache=False,
+        ...         mstx=False,
+        ...         data_simplification=False,
+        ...         export_type=[ExportType.Text])
+        ...     steps = 10
+        ...     net = Net()
+        ...     # Note that the Profiler should be initialized before model.train
+        ...     with mindspore.profiler.profile(activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
+        ...                                     schedule=mindspore.profiler.schedule(wait=0, warmup=0, active=1,
+        ...                                                                          repeat=1, skip_first=0),
+        ...                                     on_trace_ready=mindspore.profiler.tensorboard_trace_handler("./data"),
+        ...                                     profile_memory=False,
+        ...                                     experimental_config=experimental_config) as prof:
+        ...
+        ...         # Train Model
+        ...         for step in range(steps):
+        ...             train(net)
+        ...             prof.step()
     """
-
-        logger.error("profile is stopped, step takes no effect!")
-            return
-        if self.record_steps and self._step_rec_fn:
-            self._step_rec_fn.stop()
-        prev_action = self.current_action
-        self.step_num += 1
-        self.current_action = self.schedule(self.step_num)
-        self.action_controller.transit_action(prev_action, self.current_action)
-        if self.record_steps:
-            self._step_rec_fn = RecordFunction(ProfilerStepNameConstant.PROFILER_STEP + str(self.step_num))
-            self._step_rec_fn.start()
+        self._profiler.step()

     def add_metadata(self, key: str, value: str):
         """
@@ -1085,18 +1075,8 @@ class Profile:
         ... # Call Profiler add_metadata
         ... prof.add_metadata("test_key", "test_value")
     """
-        if not isinstance(key, str) or not isinstance(value, str):
-            logger.warning("The key and value of metadata must be string. Skip this metadata.")
-            return

-
-        if getsizeof(self._metadata) + add_size < ProfilerMetaData.MAX_META_SIZE:
-            if key in self._metadata:
-                logger.warning(f"{key} is already saved as metadata, override it.")
-            self._metadata[key] = value
-            ProfilerMetaData.set_metadata(self._metadata)
-        else:
-            logger.warning("Too many metadata added. Skip this metadata")
+        self._profiler.add_metadata(key, value)

     def add_metadata_json(self, key: str, value: str):
         """
@@ -1114,35 +1094,7 @@ class Profile:
         ... # Call Profiler add_metadata_json
         ... prof.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
     """
-
-            logger.warning("The key and value of metadata must be string. Skip this metadata.")
-            return
-
-        add_size = getsizeof(key) + getsizeof(value)
-        if getsizeof(self._metadata) + add_size < ProfilerMetaData.MAX_META_SIZE:
-            try:
-                if key in self._metadata:
-                    logger.warning(f"{key} is already saved as metadata, override it.")
-                self._metadata[key] = json.loads(value)
-                ProfilerMetaData.set_metadata(self._metadata)
-            except ValueError:
-                logger.warning("The metadata value must be json format string. Skip this metadata")
-        else:
-            logger.warning("Too many metadata added. Skip this metadata")
-
-    def __enter__(self) -> 'Profile':
-        if not self._has_started:
-            self.start()
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback) -> None:
-        if self._has_started:
-            self.stop()
-
-    def __del__(self):
-        if self._has_started:
-            self.stop()
-            logger.warning("profile is stopped at the end of the program.")
+        self._profiler.add_metadata_json(key, value)


 def analyse(profiler_path: str, max_process_number: int = os.cpu_count() // 2, pretty=False, step_list=None,

mindspore/profiler/profiler_action_controller.py
CHANGED

@@ -56,7 +56,7 @@ class ProfilerActionController:
         This method is called when the trace is ready to notify the callback function.
         """
         if self.on_trace_ready:
-            self.on_trace_ready(
+            self.on_trace_ready()

     def transit_action(self, prev_action: ProfilerAction, current_action: ProfilerAction) -> None:
         """

mindspore/profiler/profiler_interface.py
CHANGED

@@ -76,14 +76,14 @@ class ProfilerInterface:
         logger.info("ProfilerInterface stop")

     @classmethod
-    def analyse(cls
+    def analyse(cls):
         """ProfilerInterface analyse"""
         if not cls.is_initialized:
             logger.warning("ProfilerInterface analyse failed, profiler has not been initialized.")
             return

         for profiler in cls.platform_profilers_set:
-            profiler.analyse(
+            profiler.analyse()

         logger.info("ProfilerInterface analyse")

mindspore/rewrite/symbol_tree/symbol_tree.py
CHANGED

@@ -1503,7 +1503,7 @@ class SymbolTree(Observer, Observable, NodeManager):
         """
         # ast.Constant can be check without eval
         if isinstance(ast_node, ast.Constant):
-            return True, bool(
+            return True, bool(ast.value)
         # Get the module where the code of ast_node is located
         file_path = inspect.getfile(type(self.get_origin_network()))
         module = None

mindspore/runtime/__init__.py
CHANGED

@@ -17,10 +17,9 @@
 The runtime interface.
 """

-from mindspore.runtime.executor import launch_blocking, dispatch_threads_num, set_cpu_affinity
-
-
-    memory_replay, reset_peak_memory_stats, memory_summary, memory_allocated,\
+from mindspore.runtime.executor import launch_blocking, dispatch_threads_num, set_cpu_affinity, set_kernel_launch_group
+from mindspore.runtime.memory import set_memory, memory_stats, memory_reserved, max_memory_reserved, empty_cache, \
+    memory_replay, reset_peak_memory_stats, memory_summary, memory_allocated, \
     max_memory_allocated, reset_max_memory_reserved, reset_max_memory_allocated
 from mindspore.runtime.stream import Stream, synchronize, set_cur_stream, current_stream, \
     default_stream, communication_stream, StreamCtx
@@ -28,8 +27,7 @@ from mindspore.runtime.event import Event
 from .executor import launch_blocking

 __all__ = [
-    "launch_blocking", "dispatch_threads_num", "set_cpu_affinity",
-    "set_kernel_launch_group", "set_kernel_launch_capture",
+    "launch_blocking", "dispatch_threads_num", "set_cpu_affinity", "set_kernel_launch_group",
     "Stream", "communication_stream", "synchronize", "set_cur_stream", "current_stream", "default_stream", "StreamCtx",
     "set_memory", "memory_stats", "memory_reserved", "max_memory_reserved", "empty_cache", "memory_replay",
     "reset_peak_memory_stats", "memory_summary", "memory_allocated", "max_memory_allocated",
mindspore/runtime/executor.py
CHANGED

@@ -179,9 +179,6 @@ def set_kernel_launch_group(thread_num=2, kernel_group_num=8):
     if RuntimeConf.get_instance().is_kernel_launch_group_configured():
         raise RuntimeError("The 'kernel_launch_group' can not be set repeatedly.")

-    if RuntimeConf.get_instance().get_enable_kernel_launch_capture():
-        raise RuntimeError("The kernel launch group and kernel launch capture can not be set together")
-
     if thread_num < 1:
         raise ValueError(f"The value of thread_num should be at least 1, but got {thread_num}")

@@ -193,27 +190,3 @@ def set_kernel_launch_group(thread_num=2, kernel_group_num=8):
                          f"be evenly divisible by thread_num: {thread_num}")

     return RuntimeConf.get_instance().set_kernel_launch_group(thread_num, kernel_group_num)
-
-
-@args_type_check(enable_capture_graph=bool)
-def set_kernel_launch_capture(enable_capture_graph):
-    """
-    In O0/O1 mode, the incremental inference scenario supports graph capture.
-    By capturing the CPU-side operator dispatch behavior into a graph,
-    the performance of CPU-side operator dispatch is improved.
-
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
-    Args:
-        enable_capture_graph (bool): Whether to enable graph capture.
-            It can be turned on or off at any position in the script.
-
-    Examples:
-        >>> import mindspore as ms
-        >>> ms.runtime.set_kernel_launch_capture(enable_capture_graph=True)
-    """
-    if RuntimeConf.get_instance().is_kernel_launch_group_configured():
-        raise RuntimeError("The kernel launch group and kernel launch capture can not be set together")
-
-    return RuntimeConf.get_instance().set_kernel_launch_capture(enable_capture_graph)
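The surviving set_kernel_launch_group interface keeps its validation: thread_num must be at least 1, kernel_group_num must be evenly divisible by thread_num, and it cannot be configured twice. A usage sketch in the style of the deleted docstring example, using the documented defaults:

    import mindspore as ms

    # Defaults are thread_num=2, kernel_group_num=8; kernel_group_num must be
    # evenly divisible by thread_num or a ValueError is raised.
    ms.runtime.set_kernel_launch_group(thread_num=2, kernel_group_num=8)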
mindspore/runtime/memory.py
CHANGED

@@ -50,7 +50,6 @@ def set_memory(init_size="2GB", increase_size="2GB", max_size="1024GB", optimize
             The format is "xxGB". Default is the maximum available memory of the device, expressed as ``1024GB``.
         optimize_level (str): The memory optimize level. The value must be in ['O0', 'O1']. Default: ``O0`` .
         huge_page_reserve_size (str): The reserved size of huge page memory. The format is "xxGB". Default: ``0GB``.
-            When virtual memory is enabled, reserve huge page function is not available and this parameter is ignored.

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
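A usage sketch of the set_memory signature documented above (all values are the documented defaults; sizes use the "xxGB" string format):

    import mindspore as ms

    ms.runtime.set_memory(init_size="2GB", increase_size="2GB", max_size="1024GB",
                          optimize_level="O0", huge_page_reserve_size="0GB")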
mindspore/swresample-4.dll
CHANGED
Binary file

mindspore/swscale-6.dll
CHANGED
Binary file

mindspore/tinyxml2.dll
CHANGED
Binary file
mindspore/train/_utils.py
CHANGED

@@ -26,7 +26,7 @@ import numpy as np
 from mindspore.common.tensor import Tensor
 from mindspore._c_expression import TensorPy as Tensor_
 from mindspore._c_expression import MSContext, ms_ctx_param
-from mindspore.common.dtype import
+from mindspore.common.dtype import dtype_to_nptype, pytype_to_dtype
 from mindspore.common import dtype as mstype
 from mindspore import context
 from mindspore import log as logger
@@ -54,7 +54,7 @@ def _convert_type(types):
     """
     ms_types = []
     for np_type in types:
-        ms_type =
+        ms_type = pytype_to_dtype(np_type)
         ms_types.append(ms_type)
     return ms_types

@@ -131,7 +131,7 @@ def _construct_tensor_list(types, shapes, batch_expand_num=1):
             new_shape += (item * batch_expand_num,)
         else:
             new_shape += (item,)
-        tensor = Tensor(np.zeros(new_shape,
+        tensor = Tensor(np.zeros(new_shape, dtype_to_nptype(type_)), dtype=type_)
         tensor.virtual_flag = True
         tensor_list.append(tensor)
     return tensor_list
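The restored import gives _convert_type and _construct_tensor_list their dtype helpers back. A small sketch of what the two helpers do, with behavior inferred from their use in the hunks above:

    import numpy as np
    import mindspore as ms
    from mindspore.common.dtype import dtype_to_nptype, pytype_to_dtype

    ms_type = pytype_to_dtype(np.float32)   # NumPy/Python type -> MindSpore dtype
    np_type = dtype_to_nptype(ms.float32)   # MindSpore dtype -> NumPy type
    zeros = ms.Tensor(np.zeros((2, 2), np_type), dtype=ms_type)  # mirrors _construct_tensor_list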
mindspore/train/amp.py
CHANGED

@@ -463,6 +463,9 @@ def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
     ``Addcdiv``, ``Addcmul``, ``Cross``, ``_PyboostCrossPrim``, ``Dot``, ``GridSampler2D``, ``GridSampler3D``,
     ``BiasAdd``, ``AddN``, ``Concat``

+    For details on automatic mixed precision, refer to
+    `Automatic Mix Precision <https://www.mindspore.cn/tutorials/en/master/beginner/mixed_precision.html>`_ .
+
     Note:
         - Repeatedly calling mixed-precision interfaces, such as `custom_mixed_precision` and `auto_mixed_precision`,
           can result in a larger network hierarchy and slower performance.
@@ -60,8 +60,7 @@ def _fill_param_into_net(net, parameter_list):
         if np_val.shape == (1,):
             parameter_dict[param_name] = Parameter(np_val, name=param_name)
         elif np_val.shape == ():
-
-            parameter_dict[param_name] = Parameter(Tensor(np_val.tolist(), mstype._pytype_to_dtype(np_val.dtype)),
+            parameter_dict[param_name] = Parameter(Tensor(np_val.tolist(), mstype.pytype_to_dtype(np_val.dtype)),
                                                    name=param_name)
         else:
             parameter_dict[param_name] = Parameter(Tensor(np_val), name=param_name)
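For reference, a minimal, hedged usage sketch of the auto_mixed_precision interface documented in the first hunk above (the Dense network and the chosen amp_level are illustrative; level semantics follow the linked tutorial):

    import mindspore as ms
    from mindspore import nn
    from mindspore.train.amp import auto_mixed_precision

    net = nn.Dense(2, 2)
    # "O1" casts whitelist operators (e.g. BiasAdd, AddN, Concat) to float16.
    net = auto_mixed_precision(net, amp_level="O1", dtype=ms.float16)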