mindspore 2.7.0__cp310-cp310-win_amd64.whl → 2.7.0rc1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +2 -2
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -0
- mindspore/_extends/parse/parser.py +22 -28
- mindspore/_extends/parse/standard_method.py +1 -15
- mindspore/_extends/pijit/pijit_func_white_list.py +5 -2
- mindspore/_extends/remote/kernel_build_server_ascend.py +75 -0
- mindspore/amp.py +18 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/__init__.py +12 -18
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +38 -102
- mindspore/common/_utils.py +1 -9
- mindspore/common/api.py +106 -155
- mindspore/common/{dynamic_shape/auto_dynamic_shape.py → auto_dynamic_shape.py} +23 -17
- mindspore/common/dtype.py +57 -98
- mindspore/common/dump.py +1 -1
- mindspore/common/file_system.py +9 -59
- mindspore/common/hook_handle.py +3 -22
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +20 -4
- mindspore/common/recompute.py +4 -2
- mindspore/common/tensor.py +52 -38
- mindspore/communication/_hccl_management.py +297 -0
- mindspore/context.py +21 -15
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +1 -35
- mindspore/dataset/engine/datasets.py +315 -330
- mindspore/dataset/engine/datasets_user_defined.py +22 -38
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/transforms.py +3 -3
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +5 -17
- mindspore/dataset/vision/utils.py +21 -632
- mindspore/device_context/ascend/op_tuning.py +1 -35
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -3
- mindspore/include/api/cell.h +4 -28
- mindspore/include/api/cfg.h +7 -24
- mindspore/include/api/context.h +0 -1
- mindspore/include/api/delegate.h +2 -0
- mindspore/include/api/dual_abi_helper.h +19 -100
- mindspore/include/api/graph.h +1 -14
- mindspore/include/api/kernel.h +3 -16
- mindspore/include/api/kernel_api.h +1 -9
- mindspore/include/api/metrics/accuracy.h +0 -9
- mindspore/include/api/model.h +1 -5
- mindspore/include/api/model_group.h +0 -4
- mindspore/include/api/model_parallel_runner.h +0 -2
- mindspore/include/api/status.h +10 -48
- mindspore/include/api/types.h +1 -6
- mindspore/include/dataset/constants.h +0 -9
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +2 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -5
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/distributed/__init__.py +0 -4
- mindspore/mint/distributed/distributed.py +14 -217
- mindspore/mint/nn/layer/_functions.py +2 -1
- mindspore/mint/nn/layer/conv.py +6 -6
- mindspore/mint/nn/layer/normalization.py +3 -3
- mindspore/nn/cell.py +174 -216
- mindspore/nn/layer/activation.py +2 -4
- mindspore/nn/layer/basic.py +13 -7
- mindspore/nn/layer/image.py +1 -1
- mindspore/nn/optim/adam.py +3 -1
- mindspore/nn/optim/lamb.py +3 -1
- mindspore/nn/optim/tft_wrapper.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +5 -39
- mindspore/nn/wrap/grad_reducer.py +15 -0
- mindspore/numpy/array_creations.py +2 -2
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_op_impl/cpu/__init__.py +0 -1
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +2 -12
- mindspore/ops/auto_generate/gen_extend_func.py +4 -4
- mindspore/ops/auto_generate/gen_ops_def.py +16 -290
- mindspore/ops/auto_generate/gen_ops_prim.py +76 -563
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/function/__init__.py +0 -1
- mindspore/ops/function/array_func.py +6 -10
- mindspore/ops/function/debug_func.py +2 -4
- mindspore/ops/function/grad/grad_func.py +12 -4
- mindspore/ops/function/math_func.py +32 -44
- mindspore/ops/function/nn_func.py +20 -18
- mindspore/ops/functional.py +1 -2
- mindspore/ops/functional_overload.py +12 -23
- mindspore/ops/operations/_inner_ops.py +12 -11
- mindspore/ops/operations/array_ops.py +50 -4
- mindspore/ops/operations/comm_ops.py +15 -1
- mindspore/ops/operations/custom_ops.py +4 -10
- mindspore/ops/operations/debug_ops.py +6 -6
- mindspore/ops/operations/manually_defined/ops_def.py +12 -12
- mindspore/ops/operations/math_ops.py +5 -5
- mindspore/ops/operations/nn_ops.py +1 -1
- mindspore/ops/primitive.py +10 -3
- mindspore/ops/tensor_method.py +7 -16
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +16 -0
- mindspore/parallel/_auto_parallel_context.py +15 -5
- mindspore/parallel/_parallel_serialization.py +2 -3
- mindspore/parallel/_ps_context.py +2 -2
- mindspore/parallel/_transformer/transformer.py +4 -4
- mindspore/parallel/_utils.py +11 -5
- mindspore/parallel/auto_parallel.py +9 -23
- mindspore/parallel/checkpoint_transform.py +0 -2
- mindspore/parallel/cluster/process_entity/_api.py +1 -4
- mindspore/parallel/cluster/run.py +3 -5
- mindspore/parallel/function/reshard_func.py +5 -6
- mindspore/parallel/nn/parallel_cell_wrapper.py +3 -40
- mindspore/parallel/nn/parallel_grad_reducer.py +8 -0
- mindspore/parallel/shard.py +21 -7
- mindspore/parallel/transform_safetensors.py +4 -10
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +9 -10
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +0 -9
- mindspore/profiler/common/profiler_context.py +2 -25
- mindspore/profiler/common/profiler_meta_data.py +0 -1
- mindspore/profiler/common/profiler_op_analyse.py +6 -10
- mindspore/{ops/_op_impl/cpu/joinedstr_op.py → profiler/common/validator/__init__.py} +1 -15
- mindspore/profiler/common/validator/validate_path.py +84 -0
- mindspore/profiler/dynamic_profiler.py +46 -91
- mindspore/profiler/envprofiler.py +5 -30
- mindspore/profiler/experimental_config.py +1 -16
- mindspore/profiler/platform/cpu_profiler.py +4 -10
- mindspore/profiler/platform/npu_profiler.py +1 -1
- mindspore/profiler/profiler.py +145 -193
- mindspore/profiler/profiler_action_controller.py +1 -1
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/runtime/__init__.py +4 -6
- mindspore/runtime/executor.py +0 -27
- mindspore/runtime/memory.py +0 -1
- mindspore/runtime/thread_bind_core.py +1 -1
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +3 -3
- mindspore/train/amp.py +3 -0
- mindspore/train/callback/_callback.py +1 -2
- mindspore/train/callback/_checkpoint.py +8 -1
- mindspore/train/callback/_flops_collector.py +6 -10
- mindspore/train/callback/_train_fault_tolerance.py +7 -3
- mindspore/train/data_sink.py +4 -4
- mindspore/train/dataset_helper.py +5 -5
- mindspore/train/model.py +20 -4
- mindspore/train/serialization.py +15 -35
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/utils.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +193 -192
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +0 -1109
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/dynamic_shape/enable_dynamic.py +0 -197
- /mindspore/common/{dynamic_shape/_auto_dynamic.py → _auto_dynamic.py} +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/parallel/_transformer/transformer.py
CHANGED

@@ -1453,7 +1453,7 @@ class TransformerEncoderLayer(Cell):
     >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
     >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
     >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
-    >>> init_reset = Tensor([True], mstype.
+    >>> init_reset = Tensor([True], mstype.bool_)
     >>> # Set is_first_iteration=True to generate the full memory states
     >>> model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
     ...                                 num_heads=2, use_past=True)
@@ -1467,7 +1467,7 @@ class TransformerEncoderLayer(Cell):
     (2, 2, 16, 4)
     >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
     >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
-    >>> init_reset = Tensor([False], mstype.
+    >>> init_reset = Tensor([False], mstype.bool_)
     >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
     >>> # the full sequence.
     >>> model.add_flags_recursive(is_first_iteration=False)
@@ -2375,7 +2375,7 @@ class TransformerEncoder(Cell):
     >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
     >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
     >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
-    >>> init_reset = Tensor([True], mstype.
+    >>> init_reset = Tensor([True], mstype.bool_)
     >>> # Set is_first_iteration=True to generate the full memory states
     >>> model = TransformerEncoder(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
     ...                            num_heads=2, num_layers=2, use_past=True)
@@ -2389,7 +2389,7 @@ class TransformerEncoder(Cell):
     (2, 2, 16, 4)
     >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
     >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
-    >>> init_reset = Tensor([False], mstype.
+    >>> init_reset = Tensor([False], mstype.bool_)
     >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
     >>> # the full sequence.
     >>> model.add_flags_recursive(is_first_iteration=False)
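The four changed lines above are docstring examples where the 2.7.0 side is truncated after `mstype.`; the rc1 side spells the dtype out as `mstype.bool_`. A minimal standalone sketch of that corrected call (the imports follow the `np`/`mstype` aliases the docstrings already use):

```python
import numpy as np
from mindspore import Tensor
from mindspore.common import dtype as mstype

# init_reset carries a boolean reset flag, so its dtype is mstype.bool_.
init_reset = Tensor([True], mstype.bool_)
batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
```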
mindspore/parallel/_utils.py
CHANGED
@@ -21,7 +21,7 @@ import mindspore as ms
 from mindspore import context, log as logger
 from mindspore._c_expression import reset_op_id, reset_op_id_with_offset
 from mindspore.common.tensor import Tensor
-from mindspore.common.dtype import
+from mindspore.common.dtype import dtype_to_nptype
 from mindspore.common import dtype as mstype
 from mindspore.communication.management import get_group_size, get_rank
 from mindspore.communication._comm_helper import _is_initialized
@@ -156,7 +156,7 @@ def _is_in_auto_parallel_mode():


 def _is_parallel_mode():
-    if not _is_initialized():
+    if not _is_initialized() or context.get_context('mode') == context.PYNATIVE_MODE:
         return False
     if os.getenv("RUN_MODE") != "predict":
         return False
@@ -173,6 +173,12 @@ def _is_in_hybrid_parallel_mode():
     return _get_parallel_mode() == ms.ParallelMode.HYBRID_PARALLEL


+def _is_pynative_parallel():
+    parallel_mode = context.get_auto_parallel_context('parallel_mode')
+    return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
+        context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)
+
+
 def _get_full_batch():
     """Get whether to use full_batch."""
     return auto_parallel_context().get_full_batch()
@@ -446,7 +452,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
                 batchsize_per_device = item
             else:
                 new_shape += (item,)
-        new_tensor_numpy = np.zeros(new_shape,
+        new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
         start = stage_rank * batchsize_per_device
         new_tensor_numpy[start: start + batchsize_per_device] = data.asnumpy()
     else:
@@ -460,7 +466,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
             end = (stage_rank % dataset_strategy[index][i] + 1) * item
             s = slice(start, end, 1)
             slice_index += (s,)
-        new_tensor_numpy = np.zeros(new_shape,
+        new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
         new_tensor_numpy[slice_index] = data.asnumpy()
         new_tensor = Tensor(new_tensor_numpy, dtype=type_)
         lst.append(new_tensor)
@@ -767,7 +773,7 @@ def _grads_divided_by_device_num_if_recomputation(grads):
     """
     If in pynative parallel and full_batch is True, divide grads by device num to ensure that the gradients is correct.
     """
-    if not _get_full_batch():
+    if not _is_pynative_parallel() or not _get_full_batch():
         return grads

     device_num = _get_device_num()
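The only functional change in `_to_full_tensor` above is that the zero-filled buffer is now allocated with `dtype_to_nptype(type_)` rather than the truncated dtype argument on the 2.7.0 side. A minimal standalone sketch of what that helper does (shape and dtype chosen for illustration):

```python
import numpy as np
from mindspore.common import dtype as mstype
from mindspore.common.dtype import dtype_to_nptype

# dtype_to_nptype maps a MindSpore dtype to the matching NumPy dtype, so the
# padded full-batch buffer keeps the dtype of the tensor slice it will hold.
buf = np.zeros((8, 16), dtype_to_nptype(mstype.float32))
assert buf.dtype == np.float32
```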
mindspore/parallel/auto_parallel.py
CHANGED

@@ -14,7 +14,6 @@
 # ============================================================================
 """Cell of auto parallel"""
 import os
-from mindspore import jit
 from mindspore.nn.cell import Cell
 from mindspore.parallel.shard import Layout
 from mindspore.communication.management import get_rank, get_group_size
@@ -282,8 +281,7 @@ class AutoParallel(Cell):
     Note:
         - It only works when `parallel_mode=sharding_propagation`.
         - When performing distributed training, users can first save the strategy using dryrun on a single device
-          and then load strategy to perform distributed training.
-          save the strategy file, so the simulated rank id specified by Dryrun must be divisible by 8.
+          and then load strategy to perform distributed training.

     Args:
         file_path (str): Path to save parallel strategy json, must be an absolute path.
@@ -513,17 +511,17 @@ class AutoParallel(Cell):
             raise ValueError("For 'AutoParallel.pipeline', the argument 'stages' "
                              "must be larger than zero, but got value: {}.".format(stages))
         if not isinstance(output_broadcast, bool):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be bool type, but got the type : {}.".format(type(output_broadcast)))
         if not isinstance(interleave, bool):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be bool type, but got the type : {}.".format(type(interleave)))
         if not isinstance(scheduler, str):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be str type, but got the type : {}.".format(type(scheduler)))
-        if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp"
+        if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp"):
             raise ValueError("For 'AutoParallel.pipeline', the argument "
-                             "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp'
+                             "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp' ," \
                              " but got the value : {}."
                              .format(scheduler))
         self._pipeline_stages = stages
@@ -667,11 +665,8 @@ class AutoParallel(Cell):
            - recomputation_communication_overlap (bool): Enable overlap between recompute ops and communication ops
              if True.
              Default: False.
-           - grad_matmul_communication_overlap (bool
-
-             that this feature is disabled. When set to str, it only optimizes the specified communication
-             operator types, with operators separated by ``,``. For example, "AlltoAll,AlltoAllV" indicates that
-             only ``AlltoAll`` and ``AlltoAllV`` are optimized. Default: ``False``.
+           - grad_matmul_communication_overlap (bool): Enable overlap between dw matmul and
+             tensor parallel communication ops if True. Default: False.
            - grad_fa_allgather_overlap (bool): Enable overlap between duplicated allgather by recomputing
              in sequence parallel and flashattentionscoregrad ops if True. Default: False.
            - enable_communication_fusion (bool): Enable communication fusion to optimize the number of
@@ -686,9 +681,7 @@ class AutoParallel(Cell):
             and optimizer parallel allgather communication if True. Currently, do not support
             `O2 <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.JitConfig.html>`_
             Default: False.
-           - computation_communication_fusion_level (int): Enable the fusion between compute and communicate
-             which fuses communication tasks and computing tasks, allows for partial pipelining and parallel
-             execution of these tasks during operation, thereby enhancing performance.
+           - computation_communication_fusion_level (int): Enable the fusion between compute and communicate.
             Default: ``0``. Note: This function must be used with Ascend Training Solution 24.0.RC2 or later.
             This is an experimental configuration, may be changed or canceled in the future.

@@ -699,12 +692,6 @@ class AutoParallel(Cell):
             - 2: Apply fusion to backward nodes.

             - 3: Apply fusion to all nodes.
-
-             .. warning::
-                 After setting ``export MS_ENABLE_LCCL=on``, the fusion operator based on memory semantics will be
-                 used. Please note that this operator is still in an experimental stage and may be changed or
-                 removed in the future.
-
           - dataset_broadcast_opt_level (int): Optimize the scenario that the dataset repeated reading. Only
             support O0/O1 jit level. It doesn't work in O2 mode. Default: ``0``.

@@ -748,6 +735,5 @@ class AutoParallel(Cell):
         self._transformer_opt_config = file_path
         ctx.ascend_config['parallel_speed_up_json_path'] = file_path

-    @jit
     def construct(self, *args, **kwargs):
         return self.network(*args, **kwargs)
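For context, the `pipeline()` validation reconstructed above corresponds to the call pattern below. This is a hedged sketch only: the `nn.Dense` stand-in network and the chosen argument values are illustrations, not part of this diff.

```python
import mindspore.nn as nn
from mindspore.parallel.auto_parallel import AutoParallel

net = nn.Dense(8, 8)                 # stand-in network; any Cell works here
parallel_net = AutoParallel(net)
# scheduler must be one of "1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp",
# otherwise the ValueError shown in the hunk above is raised.
parallel_net.pipeline(stages=2, output_broadcast=False, interleave=False, scheduler="1f1b")
```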
mindspore/parallel/checkpoint_transform.py
CHANGED

@@ -1165,8 +1165,6 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
         train_strategy_filename = ms.context.get_auto_parallel_context("strategy_ckpt_load_file")

     _train_strategy = build_searched_strategy(train_strategy_filename)
-    if not _train_strategy:
-        return True
     train_strategy = _convert_to_list(_train_strategy)

     train_dev_count = 1
mindspore/parallel/cluster/process_entity/_api.py
CHANGED

@@ -179,12 +179,9 @@ class _ProcessManager:
         self.is_simulation = self.sim_level != -1
         if self.is_simulation:
             os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
-            if self.sim_rank_id == -1:
-                self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
         elif os.getenv("MS_SIMULATION_LEVEL"):
             self.is_simulation = True
-
-            self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
+        self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
         if os.getenv("RANK_SIZE"):
             self.exported_rank_size = os.getenv("RANK_SIZE")
         # If sim_rank_id is set, single worker can be started.
mindspore/parallel/cluster/run.py
CHANGED

@@ -125,16 +125,14 @@ def get_args():
         default=-1,
         type=int,
         choices=[0, 1, 2, 3],
-        help="specifies simulation level.
-             "
+        help="specifies simulation level. When this argument is set, msrun only spawns one process "
+             "but export RANK_SIZE with value worker_num and RANK_ID with value sim_rank_id."
     )
     parser.add_argument(
         "--sim_rank_id",
         default=-1,
         type=int,
-        help="specifies simulation process's rank id.
-             "is spawned on dryrun mode, functioning equivalently to environment variable 'RANK_ID' "
-             "while having higher priority."
+        help="specifies simulation process's rank id. Only one process is spawned in simulation scenario."
     )
     parser.add_argument(
         "--rank_table_file",
mindspore/parallel/function/reshard_func.py
CHANGED

@@ -42,12 +42,11 @@ def reshard(tensor, layout):
        can check :class:`mindspore.parallel.Layout` for reference.

    Note:
-        In the Graph mode, this function can set the sharding propagation strategy of a tensor.
-
-
-
-
-        The method is currently not supported in PyNative mode.
+        - In the Graph mode, this function can set the sharding propagation strategy of a tensor.
+          For those tensor do not manually be set, their strategies are decided by the sharding
+          strategy propagation algorithm automatically.
+        - In PyNative mode, you can use this method to arrange tensors in a cell (that is, cells
+          that use Cell.shard/F.shard in PyNative mode) that is executed in parallel in graph mode.

    Args:
        tensor (Tensor): The tensor to be set the sharding strategy.
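The rewritten note says `reshard` also applies to cells executed through shard in PyNative mode. A minimal usage sketch, assuming the public import paths `mindspore.parallel.Layout` and `mindspore.parallel.function.reshard`; the call itself only takes effect inside a network compiled under semi-auto/auto parallel, so it is left commented here:

```python
from mindspore.parallel import Layout
from mindspore.parallel.function import reshard  # wrapper around the reshard_func.reshard shown above

# A 2x4 device matrix whose axes are named "dp" and "mp".
layout = Layout((2, 4), ("dp", "mp"))
# Inside a parallel-compiled construct(), a 2-D tensor x could be pinned to that layout:
# y = reshard(x, layout("dp", "mp"))
```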
mindspore/parallel/nn/parallel_cell_wrapper.py
CHANGED

@@ -28,8 +28,7 @@ from mindspore import log as logger

 class PipelineCell(Cell):
     """
-    Slice MiniBatch into finer-grained MicroBatch for use in pipeline-parallel training
-    and specify the segment info.
+    Slice MiniBatch into finer-grained MicroBatch for use in pipeline-parallel training.

     Note:
         micro_size must be greater or equal to pipeline stages.
@@ -38,8 +37,6 @@ class PipelineCell(Cell):
         network (Cell): The target network to wrap.
         micro_size (int): MicroBatch size.
         stage_config (dict, optional): The stage configuration for each cell's execution in pipeline parallel.
-        segment_config (dict, optional): The segment configuration for each cell's execution in pipeline parallel.
-            Default ``None``.

     Supported Platforms:
         ``Ascend``
@@ -51,7 +48,7 @@ class PipelineCell(Cell):
         >>> net = LeNet5()
         >>> net = nn.PipelineCell(net, 4, stage_config={"cell_name_0": 0, "cell_name_1": 1})
     """
-    def __init__(self, network, micro_size, stage_config=None
+    def __init__(self, network, micro_size, stage_config=None):
         super(PipelineCell, self).__init__(auto_prefix=False)
         self.network = network
         self.micro_inputs = nn.CellList()
@@ -107,37 +104,6 @@ class PipelineCell(Cell):
                 logger.warning(cell_name)
             raise KeyError("For 'PipelineCell', the argument 'stage_config' : {} is not "
                            "found in 'network' : {}".format(config_dict, network))
-        if segment_config is None:
-            return
-        self._config_segment(segment_config)
-
-
-    def _config_segment(self, segment_config):
-        """
-        Config segment num for cell.
-        """
-        config_dict = segment_config.copy()
-
-        for cell_name, cell in self.network.cells_and_names():
-            if cell_name in segment_config:
-                setattr(cell, "pipeline_segment", segment_config[cell_name])
-                del config_dict[cell_name]
-        if str(self.network) in segment_config:
-            setattr(self.network, "pipeline_segment", segment_config[str(self.network)])
-            del config_dict[str(self.network)]
-        # if there are any config elements left, print them
-        if config_dict:
-            for config_cell_name, config_segment_num in config_dict.items():
-                logger.error("pipeline_cell segment_config set pipeline_segment fail!")
-                logger.warning("config cell name:" + str(config_cell_name) +
-                               " config segment num:" + str(config_segment_num))
-            logger.warning("network:" + str(self.network))
-            logger.warning("cell name available:")
-            for cell_name, _ in self.network.cells_and_names():
-                logger.warning(cell_name)
-            raise KeyError("For 'PipelineCell', the argument 'segment_config' : {} is not "
-                           "found in 'network' : {}".format(config_dict, self.network))
-

     def construct(self, *args, **kwargs):
         ret = None
@@ -153,8 +119,7 @@ class PipelineCell(Cell):

 class Pipeline(PipelineCell):
     """
-    Specify the number of micro_batch for pipeline parallelism and the division rules for stage
-    and specify the segment info.
+    Specify the number of micro_batch for pipeline parallelism and the division rules for stage.

     Note:
         micro_size must be greater or equal to pipeline stages.
@@ -163,8 +128,6 @@ class Pipeline(PipelineCell):
         network (Cell): The target network to wrap.
         micro_size (int): MicroBatch size.
         stage_config (dict, optional): Stage configuration for cell's execution in pipeline parallel. Default ``None``.
-        segment_config (dict, optional): The segment configuration for each cell's execution in pipeline parallel.
-            Default ``None``.

     Raises:
         TypeError: The type of `net` is not cell.
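With `segment_config` gone, `PipelineCell` keeps only the network, the micro-batch count and an optional `stage_config`. A minimal sketch (placeholder `nn.Dense` network; `stage_config` is omitted because its keys must name real sub-cells, otherwise the KeyError shown above is raised):

```python
import mindspore.nn as nn

net = nn.Dense(8, 8)                               # placeholder network
pipeline_net = nn.PipelineCell(net, micro_size=4)  # rc1 signature: no segment_config argument
```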
mindspore/parallel/nn/parallel_grad_reducer.py
CHANGED

@@ -17,6 +17,7 @@ from __future__ import absolute_import

 __all__ = ['PipelineGradReducer']

+from mindspore import context
 from mindspore.nn.cell import Cell
 from mindspore.ops import functional as F, composite as C, operations as P
 import mindspore.common.dtype as mstype
@@ -139,6 +140,7 @@ class PipelineGradReducer(Cell):
     """
     def __init__(self, parameters, scale_sense=1.0, opt_shard=None):
         super(PipelineGradReducer, self).__init__(auto_prefix=False)
+        self._check_mode()
         self.accu_grads = parameters.clone(prefix="accu_grads", init="zeros")
         self.grad_reducer = Identity()
         self.degree = Tensor(1, mstype.float32)
@@ -160,3 +162,9 @@ class PipelineGradReducer(Cell):
         accu_grads = self.grad_reducer(self.accu_grads)
         new_grads = self.hyper_map(F.partial(grad_scale, self.scale_sense * self.degree), grads, accu_grads)
         return new_grads
+
+    def _check_mode(self):
+        """check parallel mode"""
+        mode = context.get_context('mode')
+        if mode != context.GRAPH_MODE:
+            raise RuntimeError(f"PipelineGradReducer only support graph mode, but get {mode}")
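The new `_check_mode()` guard means `PipelineGradReducer` can only be constructed under graph mode. A hedged sketch (the `mindspore.parallel.nn` import path follows this module's package, and the tiny `Dense` network is a placeholder):

```python
import mindspore.nn as nn
from mindspore import context, ParameterTuple
from mindspore.parallel.nn import PipelineGradReducer

context.set_context(mode=context.GRAPH_MODE)   # required: PyNative mode now raises RuntimeError
params = ParameterTuple(nn.Dense(4, 4).trainable_params())
reducer = PipelineGradReducer(params)          # signature shown above: (parameters, scale_sense=1.0, opt_shard=None)
```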
mindspore/parallel/shard.py
CHANGED
@@ -253,6 +253,13 @@ class Shard(Shard_):
                            "will be overwritten as False.")
             ms.set_algo_parameters(fully_use_devices=False)

+        if ms.context.get_auto_parallel_context("full_batch_is_set") is False and \
+                ms.context.get_context("mode") == ms.context.PYNATIVE_MODE:
+            logger.warning("When calling the shard interface, "
+                           "'dataset_strategy' or 'full_batch' is not manually set by the user, "
+                           "and the 'dataset_strategy' will be set to 'full_batch'.")
+            ms.context.set_auto_parallel_context(dataset_strategy="full_batch")
+
         if self._is_attrs_has_been_set(fn, in_strategy, out_strategy, device, level):
             return self.shard_fn
         shard_ = Shard()
@@ -387,10 +394,11 @@ class Shard(Shard_):
                                 f"The tuple strategy for each dimension should be tuple(int).")


-def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
+def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascend", level=0):
     """
     Specify the input and output slicing strategy for a Cell or function.
-    In
+    In PyNative mode, use this method to specify a Cell for distributed
+    execution in graph mode. In Graph mode, use this method to specify distribution strategy for a Cell,
     strategy for others will be set by sharding propagation.
     in_strategy and out_strategy define the input and output layout respectively.
     in_strategy/out_strategy should be a tuple, each element of which corresponds to the desired layout of
@@ -402,9 +410,7 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
         - It is valid only in semi auto parallel or auto parallel mode.
           In other parallel modes, strategies set here will be ignored.
         - If the input contain Parameter, its strategy should be set in `in_strategy`.
-
-    .. warning::
-        The method is currently not supported in PyNative mode.
+        - This method currently does not support dynamic shapes.

     Args:
         fn (Union[Cell, Function]): Function to be executed in parallel.
@@ -426,12 +432,19 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
            has been set, the parameter setting will be ignored. Supported
            only when `fn` is a Cell with parameters.
            Default: ``None`` .
+        device (str, optional): Select a certain `device` target. It is not in use right now.
+            Support ["CPU", "GPU", "Ascend"]. Default: ``"Ascend"`` .
+        level (int, optional): Option for parallel strategy infer algorithm, namely the object function,
+            maximize computation
+            over communication ratio, maximize speed performance, minimize memory usage etc. It is not in
+            use right now. Support [0, 1, 2]. Default: ``0`` .

     Returns:
         Function, return the function that will be executed under auto parallel process.

     Raises:
         AssertionError: If parallel mode is not "auto_parallel" nor "semi_auto_parallel".
+        AssertionError: If device_target it not "Ascend" or "GPU".
         TypeError: If `in_strategy` is not a tuple.
         TypeError: If `out_strategy` is not a tuple or None.
         TypeError: If any element in `in_strategy` is not a tuple(int) or tuple(mindspore.parallel.Layout).
@@ -439,6 +452,8 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
         TypeError: If `parameter_plan` is not a dict or None.
         TypeError: If any key in `parameter_plan` is not a str.
         TypeError: If any value in `parameter_plan` is not a tuple(int) or a tuple(mindspore.parallel.Layout).
+        TypeError: If `device` is not a str.
+        TypeError: If `level` is not an integer.

     Supported Platforms:
         ``Ascend``
@@ -541,5 +556,4 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
     if not isinstance(fn, (ms.nn.Cell)):
         logger.warning("'fn' is not a mindspore.nn.Cell, and its definition cannot involve Parameter; "
                        "otherwise, the result may be incorrect.")
-
-    return Shard()(fn, in_strategy, out_strategy, parameter_plan)
+    return Shard()(fn, in_strategy, out_strategy, parameter_plan, device, level)
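With `device` and `level` restored, the module-level `shard()` reconstructed above is invoked as below. This is a sketch under stated assumptions: it must run in semi-auto/auto parallel mode with the distributed environment initialized (not set up here), and both `device` and `level` are documented as placeholders that are not in use yet.

```python
from mindspore import nn
from mindspore.parallel.shard import shard  # module shown in this diff; a public alias may also exist

net = nn.Dense(8, 8)                        # placeholder Cell
# (2, 1) splits the first input's rows across 2 devices; device/level are pass-throughs for now.
sharded_net = shard(net, in_strategy=((2, 1),), device="Ascend", level=0)
```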
mindspore/parallel/transform_safetensors.py
CHANGED

@@ -37,6 +37,7 @@ from mindspore import log as logger
 from mindspore.log import vlog_print
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
+from mindspore.common import np_dtype
 from mindspore.parallel._parallel_serialization import _get_device_num_from_strategy, _make_dir, \
     _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
     _insert_opt_shard_reshape, _extract_src_dst_layout_map_by_src, _insert_expand_layout_reshape
@@ -50,6 +51,8 @@ from mindspore.common import dtype as mstype

 safetensors_to_mstype = {'Int4': mstype.qint4x2}

+np.bfloat16 = np_dtype.bfloat16
+
 MAX_HEADER_SIZE = 100 * 1000 * 1000

 dtype_size = {
@@ -93,6 +96,7 @@ numpy_dtype = {
     "I64": np.int64,
     "U64": np.uint64,
     "F16": np.float16,
+    "BF16": np.bfloat16,  # no bf16
     "F32": np.float32,
     "F64": np.float64,
 }
@@ -211,16 +215,6 @@ class PySafeSlice:

     @property
     def dtype(self):
-        """Get dtype by numpy_dtype"""
-        if self.info["dtype"] == "BF16":
-            from mindspore.common import np_dtype
-            if not np_dtype.np_dtype_valid(True):
-                raise TypeError(
-                    "The Numpy bfloat16 data type is not supported now, please ensure that the current "
-                    "Numpy version is not less than the version when the mindspore is compiled, "
-                    "and the major versions are same."
-                )
-            return np_dtype.bfloat16
         return numpy_dtype[self.info["dtype"]]

     @property
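The rc1 code patches `np.bfloat16` once at import time instead of resolving `BF16` lazily inside `PySafeSlice.dtype`. A small sketch of that mapping; note the caveat the removed `np_dtype_valid` check used to enforce, namely that the NumPy bfloat16 extension only works when the installed NumPy matches the version MindSpore was built against:

```python
import numpy as np
from mindspore.common import np_dtype

np.bfloat16 = np_dtype.bfloat16  # module-wide alias, as in the hunk above
numpy_dtype = {"F16": np.float16, "BF16": np.bfloat16, "F32": np.float32, "F64": np.float64}

# Resolving a safetensors dtype tag now goes straight through the table:
np_type = numpy_dtype["BF16"]
```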
mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py
CHANGED

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ============================================================================
 """Ascend kernel details viewer"""
+import csv
 import os
 from decimal import Decimal

@@ -24,7 +25,8 @@ from mindspore.profiler.common.constant import (
     ProfilerActivity
 )
 from mindspore.profiler.common.log import ProfilerLogger
-from mindspore.profiler.common.
+from mindspore.profiler.common.path_manager import PathManager
+
 from mindspore import log as logger


@@ -108,15 +110,12 @@ class AscendKernelDetailsViewer(BaseViewer):
         Write data to csv file.
         """
         self._logger.info("Kernel details saved start")
-
-
-
-
-
-
-            data=csv_data,
-            headers=self.kernel_details_headers
-        )
+        PathManager.check_directory_path_writeable(os.path.dirname(self._save_path))
+        with open(self._save_path, "w", newline="", encoding="utf-8") as csvfile:
+            writer = csv.writer(csvfile)
+            writer.writerow(self.kernel_details_headers)
+            for row in self.op_summary:
+                writer.writerow([row[field] for field in self.op_summary_headers])
         self._logger.info("Kernel details saved done")

     def _update_headers(self):
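The replacement writer is plain `csv` from the standard library. A self-contained sketch of the same pattern with placeholder headers and rows (the real viewer writes `self.op_summary` rows selected by `self.op_summary_headers` and checks the directory with `PathManager` first):

```python
import csv
import os

save_path = "kernel_details.csv"             # placeholder path
headers = ["Name", "Type", "Duration(us)"]   # placeholder header row
rows = [{"Name": "MatMul", "Type": "MatMul", "Duration(us)": 12.5}]

os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True)
with open(save_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)
    for row in rows:
        writer.writerow([row[field] for field in headers])
```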
mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py
CHANGED

@@ -330,7 +330,7 @@ class AscendOpMemoryViewer:
                 res.append(self._combine_alloc_and_free_event(alloc_event, free_event))
                 alloc_event, free_event = None, None
             elif alloc_event is None and free_event:
-                self._logger.
+                self._logger.error("Alloc event is None, but free event is not None")

         if alloc_event:
             res.append(self._combine_alloc_and_free_event(alloc_event))
mindspore/profiler/common/msprof_cmd_tool.py
CHANGED

@@ -145,14 +145,14 @@ class MsprofCmdTool:
         if os.environ.get("ASCEND_TOOLKIT_HOME"):
             temp_path = os.path.join(os.environ.get("ASCEND_TOOLKIT_HOME"), "bin")
             if os.path.isdir(temp_path) and self._MSPROF_CMD in os.listdir(temp_path):
-                return
+                return temp_path

         for path in os.environ.get("PATH", "").split(":"):
             if self._ASCEND_MARK in path:
                 prefix = path.split(self._ASCEND_MARK)[0]
                 temp_path = os.path.join(prefix, self._HIAI_MSPROF_TAIL)
                 if os.path.isdir(temp_path) and self._MSPROF_CMD in os.listdir(temp_path):
-                    return
+                    return temp_path

         return None

mindspore/profiler/common/path_manager.py
CHANGED

@@ -73,15 +73,6 @@ class PathManager:
             msg = f"file size exceeds the limit: {cls.MAX_FILE_SIZE}, file size: {file_size}"
             raise ProfilerPathErrorException(msg)

-        file_stat = os.stat(path)
-        if file_stat.st_mode & (stat.S_IWGRP | stat.S_IWOTH):
-            msg = f"File path {path} has group or others writable permissions, which is not allowed."
-            raise ProfilerPathErrorException(msg)
-
-        if stat.S_ISCHR(file_stat.st_mode) or stat.S_ISBLK(file_stat.st_mode):
-            msg = f"Invalid input path is a character or block device path: {path}"
-            raise ProfilerPathErrorException(msg)
-
     @classmethod
     def get_directory_size(cls, directory: str, unit: str = 'MB') -> float:
         """
mindspore/profiler/common/profiler_context.py
CHANGED

@@ -43,7 +43,6 @@ from mindspore.profiler.schedule import Schedule
 from mindspore import context
 from mindspore import log as logger
 from mindspore.profiler.common.profiler_info import ProfilerInfo
-from mindspore.profiler.experimental_config import _ExperimentalConfig


 @Singleton
@@ -82,33 +81,11 @@ class ProfilerContext:
             logger.warning(f"Both on_trace_ready path and output_path are provided. "
                            f"The on_trace_ready path takes effect. Final path is {final_path}")
             kwargs["output_path"] = final_path
-
-        self._check_and_set_experimental_params(kwargs)
+
         self._profiler_params_mgr: ProfilerParameters = ProfilerParameters(**kwargs)
         self._profiler_path_mgr: ProfilerOutputPath = ProfilerOutputPath(rank_id=int(self._rank_id))
-        self._profiler_path_mgr.output_path = self._profiler_params_mgr.output_path

-
-    def _check_and_set_experimental_params(kwargs):
-        """
-        Set experimental parameters
-        """
-        if not isinstance(kwargs.get("experimental_config"), _ExperimentalConfig):
-            logger.warning("For Profiler, experimental_config value must be the "
-                           "'mindspore.profiler._ExperimentalConfig' class, "
-                           "reset to default value.")
-            return
-        kwargs["profiler_level"] = kwargs.get("experimental_config").profiler_level
-        kwargs["aic_metrics"] = kwargs.get("experimental_config").aic_metrics
-        kwargs["l2_cache"] = kwargs.get("experimental_config").l2_cache
-        kwargs["mstx"] = kwargs.get("experimental_config").mstx
-        kwargs["data_simplification"] = kwargs.get("experimental_config").data_simplification
-        kwargs["export_type"] = kwargs.get("experimental_config").export_type
-        kwargs["mstx_domain_include"] = kwargs.get("experimental_config").mstx_domain_include
-        kwargs["mstx_domain_exclude"] = kwargs.get("experimental_config").mstx_domain_exclude
-        kwargs["sys_io"] = kwargs.get("experimental_config").sys_io
-        kwargs["sys_interconnection"] = kwargs.get("experimental_config").sys_interconnection
-        kwargs["host_sys"] = kwargs.get("experimental_config").host_sys
+        self._profiler_path_mgr.output_path = self._profiler_params_mgr.output_path

     @property
     def on_trace_ready_output_path(self) -> str: