mindspore 2.7.0__cp310-cp310-win_amd64.whl → 2.7.0rc1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +2 -2
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -0
- mindspore/_extends/parse/parser.py +22 -28
- mindspore/_extends/parse/standard_method.py +1 -15
- mindspore/_extends/pijit/pijit_func_white_list.py +5 -2
- mindspore/_extends/remote/kernel_build_server_ascend.py +75 -0
- mindspore/amp.py +18 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/__init__.py +12 -18
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +38 -102
- mindspore/common/_utils.py +1 -9
- mindspore/common/api.py +106 -155
- mindspore/common/{dynamic_shape/auto_dynamic_shape.py → auto_dynamic_shape.py} +23 -17
- mindspore/common/dtype.py +57 -98
- mindspore/common/dump.py +1 -1
- mindspore/common/file_system.py +9 -59
- mindspore/common/hook_handle.py +3 -22
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +20 -4
- mindspore/common/recompute.py +4 -2
- mindspore/common/tensor.py +52 -38
- mindspore/communication/_hccl_management.py +297 -0
- mindspore/context.py +21 -15
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +1 -35
- mindspore/dataset/engine/datasets.py +315 -330
- mindspore/dataset/engine/datasets_user_defined.py +22 -38
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/transforms.py +3 -3
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +5 -17
- mindspore/dataset/vision/utils.py +21 -632
- mindspore/device_context/ascend/op_tuning.py +1 -35
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -3
- mindspore/include/api/cell.h +4 -28
- mindspore/include/api/cfg.h +7 -24
- mindspore/include/api/context.h +0 -1
- mindspore/include/api/delegate.h +2 -0
- mindspore/include/api/dual_abi_helper.h +19 -100
- mindspore/include/api/graph.h +1 -14
- mindspore/include/api/kernel.h +3 -16
- mindspore/include/api/kernel_api.h +1 -9
- mindspore/include/api/metrics/accuracy.h +0 -9
- mindspore/include/api/model.h +1 -5
- mindspore/include/api/model_group.h +0 -4
- mindspore/include/api/model_parallel_runner.h +0 -2
- mindspore/include/api/status.h +10 -48
- mindspore/include/api/types.h +1 -6
- mindspore/include/dataset/constants.h +0 -9
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +2 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -5
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/distributed/__init__.py +0 -4
- mindspore/mint/distributed/distributed.py +14 -217
- mindspore/mint/nn/layer/_functions.py +2 -1
- mindspore/mint/nn/layer/conv.py +6 -6
- mindspore/mint/nn/layer/normalization.py +3 -3
- mindspore/nn/cell.py +174 -216
- mindspore/nn/layer/activation.py +2 -4
- mindspore/nn/layer/basic.py +13 -7
- mindspore/nn/layer/image.py +1 -1
- mindspore/nn/optim/adam.py +3 -1
- mindspore/nn/optim/lamb.py +3 -1
- mindspore/nn/optim/tft_wrapper.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +5 -39
- mindspore/nn/wrap/grad_reducer.py +15 -0
- mindspore/numpy/array_creations.py +2 -2
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_op_impl/cpu/__init__.py +0 -1
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +2 -12
- mindspore/ops/auto_generate/gen_extend_func.py +4 -4
- mindspore/ops/auto_generate/gen_ops_def.py +16 -290
- mindspore/ops/auto_generate/gen_ops_prim.py +76 -563
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/function/__init__.py +0 -1
- mindspore/ops/function/array_func.py +6 -10
- mindspore/ops/function/debug_func.py +2 -4
- mindspore/ops/function/grad/grad_func.py +12 -4
- mindspore/ops/function/math_func.py +32 -44
- mindspore/ops/function/nn_func.py +20 -18
- mindspore/ops/functional.py +1 -2
- mindspore/ops/functional_overload.py +12 -23
- mindspore/ops/operations/_inner_ops.py +12 -11
- mindspore/ops/operations/array_ops.py +50 -4
- mindspore/ops/operations/comm_ops.py +15 -1
- mindspore/ops/operations/custom_ops.py +4 -10
- mindspore/ops/operations/debug_ops.py +6 -6
- mindspore/ops/operations/manually_defined/ops_def.py +12 -12
- mindspore/ops/operations/math_ops.py +5 -5
- mindspore/ops/operations/nn_ops.py +1 -1
- mindspore/ops/primitive.py +10 -3
- mindspore/ops/tensor_method.py +7 -16
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +16 -0
- mindspore/parallel/_auto_parallel_context.py +15 -5
- mindspore/parallel/_parallel_serialization.py +2 -3
- mindspore/parallel/_ps_context.py +2 -2
- mindspore/parallel/_transformer/transformer.py +4 -4
- mindspore/parallel/_utils.py +11 -5
- mindspore/parallel/auto_parallel.py +9 -23
- mindspore/parallel/checkpoint_transform.py +0 -2
- mindspore/parallel/cluster/process_entity/_api.py +1 -4
- mindspore/parallel/cluster/run.py +3 -5
- mindspore/parallel/function/reshard_func.py +5 -6
- mindspore/parallel/nn/parallel_cell_wrapper.py +3 -40
- mindspore/parallel/nn/parallel_grad_reducer.py +8 -0
- mindspore/parallel/shard.py +21 -7
- mindspore/parallel/transform_safetensors.py +4 -10
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +9 -10
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +0 -9
- mindspore/profiler/common/profiler_context.py +2 -25
- mindspore/profiler/common/profiler_meta_data.py +0 -1
- mindspore/profiler/common/profiler_op_analyse.py +6 -10
- mindspore/{ops/_op_impl/cpu/joinedstr_op.py → profiler/common/validator/__init__.py} +1 -15
- mindspore/profiler/common/validator/validate_path.py +84 -0
- mindspore/profiler/dynamic_profiler.py +46 -91
- mindspore/profiler/envprofiler.py +5 -30
- mindspore/profiler/experimental_config.py +1 -16
- mindspore/profiler/platform/cpu_profiler.py +4 -10
- mindspore/profiler/platform/npu_profiler.py +1 -1
- mindspore/profiler/profiler.py +145 -193
- mindspore/profiler/profiler_action_controller.py +1 -1
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/runtime/__init__.py +4 -6
- mindspore/runtime/executor.py +0 -27
- mindspore/runtime/memory.py +0 -1
- mindspore/runtime/thread_bind_core.py +1 -1
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +3 -3
- mindspore/train/amp.py +3 -0
- mindspore/train/callback/_callback.py +1 -2
- mindspore/train/callback/_checkpoint.py +8 -1
- mindspore/train/callback/_flops_collector.py +6 -10
- mindspore/train/callback/_train_fault_tolerance.py +7 -3
- mindspore/train/data_sink.py +4 -4
- mindspore/train/dataset_helper.py +5 -5
- mindspore/train/model.py +20 -4
- mindspore/train/serialization.py +15 -35
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/utils.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +193 -192
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +0 -1109
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/dynamic_shape/enable_dynamic.py +0 -197
- /mindspore/common/{dynamic_shape/_auto_dynamic.py → _auto_dynamic.py} +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/parallel/_transformer/transformer.py
CHANGED

@@ -1453,7 +1453,7 @@ class TransformerEncoderLayer(Cell):
     >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
     >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
     >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
-    >>> init_reset = Tensor([True], mstype.
+    >>> init_reset = Tensor([True], mstype.bool_)
     >>> # Set is_first_iteration=True to generate the full memory states
     >>> model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
     ...                                 num_heads=2, use_past=True)
@@ -1467,7 +1467,7 @@ class TransformerEncoderLayer(Cell):
     (2, 2, 16, 4)
     >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
     >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
-    >>> init_reset = Tensor([False], mstype.
+    >>> init_reset = Tensor([False], mstype.bool_)
     >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
     >>> # the full sequence.
     >>> model.add_flags_recursive(is_first_iteration=False)
@@ -2375,7 +2375,7 @@ class TransformerEncoder(Cell):
     >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
     >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
     >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
-    >>> init_reset = Tensor([True], mstype.
+    >>> init_reset = Tensor([True], mstype.bool_)
     >>> # Set is_first_iteration=True to generate the full memory states
     >>> model = TransformerEncoder(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
     ...                            num_heads=2, num_layers=2, use_past=True)
@@ -2389,7 +2389,7 @@ class TransformerEncoder(Cell):
     (2, 2, 16, 4)
     >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
     >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
-    >>> init_reset = Tensor([False], mstype.
+    >>> init_reset = Tensor([False], mstype.bool_)
     >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
     >>> # the full sequence.
     >>> model.add_flags_recursive(is_first_iteration=False)
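The four changed lines above are docstring examples where the 2.7.0 side is truncated after `mstype.`; the rc1 side spells the dtype out as `mstype.bool_`. A minimal standalone sketch of that corrected call (the imports follow the `np`/`mstype` aliases the docstrings already use):

```python
import numpy as np
from mindspore import Tensor
from mindspore.common import dtype as mstype

# init_reset carries a boolean reset flag, so its dtype is mstype.bool_.
init_reset = Tensor([True], mstype.bool_)
batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
```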
mindspore/parallel/_utils.py
CHANGED
@@ -21,7 +21,7 @@ import mindspore as ms
 from mindspore import context, log as logger
 from mindspore._c_expression import reset_op_id, reset_op_id_with_offset
 from mindspore.common.tensor import Tensor
-from mindspore.common.dtype import
+from mindspore.common.dtype import dtype_to_nptype
 from mindspore.common import dtype as mstype
 from mindspore.communication.management import get_group_size, get_rank
 from mindspore.communication._comm_helper import _is_initialized
@@ -156,7 +156,7 @@ def _is_in_auto_parallel_mode():


 def _is_parallel_mode():
-    if not _is_initialized():
+    if not _is_initialized() or context.get_context('mode') == context.PYNATIVE_MODE:
         return False
     if os.getenv("RUN_MODE") != "predict":
         return False
@@ -173,6 +173,12 @@ def _is_in_hybrid_parallel_mode():
     return _get_parallel_mode() == ms.ParallelMode.HYBRID_PARALLEL


+def _is_pynative_parallel():
+    parallel_mode = context.get_auto_parallel_context('parallel_mode')
+    return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
+        context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)
+
+
 def _get_full_batch():
     """Get whether to use full_batch."""
     return auto_parallel_context().get_full_batch()
@@ -446,7 +452,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
                 batchsize_per_device = item
             else:
                 new_shape += (item,)
-        new_tensor_numpy = np.zeros(new_shape,
+        new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
         start = stage_rank * batchsize_per_device
         new_tensor_numpy[start: start + batchsize_per_device] = data.asnumpy()
     else:
@@ -460,7 +466,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
             end = (stage_rank % dataset_strategy[index][i] + 1) * item
             s = slice(start, end, 1)
             slice_index += (s,)
-        new_tensor_numpy = np.zeros(new_shape,
+        new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
         new_tensor_numpy[slice_index] = data.asnumpy()
         new_tensor = Tensor(new_tensor_numpy, dtype=type_)
         lst.append(new_tensor)
@@ -767,7 +773,7 @@ def _grads_divided_by_device_num_if_recomputation(grads):
     """
     If in pynative parallel and full_batch is True, divide grads by device num to ensure that the gradients is correct.
     """
-    if not _get_full_batch():
+    if not _is_pynative_parallel() or not _get_full_batch():
         return grads

     device_num = _get_device_num()
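The only functional change in `_to_full_tensor` above is that the zero-filled buffer is now allocated with `dtype_to_nptype(type_)` rather than the truncated dtype argument on the 2.7.0 side. A minimal standalone sketch of what that helper does (shape and dtype chosen for illustration):

```python
import numpy as np
from mindspore.common import dtype as mstype
from mindspore.common.dtype import dtype_to_nptype

# dtype_to_nptype maps a MindSpore dtype to the matching NumPy dtype, so the
# padded full-batch buffer keeps the dtype of the tensor slice it will hold.
buf = np.zeros((8, 16), dtype_to_nptype(mstype.float32))
assert buf.dtype == np.float32
```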
mindspore/parallel/auto_parallel.py
CHANGED

@@ -14,7 +14,6 @@
 # ============================================================================
 """Cell of auto parallel"""
 import os
-from mindspore import jit
 from mindspore.nn.cell import Cell
 from mindspore.parallel.shard import Layout
 from mindspore.communication.management import get_rank, get_group_size
@@ -282,8 +281,7 @@ class AutoParallel(Cell):
     Note:
         - It only works when `parallel_mode=sharding_propagation`.
         - When performing distributed training, users can first save the strategy using dryrun on a single device
-          and then load strategy to perform distributed training.
-          save the strategy file, so the simulated rank id specified by Dryrun must be divisible by 8.
+          and then load strategy to perform distributed training.

     Args:
         file_path (str): Path to save parallel strategy json, must be an absolute path.
@@ -513,17 +511,17 @@ class AutoParallel(Cell):
             raise ValueError("For 'AutoParallel.pipeline', the argument 'stages' "
                              "must be larger than zero, but got value: {}.".format(stages))
         if not isinstance(output_broadcast, bool):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be bool type, but got the type : {}.".format(type(output_broadcast)))
         if not isinstance(interleave, bool):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be bool type, but got the type : {}.".format(type(interleave)))
         if not isinstance(scheduler, str):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be str type, but got the type : {}.".format(type(scheduler)))
-        if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp"
+        if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp"):
             raise ValueError("For 'AutoParallel.pipeline', the argument "
-                             "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp'
+                             "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp' ," \
                              " but got the value : {}."
                              .format(scheduler))
         self._pipeline_stages = stages
@@ -667,11 +665,8 @@ class AutoParallel(Cell):
            - recomputation_communication_overlap (bool): Enable overlap between recompute ops and communication ops
              if True.
              Default: False.
-           - grad_matmul_communication_overlap (bool
-
-             that this feature is disabled. When set to str, it only optimizes the specified communication
-             operator types, with operators separated by ``,``. For example, "AlltoAll,AlltoAllV" indicates that
-             only ``AlltoAll`` and ``AlltoAllV`` are optimized. Default: ``False``.
+           - grad_matmul_communication_overlap (bool): Enable overlap between dw matmul and
+             tensor parallel communication ops if True. Default: False.
            - grad_fa_allgather_overlap (bool): Enable overlap between duplicated allgather by recomputing
              in sequence parallel and flashattentionscoregrad ops if True. Default: False.
            - enable_communication_fusion (bool): Enable communication fusion to optimize the number of
@@ -686,9 +681,7 @@ class AutoParallel(Cell):
             and optimizer parallel allgather communication if True. Currently, do not support
             `O2 <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.JitConfig.html>`_
             Default: False.
-           - computation_communication_fusion_level (int): Enable the fusion between compute and communicate
-             which fuses communication tasks and computing tasks, allows for partial pipelining and parallel
-             execution of these tasks during operation, thereby enhancing performance.
+           - computation_communication_fusion_level (int): Enable the fusion between compute and communicate.
             Default: ``0``. Note: This function must be used with Ascend Training Solution 24.0.RC2 or later.
             This is an experimental configuration, may be changed or canceled in the future.

@@ -699,12 +692,6 @@ class AutoParallel(Cell):
             - 2: Apply fusion to backward nodes.

             - 3: Apply fusion to all nodes.
-
-             .. warning::
-                 After setting ``export MS_ENABLE_LCCL=on``, the fusion operator based on memory semantics will be
-                 used. Please note that this operator is still in an experimental stage and may be changed or
-                 removed in the future.
-
           - dataset_broadcast_opt_level (int): Optimize the scenario that the dataset repeated reading. Only
             support O0/O1 jit level. It doesn't work in O2 mode. Default: ``0``.

@@ -748,6 +735,5 @@ class AutoParallel(Cell):
         self._transformer_opt_config = file_path
         ctx.ascend_config['parallel_speed_up_json_path'] = file_path

-    @jit
     def construct(self, *args, **kwargs):
         return self.network(*args, **kwargs)
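For context, the `pipeline()` validation reconstructed above corresponds to the call pattern below. This is a hedged sketch only: the `nn.Dense` stand-in network and the chosen argument values are illustrations, not part of this diff.

```python
import mindspore.nn as nn
from mindspore.parallel.auto_parallel import AutoParallel

net = nn.Dense(8, 8)                 # stand-in network; any Cell works here
parallel_net = AutoParallel(net)
# scheduler must be one of "1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp",
# otherwise the ValueError shown in the hunk above is raised.
parallel_net.pipeline(stages=2, output_broadcast=False, interleave=False, scheduler="1f1b")
```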
mindspore/parallel/checkpoint_transform.py
CHANGED

@@ -1165,8 +1165,6 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
         train_strategy_filename = ms.context.get_auto_parallel_context("strategy_ckpt_load_file")

     _train_strategy = build_searched_strategy(train_strategy_filename)
-    if not _train_strategy:
-        return True
     train_strategy = _convert_to_list(_train_strategy)

     train_dev_count = 1
mindspore/parallel/cluster/process_entity/_api.py
CHANGED

@@ -179,12 +179,9 @@ class _ProcessManager:
         self.is_simulation = self.sim_level != -1
         if self.is_simulation:
             os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
-            if self.sim_rank_id == -1:
-                self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
         elif os.getenv("MS_SIMULATION_LEVEL"):
             self.is_simulation = True
-
-            self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
+        self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
         if os.getenv("RANK_SIZE"):
             self.exported_rank_size = os.getenv("RANK_SIZE")
         # If sim_rank_id is set, single worker can be started.
mindspore/parallel/cluster/run.py
CHANGED

@@ -125,16 +125,14 @@ def get_args():
         default=-1,
         type=int,
         choices=[0, 1, 2, 3],
-        help="specifies simulation level.
-             "
+        help="specifies simulation level. When this argument is set, msrun only spawns one process "
+             "but export RANK_SIZE with value worker_num and RANK_ID with value sim_rank_id."
     )
     parser.add_argument(
         "--sim_rank_id",
         default=-1,
         type=int,
-        help="specifies simulation process's rank id.
-             "is spawned on dryrun mode, functioning equivalently to environment variable 'RANK_ID' "
-             "while having higher priority."
+        help="specifies simulation process's rank id. Only one process is spawned in simulation scenario."
     )
     parser.add_argument(
         "--rank_table_file",
mindspore/parallel/function/reshard_func.py
CHANGED

@@ -42,12 +42,11 @@ def reshard(tensor, layout):
        can check :class:`mindspore.parallel.Layout` for reference.

    Note:
-        In the Graph mode, this function can set the sharding propagation strategy of a tensor.
-
-
-
-
-        The method is currently not supported in PyNative mode.
+        - In the Graph mode, this function can set the sharding propagation strategy of a tensor.
+          For those tensor do not manually be set, their strategies are decided by the sharding
+          strategy propagation algorithm automatically.
+        - In PyNative mode, you can use this method to arrange tensors in a cell (that is, cells
+          that use Cell.shard/F.shard in PyNative mode) that is executed in parallel in graph mode.

    Args:
        tensor (Tensor): The tensor to be set the sharding strategy.
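The rewritten note says `reshard` also applies to cells executed through shard in PyNative mode. A minimal usage sketch, assuming the public import paths `mindspore.parallel.Layout` and `mindspore.parallel.function.reshard`; the call itself only takes effect inside a network compiled under semi-auto/auto parallel, so it is left commented here:

```python
from mindspore.parallel import Layout
from mindspore.parallel.function import reshard  # wrapper around the reshard_func.reshard shown above

# A 2x4 device matrix whose axes are named "dp" and "mp".
layout = Layout((2, 4), ("dp", "mp"))
# Inside a parallel-compiled construct(), a 2-D tensor x could be pinned to that layout:
# y = reshard(x, layout("dp", "mp"))
```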
mindspore/parallel/nn/parallel_cell_wrapper.py
CHANGED

@@ -28,8 +28,7 @@ from mindspore import log as logger

 class PipelineCell(Cell):
     """
-    Slice MiniBatch into finer-grained MicroBatch for use in pipeline-parallel training
-    and specify the segment info.
+    Slice MiniBatch into finer-grained MicroBatch for use in pipeline-parallel training.

     Note:
         micro_size must be greater or equal to pipeline stages.
@@ -38,8 +37,6 @@ class PipelineCell(Cell):
         network (Cell): The target network to wrap.
         micro_size (int): MicroBatch size.
         stage_config (dict, optional): The stage configuration for each cell's execution in pipeline parallel.
-        segment_config (dict, optional): The segment configuration for each cell's execution in pipeline parallel.
-            Default ``None``.

     Supported Platforms:
         ``Ascend``
@@ -51,7 +48,7 @@ class PipelineCell(Cell):
         >>> net = LeNet5()
         >>> net = nn.PipelineCell(net, 4, stage_config={"cell_name_0": 0, "cell_name_1": 1})
     """
-    def __init__(self, network, micro_size, stage_config=None
+    def __init__(self, network, micro_size, stage_config=None):
         super(PipelineCell, self).__init__(auto_prefix=False)
         self.network = network
         self.micro_inputs = nn.CellList()
@@ -107,37 +104,6 @@ class PipelineCell(Cell):
                 logger.warning(cell_name)
             raise KeyError("For 'PipelineCell', the argument 'stage_config' : {} is not "
                            "found in 'network' : {}".format(config_dict, network))
-        if segment_config is None:
-            return
-        self._config_segment(segment_config)
-
-
-    def _config_segment(self, segment_config):
-        """
-        Config segment num for cell.
-        """
-        config_dict = segment_config.copy()
-
-        for cell_name, cell in self.network.cells_and_names():
-            if cell_name in segment_config:
-                setattr(cell, "pipeline_segment", segment_config[cell_name])
-                del config_dict[cell_name]
-        if str(self.network) in segment_config:
-            setattr(self.network, "pipeline_segment", segment_config[str(self.network)])
-            del config_dict[str(self.network)]
-        # if there are any config elements left, print them
-        if config_dict:
-            for config_cell_name, config_segment_num in config_dict.items():
-                logger.error("pipeline_cell segment_config set pipeline_segment fail!")
-                logger.warning("config cell name:" + str(config_cell_name) +
-                               " config segment num:" + str(config_segment_num))
-            logger.warning("network:" + str(self.network))
-            logger.warning("cell name available:")
-            for cell_name, _ in self.network.cells_and_names():
-                logger.warning(cell_name)
-            raise KeyError("For 'PipelineCell', the argument 'segment_config' : {} is not "
-                           "found in 'network' : {}".format(config_dict, self.network))
-

     def construct(self, *args, **kwargs):
         ret = None
@@ -153,8 +119,7 @@ class PipelineCell(Cell):

 class Pipeline(PipelineCell):
     """
-    Specify the number of micro_batch for pipeline parallelism and the division rules for stage
-    and specify the segment info.
+    Specify the number of micro_batch for pipeline parallelism and the division rules for stage.

     Note:
         micro_size must be greater or equal to pipeline stages.
@@ -163,8 +128,6 @@ class Pipeline(PipelineCell):
         network (Cell): The target network to wrap.
         micro_size (int): MicroBatch size.
         stage_config (dict, optional): Stage configuration for cell's execution in pipeline parallel. Default ``None``.
-        segment_config (dict, optional): The segment configuration for each cell's execution in pipeline parallel.
-            Default ``None``.

     Raises:
         TypeError: The type of `net` is not cell.
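With `segment_config` gone, `PipelineCell` keeps only the network, the micro-batch count and an optional `stage_config`. A minimal sketch (placeholder `nn.Dense` network; `stage_config` is omitted because its keys must name real sub-cells, otherwise the KeyError shown above is raised):

```python
import mindspore.nn as nn

net = nn.Dense(8, 8)                               # placeholder network
pipeline_net = nn.PipelineCell(net, micro_size=4)  # rc1 signature: no segment_config argument
```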
mindspore/parallel/nn/parallel_grad_reducer.py
CHANGED

@@ -17,6 +17,7 @@ from __future__ import absolute_import

 __all__ = ['PipelineGradReducer']

+from mindspore import context
 from mindspore.nn.cell import Cell
 from mindspore.ops import functional as F, composite as C, operations as P
 import mindspore.common.dtype as mstype
@@ -139,6 +140,7 @@ class PipelineGradReducer(Cell):
     """
     def __init__(self, parameters, scale_sense=1.0, opt_shard=None):
         super(PipelineGradReducer, self).__init__(auto_prefix=False)
+        self._check_mode()
         self.accu_grads = parameters.clone(prefix="accu_grads", init="zeros")
         self.grad_reducer = Identity()
         self.degree = Tensor(1, mstype.float32)
@@ -160,3 +162,9 @@ class PipelineGradReducer(Cell):
         accu_grads = self.grad_reducer(self.accu_grads)
         new_grads = self.hyper_map(F.partial(grad_scale, self.scale_sense * self.degree), grads, accu_grads)
         return new_grads
+
+    def _check_mode(self):
+        """check parallel mode"""
+        mode = context.get_context('mode')
+        if mode != context.GRAPH_MODE:
+            raise RuntimeError(f"PipelineGradReducer only support graph mode, but get {mode}")
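The new `_check_mode()` guard means `PipelineGradReducer` can only be constructed under graph mode. A hedged sketch (the `mindspore.parallel.nn` import path follows this module's package, and the tiny `Dense` network is a placeholder):

```python
import mindspore.nn as nn
from mindspore import context, ParameterTuple
from mindspore.parallel.nn import PipelineGradReducer

context.set_context(mode=context.GRAPH_MODE)   # required: PyNative mode now raises RuntimeError
params = ParameterTuple(nn.Dense(4, 4).trainable_params())
reducer = PipelineGradReducer(params)          # signature shown above: (parameters, scale_sense=1.0, opt_shard=None)
```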
mindspore/parallel/shard.py
CHANGED
@@ -253,6 +253,13 @@ class Shard(Shard_):
                            "will be overwritten as False.")
             ms.set_algo_parameters(fully_use_devices=False)

+        if ms.context.get_auto_parallel_context("full_batch_is_set") is False and \
+                ms.context.get_context("mode") == ms.context.PYNATIVE_MODE:
+            logger.warning("When calling the shard interface, "
+                           "'dataset_strategy' or 'full_batch' is not manually set by the user, "
+                           "and the 'dataset_strategy' will be set to 'full_batch'.")
+            ms.context.set_auto_parallel_context(dataset_strategy="full_batch")
+
         if self._is_attrs_has_been_set(fn, in_strategy, out_strategy, device, level):
             return self.shard_fn
         shard_ = Shard()
@@ -387,10 +394,11 @@ class Shard(Shard_):
                                 f"The tuple strategy for each dimension should be tuple(int).")


-def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
+def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascend", level=0):
     """
     Specify the input and output slicing strategy for a Cell or function.
-    In
+    In PyNative mode, use this method to specify a Cell for distributed
+    execution in graph mode. In Graph mode, use this method to specify distribution strategy for a Cell,
     strategy for others will be set by sharding propagation.
     in_strategy and out_strategy define the input and output layout respectively.
     in_strategy/out_strategy should be a tuple, each element of which corresponds to the desired layout of
@@ -402,9 +410,7 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
         - It is valid only in semi auto parallel or auto parallel mode.
           In other parallel modes, strategies set here will be ignored.
         - If the input contain Parameter, its strategy should be set in `in_strategy`.
-
-    .. warning::
-        The method is currently not supported in PyNative mode.
+        - This method currently does not support dynamic shapes.

     Args:
         fn (Union[Cell, Function]): Function to be executed in parallel.
@@ -426,12 +432,19 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
            has been set, the parameter setting will be ignored. Supported
            only when `fn` is a Cell with parameters.
            Default: ``None`` .
+        device (str, optional): Select a certain `device` target. It is not in use right now.
+            Support ["CPU", "GPU", "Ascend"]. Default: ``"Ascend"`` .
+        level (int, optional): Option for parallel strategy infer algorithm, namely the object function,
+            maximize computation
+            over communication ratio, maximize speed performance, minimize memory usage etc. It is not in
+            use right now. Support [0, 1, 2]. Default: ``0`` .

     Returns:
         Function, return the function that will be executed under auto parallel process.

     Raises:
         AssertionError: If parallel mode is not "auto_parallel" nor "semi_auto_parallel".
+        AssertionError: If device_target it not "Ascend" or "GPU".
         TypeError: If `in_strategy` is not a tuple.
         TypeError: If `out_strategy` is not a tuple or None.
         TypeError: If any element in `in_strategy` is not a tuple(int) or tuple(mindspore.parallel.Layout).
@@ -439,6 +452,8 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
         TypeError: If `parameter_plan` is not a dict or None.
         TypeError: If any key in `parameter_plan` is not a str.
         TypeError: If any value in `parameter_plan` is not a tuple(int) or a tuple(mindspore.parallel.Layout).
+        TypeError: If `device` is not a str.
+        TypeError: If `level` is not an integer.

     Supported Platforms:
         ``Ascend``
@@ -541,5 +556,4 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None):
     if not isinstance(fn, (ms.nn.Cell)):
         logger.warning("'fn' is not a mindspore.nn.Cell, and its definition cannot involve Parameter; "
                        "otherwise, the result may be incorrect.")
-
-    return Shard()(fn, in_strategy, out_strategy, parameter_plan)
+    return Shard()(fn, in_strategy, out_strategy, parameter_plan, device, level)
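With `device` and `level` restored, the module-level `shard()` reconstructed above is invoked as below. This is a sketch under stated assumptions: it must run in semi-auto/auto parallel mode with the distributed environment initialized (not set up here), and both `device` and `level` are documented as placeholders that are not in use yet.

```python
from mindspore import nn
from mindspore.parallel.shard import shard  # module shown in this diff; a public alias may also exist

net = nn.Dense(8, 8)                        # placeholder Cell
# (2, 1) splits the first input's rows across 2 devices; device/level are pass-throughs for now.
sharded_net = shard(net, in_strategy=((2, 1),), device="Ascend", level=0)
```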
mindspore/parallel/transform_safetensors.py
CHANGED

@@ -37,6 +37,7 @@ from mindspore import log as logger
 from mindspore.log import vlog_print
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
+from mindspore.common import np_dtype
 from mindspore.parallel._parallel_serialization import _get_device_num_from_strategy, _make_dir, \
     _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
     _insert_opt_shard_reshape, _extract_src_dst_layout_map_by_src, _insert_expand_layout_reshape
@@ -50,6 +51,8 @@ from mindspore.common import dtype as mstype

 safetensors_to_mstype = {'Int4': mstype.qint4x2}

+np.bfloat16 = np_dtype.bfloat16
+
 MAX_HEADER_SIZE = 100 * 1000 * 1000

 dtype_size = {
@@ -93,6 +96,7 @@ numpy_dtype = {
     "I64": np.int64,
     "U64": np.uint64,
     "F16": np.float16,
+    "BF16": np.bfloat16,  # no bf16
     "F32": np.float32,
     "F64": np.float64,
 }
@@ -211,16 +215,6 @@ class PySafeSlice:

     @property
     def dtype(self):
-        """Get dtype by numpy_dtype"""
-        if self.info["dtype"] == "BF16":
-            from mindspore.common import np_dtype
-            if not np_dtype.np_dtype_valid(True):
-                raise TypeError(
-                    "The Numpy bfloat16 data type is not supported now, please ensure that the current "
-                    "Numpy version is not less than the version when the mindspore is compiled, "
-                    "and the major versions are same."
-                )
-            return np_dtype.bfloat16
         return numpy_dtype[self.info["dtype"]]

     @property
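The rc1 code patches `np.bfloat16` once at import time instead of resolving `BF16` lazily inside `PySafeSlice.dtype`. A small sketch of that mapping; note the caveat the removed `np_dtype_valid` check used to enforce, namely that the NumPy bfloat16 extension only works when the installed NumPy matches the version MindSpore was built against:

```python
import numpy as np
from mindspore.common import np_dtype

np.bfloat16 = np_dtype.bfloat16  # module-wide alias, as in the hunk above
numpy_dtype = {"F16": np.float16, "BF16": np.bfloat16, "F32": np.float32, "F64": np.float64}

# Resolving a safetensors dtype tag now goes straight through the table:
np_type = numpy_dtype["BF16"]
```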
mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py
CHANGED

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ============================================================================
 """Ascend kernel details viewer"""
+import csv
 import os
 from decimal import Decimal

@@ -24,7 +25,8 @@ from mindspore.profiler.common.constant import (
     ProfilerActivity
 )
 from mindspore.profiler.common.log import ProfilerLogger
-from mindspore.profiler.common.
+from mindspore.profiler.common.path_manager import PathManager
+
 from mindspore import log as logger


@@ -108,15 +110,12 @@ class AscendKernelDetailsViewer(BaseViewer):
         Write data to csv file.
         """
         self._logger.info("Kernel details saved start")
-
-
-
-
-
-
-            data=csv_data,
-            headers=self.kernel_details_headers
-        )
+        PathManager.check_directory_path_writeable(os.path.dirname(self._save_path))
+        with open(self._save_path, "w", newline="", encoding="utf-8") as csvfile:
+            writer = csv.writer(csvfile)
+            writer.writerow(self.kernel_details_headers)
+            for row in self.op_summary:
+                writer.writerow([row[field] for field in self.op_summary_headers])
         self._logger.info("Kernel details saved done")

     def _update_headers(self):
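The replacement writer is plain `csv` from the standard library. A self-contained sketch of the same pattern with placeholder headers and rows (the real viewer writes `self.op_summary` rows selected by `self.op_summary_headers` and checks the directory with `PathManager` first):

```python
import csv
import os

save_path = "kernel_details.csv"             # placeholder path
headers = ["Name", "Type", "Duration(us)"]   # placeholder header row
rows = [{"Name": "MatMul", "Type": "MatMul", "Duration(us)": 12.5}]

os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True)
with open(save_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)
    for row in rows:
        writer.writerow([row[field] for field in headers])
```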
mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py
CHANGED

@@ -330,7 +330,7 @@ class AscendOpMemoryViewer:
                 res.append(self._combine_alloc_and_free_event(alloc_event, free_event))
                 alloc_event, free_event = None, None
             elif alloc_event is None and free_event:
-                self._logger.
+                self._logger.error("Alloc event is None, but free event is not None")

         if alloc_event:
             res.append(self._combine_alloc_and_free_event(alloc_event))
mindspore/profiler/common/msprof_cmd_tool.py
CHANGED

@@ -145,14 +145,14 @@ class MsprofCmdTool:
         if os.environ.get("ASCEND_TOOLKIT_HOME"):
             temp_path = os.path.join(os.environ.get("ASCEND_TOOLKIT_HOME"), "bin")
             if os.path.isdir(temp_path) and self._MSPROF_CMD in os.listdir(temp_path):
-                return
+                return temp_path

         for path in os.environ.get("PATH", "").split(":"):
             if self._ASCEND_MARK in path:
                 prefix = path.split(self._ASCEND_MARK)[0]
                 temp_path = os.path.join(prefix, self._HIAI_MSPROF_TAIL)
                 if os.path.isdir(temp_path) and self._MSPROF_CMD in os.listdir(temp_path):
-                    return
+                    return temp_path

         return None

mindspore/profiler/common/path_manager.py
CHANGED

@@ -73,15 +73,6 @@ class PathManager:
             msg = f"file size exceeds the limit: {cls.MAX_FILE_SIZE}, file size: {file_size}"
             raise ProfilerPathErrorException(msg)

-        file_stat = os.stat(path)
-        if file_stat.st_mode & (stat.S_IWGRP | stat.S_IWOTH):
-            msg = f"File path {path} has group or others writable permissions, which is not allowed."
-            raise ProfilerPathErrorException(msg)
-
-        if stat.S_ISCHR(file_stat.st_mode) or stat.S_ISBLK(file_stat.st_mode):
-            msg = f"Invalid input path is a character or block device path: {path}"
-            raise ProfilerPathErrorException(msg)
-
     @classmethod
     def get_directory_size(cls, directory: str, unit: str = 'MB') -> float:
         """
mindspore/profiler/common/profiler_context.py
CHANGED

@@ -43,7 +43,6 @@ from mindspore.profiler.schedule import Schedule
 from mindspore import context
 from mindspore import log as logger
 from mindspore.profiler.common.profiler_info import ProfilerInfo
-from mindspore.profiler.experimental_config import _ExperimentalConfig


 @Singleton
@@ -82,33 +81,11 @@ class ProfilerContext:
             logger.warning(f"Both on_trace_ready path and output_path are provided. "
                            f"The on_trace_ready path takes effect. Final path is {final_path}")
             kwargs["output_path"] = final_path
-
-        self._check_and_set_experimental_params(kwargs)
+
         self._profiler_params_mgr: ProfilerParameters = ProfilerParameters(**kwargs)
         self._profiler_path_mgr: ProfilerOutputPath = ProfilerOutputPath(rank_id=int(self._rank_id))
-        self._profiler_path_mgr.output_path = self._profiler_params_mgr.output_path

-
-    def _check_and_set_experimental_params(kwargs):
-        """
-        Set experimental parameters
-        """
-        if not isinstance(kwargs.get("experimental_config"), _ExperimentalConfig):
-            logger.warning("For Profiler, experimental_config value must be the "
-                           "'mindspore.profiler._ExperimentalConfig' class, "
-                           "reset to default value.")
-            return
-        kwargs["profiler_level"] = kwargs.get("experimental_config").profiler_level
-        kwargs["aic_metrics"] = kwargs.get("experimental_config").aic_metrics
-        kwargs["l2_cache"] = kwargs.get("experimental_config").l2_cache
-        kwargs["mstx"] = kwargs.get("experimental_config").mstx
-        kwargs["data_simplification"] = kwargs.get("experimental_config").data_simplification
-        kwargs["export_type"] = kwargs.get("experimental_config").export_type
-        kwargs["mstx_domain_include"] = kwargs.get("experimental_config").mstx_domain_include
-        kwargs["mstx_domain_exclude"] = kwargs.get("experimental_config").mstx_domain_exclude
-        kwargs["sys_io"] = kwargs.get("experimental_config").sys_io
-        kwargs["sys_interconnection"] = kwargs.get("experimental_config").sys_interconnection
-        kwargs["host_sys"] = kwargs.get("experimental_config").host_sys
+        self._profiler_path_mgr.output_path = self._profiler_params_mgr.output_path

     @property
     def on_trace_ready_output_path(self) -> str: