mindspore 2.3.0rc1__cp37-none-any.whl → 2.3.0rc2__cp37-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (316)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +1 -1
  3. mindspore/_akg/akg/utils/tbe_codegen_utils.py +13 -3
  4. mindspore/_c_dataengine.cpython-37m-aarch64-linux-gnu.so +0 -0
  5. mindspore/_c_expression.cpython-37m-aarch64-linux-gnu.so +0 -0
  6. mindspore/_checkparam.py +20 -0
  7. mindspore/_extends/parse/parser.py +1 -1
  8. mindspore/_extends/parse/standard_method.py +6 -5
  9. mindspore/_mindspore_offline_debug.cpython-37m-aarch64-linux-gnu.so +0 -0
  10. mindspore/amp.py +5 -5
  11. mindspore/bin/cache_admin +0 -0
  12. mindspore/bin/cache_server +0 -0
  13. mindspore/boost/boost_cell_wrapper.py +1 -1
  14. mindspore/boost/group_loss_scale_manager.py +1 -1
  15. mindspore/common/__init__.py +4 -2
  16. mindspore/common/_register_for_recompute.py +48 -0
  17. mindspore/common/_stub_tensor.py +1 -0
  18. mindspore/common/api.py +56 -4
  19. mindspore/common/dtype.py +5 -3
  20. mindspore/common/dump.py +2 -2
  21. mindspore/common/hook_handle.py +51 -4
  22. mindspore/common/initializer.py +1 -1
  23. mindspore/common/jit_config.py +17 -6
  24. mindspore/common/parameter.py +7 -2
  25. mindspore/common/recompute.py +247 -0
  26. mindspore/common/sparse_tensor.py +2 -2
  27. mindspore/common/symbol.py +1 -1
  28. mindspore/common/tensor.py +74 -36
  29. mindspore/communication/__init__.py +3 -3
  30. mindspore/communication/management.py +30 -30
  31. mindspore/context.py +28 -15
  32. mindspore/dataset/__init__.py +5 -5
  33. mindspore/dataset/audio/__init__.py +2 -2
  34. mindspore/dataset/audio/transforms.py +51 -51
  35. mindspore/dataset/callback/ds_callback.py +2 -2
  36. mindspore/dataset/engine/cache_client.py +1 -1
  37. mindspore/dataset/engine/datasets.py +3 -3
  38. mindspore/dataset/engine/datasets_audio.py +14 -14
  39. mindspore/dataset/engine/datasets_standard_format.py +3 -3
  40. mindspore/dataset/engine/datasets_text.py +38 -38
  41. mindspore/dataset/engine/datasets_user_defined.py +3 -3
  42. mindspore/dataset/engine/datasets_vision.py +68 -68
  43. mindspore/dataset/text/__init__.py +3 -3
  44. mindspore/dataset/text/transforms.py +26 -26
  45. mindspore/dataset/transforms/__init__.py +1 -1
  46. mindspore/dataset/vision/__init__.py +3 -3
  47. mindspore/dataset/vision/transforms.py +92 -92
  48. mindspore/dataset/vision/utils.py +1 -1
  49. mindspore/experimental/optim/adadelta.py +2 -2
  50. mindspore/experimental/optim/adagrad.py +2 -2
  51. mindspore/experimental/optim/adam.py +2 -2
  52. mindspore/experimental/optim/adamax.py +2 -2
  53. mindspore/experimental/optim/adamw.py +2 -2
  54. mindspore/experimental/optim/asgd.py +2 -2
  55. mindspore/experimental/optim/lr_scheduler.py +24 -20
  56. mindspore/experimental/optim/nadam.py +2 -2
  57. mindspore/experimental/optim/optimizer.py +1 -1
  58. mindspore/experimental/optim/radam.py +2 -2
  59. mindspore/experimental/optim/rmsprop.py +2 -2
  60. mindspore/experimental/optim/rprop.py +2 -2
  61. mindspore/experimental/optim/sgd.py +2 -2
  62. mindspore/hal/stream.py +2 -0
  63. mindspore/include/mindapi/base/types.h +5 -0
  64. mindspore/lib/libdnnl.so.2 +0 -0
  65. mindspore/lib/libmindspore.so +0 -0
  66. mindspore/lib/libmindspore_backend.so +0 -0
  67. mindspore/lib/libmindspore_common.so +0 -0
  68. mindspore/lib/libmindspore_core.so +0 -0
  69. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  70. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  71. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  72. mindspore/lib/libmindspore_shared_lib.so +0 -0
  73. mindspore/lib/libopencv_core.so.4.5 +0 -0
  74. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  75. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +6 -6
  76. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  77. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  78. mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
  79. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  80. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
  81. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
  82. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
  83. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +101787 -98559
  84. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  85. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
  86. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/base/op_register.h +2 -2
  87. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/mix.h +8 -1
  88. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/norm.h +5 -3
  89. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/reduce.h +2 -2
  90. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/backend/backend.h +3 -3
  91. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/backend/rtbackend.h +3 -3
  92. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h +0 -1
  93. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/module/module.h +3 -3
  94. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/svector/svector.h +3 -2
  95. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  96. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  97. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/tiling/add_tiling.h +9 -9
  98. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +2 -6
  99. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb.h +2 -2
  100. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_base.h +460 -0
  101. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_bf16.h +217 -0
  102. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp16.h +116 -0
  103. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_tiling.h +16 -24
  104. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_value.h +27 -0
  105. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -4
  106. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention/FlashAttentionScore_impl.h → flash_attention_score/flash_attention_score_impl.h} +2 -1
  107. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention/bs_attention_tiling.h → flash_attention_score/flash_attention_score_tiling.h} +15 -19
  108. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/gelu/tiling/gelu_tiling.h +7 -9
  109. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/lccl/lccl_wrapper.h +58 -0
  110. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +19 -8
  111. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{matmul → matmul_common}/pp_matmul_common_tiling.h +18 -8
  112. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{matmul → matmul_common}/pp_matmul_info.h +7 -4
  113. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{matmul → matmul_common}/tiling_data.h +44 -6
  114. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_utils.h +65 -0
  115. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +10 -6
  116. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +4 -1
  117. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +41 -0
  118. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention/PagedAttention_impl.h → paged_attention/paged_attention_impl.h} +1 -1
  119. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +63 -0
  120. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/add_param.h +2 -2
  121. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention_param.h → param/attention_param.h} +11 -2
  122. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +37 -0
  123. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +45 -0
  124. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/reshape_and_cache/reshape_and_cache_tiling.h +1 -2
  125. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/rms_norm.h +23 -0
  126. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/rms_norm_base.h +175 -0
  127. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/rms_norm_normal.h +276 -0
  128. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/rms_norm_split_d.h +280 -0
  129. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/tiling_data.h +35 -0
  130. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +45 -0
  131. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/kernel/sub_kernel.h +20 -0
  132. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +47 -0
  133. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_tiling.h +25 -0
  134. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +323 -23
  135. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/types.h +15 -4
  136. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +8 -0
  137. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
  138. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
  139. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
  140. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
  141. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
  142. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
  143. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
  144. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
  145. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
  146. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  147. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
  148. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
  149. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
  150. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  151. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  152. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  153. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
  154. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  155. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  156. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  157. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  158. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
  159. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
  160. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
  161. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
  162. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lcal.h +22 -0
  163. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lcal_comm.h +70 -0
  164. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lcal_types.h +103 -0
  165. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lccl.h +47 -0
  166. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lccl_wrapper.h +58 -0
  167. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lcoc.h +154 -0
  168. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
  169. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
  170. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  171. mindspore/log.py +2 -2
  172. mindspore/mint/__init__.py +457 -0
  173. mindspore/mint/nn/__init__.py +430 -0
  174. mindspore/mint/nn/functional.py +424 -0
  175. mindspore/mint/optim/__init__.py +24 -0
  176. mindspore/mint/optim/adamw.py +186 -0
  177. mindspore/multiprocessing/__init__.py +4 -0
  178. mindspore/nn/__init__.py +3 -0
  179. mindspore/nn/cell.py +51 -47
  180. mindspore/nn/extend/__init__.py +29 -0
  181. mindspore/nn/extend/basic.py +140 -0
  182. mindspore/nn/extend/embedding.py +143 -0
  183. mindspore/nn/extend/layer/__init__.py +27 -0
  184. mindspore/nn/extend/layer/normalization.py +107 -0
  185. mindspore/nn/extend/pooling.py +117 -0
  186. mindspore/nn/generator.py +297 -0
  187. mindspore/nn/layer/basic.py +109 -1
  188. mindspore/nn/layer/container.py +2 -2
  189. mindspore/nn/layer/conv.py +6 -6
  190. mindspore/nn/layer/embedding.py +1 -1
  191. mindspore/nn/layer/normalization.py +21 -43
  192. mindspore/nn/layer/padding.py +4 -0
  193. mindspore/nn/optim/ada_grad.py +2 -2
  194. mindspore/nn/optim/adadelta.py +1 -1
  195. mindspore/nn/optim/adafactor.py +1 -1
  196. mindspore/nn/optim/adam.py +7 -7
  197. mindspore/nn/optim/adamax.py +2 -2
  198. mindspore/nn/optim/adasum.py +2 -2
  199. mindspore/nn/optim/asgd.py +2 -2
  200. mindspore/nn/optim/ftrl.py +1 -1
  201. mindspore/nn/optim/lamb.py +3 -3
  202. mindspore/nn/optim/lars.py +1 -1
  203. mindspore/nn/optim/lazyadam.py +2 -2
  204. mindspore/nn/optim/momentum.py +2 -2
  205. mindspore/nn/optim/optimizer.py +2 -2
  206. mindspore/nn/optim/proximal_ada_grad.py +2 -2
  207. mindspore/nn/optim/rmsprop.py +2 -2
  208. mindspore/nn/optim/rprop.py +2 -2
  209. mindspore/nn/optim/sgd.py +2 -2
  210. mindspore/nn/optim/thor.py +2 -2
  211. mindspore/nn/wrap/cell_wrapper.py +9 -9
  212. mindspore/nn/wrap/grad_reducer.py +5 -5
  213. mindspore/ops/_grad_experimental/grad_comm_ops.py +4 -2
  214. mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -2
  215. mindspore/ops/_vmap/vmap_math_ops.py +27 -8
  216. mindspore/ops/_vmap/vmap_nn_ops.py +66 -8
  217. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +73 -1
  218. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +12 -3
  219. mindspore/ops/auto_generate/gen_arg_handler.py +24 -0
  220. mindspore/ops/auto_generate/gen_extend_func.py +274 -0
  221. mindspore/ops/auto_generate/gen_ops_def.py +889 -22
  222. mindspore/ops/auto_generate/gen_ops_prim.py +3541 -253
  223. mindspore/ops/auto_generate/pyboost_inner_prim.py +282 -0
  224. mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
  225. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +9 -0
  226. mindspore/ops/extend/__init__.py +9 -1
  227. mindspore/ops/extend/array_func.py +134 -27
  228. mindspore/ops/extend/math_func.py +3 -3
  229. mindspore/ops/extend/nn_func.py +363 -2
  230. mindspore/ops/function/__init__.py +19 -2
  231. mindspore/ops/function/array_func.py +463 -439
  232. mindspore/ops/function/clip_func.py +7 -18
  233. mindspore/ops/function/grad/grad_func.py +5 -5
  234. mindspore/ops/function/linalg_func.py +4 -4
  235. mindspore/ops/function/math_func.py +260 -243
  236. mindspore/ops/function/nn_func.py +825 -62
  237. mindspore/ops/function/random_func.py +73 -4
  238. mindspore/ops/function/sparse_unary_func.py +1 -1
  239. mindspore/ops/function/vmap_func.py +1 -1
  240. mindspore/ops/functional.py +2 -2
  241. mindspore/ops/op_info_register.py +1 -31
  242. mindspore/ops/operations/__init__.py +2 -3
  243. mindspore/ops/operations/_grad_ops.py +2 -107
  244. mindspore/ops/operations/_inner_ops.py +5 -5
  245. mindspore/ops/operations/_sequence_ops.py +2 -2
  246. mindspore/ops/operations/array_ops.py +11 -233
  247. mindspore/ops/operations/comm_ops.py +32 -32
  248. mindspore/ops/operations/custom_ops.py +7 -89
  249. mindspore/ops/operations/manually_defined/ops_def.py +329 -4
  250. mindspore/ops/operations/math_ops.py +13 -163
  251. mindspore/ops/operations/nn_ops.py +9 -316
  252. mindspore/ops/operations/random_ops.py +1 -1
  253. mindspore/ops/operations/sparse_ops.py +3 -3
  254. mindspore/ops/primitive.py +2 -2
  255. mindspore/ops_generate/arg_dtype_cast.py +12 -3
  256. mindspore/ops_generate/arg_handler.py +24 -0
  257. mindspore/ops_generate/gen_ops_inner_prim.py +2 -0
  258. mindspore/ops_generate/gen_pyboost_func.py +13 -6
  259. mindspore/ops_generate/pyboost_utils.py +2 -17
  260. mindspore/parallel/__init__.py +3 -2
  261. mindspore/parallel/_auto_parallel_context.py +106 -1
  262. mindspore/parallel/_parallel_serialization.py +34 -2
  263. mindspore/parallel/_utils.py +16 -0
  264. mindspore/parallel/algo_parameter_config.py +4 -4
  265. mindspore/parallel/checkpoint_transform.py +249 -77
  266. mindspore/parallel/cluster/process_entity/_api.py +1 -1
  267. mindspore/parallel/parameter_broadcast.py +1 -1
  268. mindspore/parallel/shard.py +1 -1
  269. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +1 -0
  270. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +17 -5
  271. mindspore/profiler/parser/ascend_msprof_exporter.py +3 -3
  272. mindspore/profiler/parser/ascend_msprof_generator.py +10 -3
  273. mindspore/profiler/parser/ascend_op_generator.py +26 -9
  274. mindspore/profiler/parser/ascend_timeline_generator.py +7 -4
  275. mindspore/profiler/parser/profiler_info.py +11 -1
  276. mindspore/profiler/profiling.py +13 -5
  277. mindspore/rewrite/api/node.py +12 -12
  278. mindspore/rewrite/api/symbol_tree.py +11 -11
  279. mindspore/run_check/_check_version.py +1 -1
  280. mindspore/safeguard/rewrite_obfuscation.py +2 -2
  281. mindspore/train/amp.py +4 -4
  282. mindspore/train/anf_ir_pb2.py +8 -2
  283. mindspore/train/callback/_backup_and_restore.py +2 -2
  284. mindspore/train/callback/_callback.py +4 -4
  285. mindspore/train/callback/_checkpoint.py +2 -2
  286. mindspore/train/callback/_early_stop.py +2 -2
  287. mindspore/train/callback/_landscape.py +4 -4
  288. mindspore/train/callback/_loss_monitor.py +2 -2
  289. mindspore/train/callback/_on_request_exit.py +2 -2
  290. mindspore/train/callback/_reduce_lr_on_plateau.py +2 -2
  291. mindspore/train/callback/_summary_collector.py +2 -2
  292. mindspore/train/callback/_time_monitor.py +2 -2
  293. mindspore/train/dataset_helper.py +8 -3
  294. mindspore/train/loss_scale_manager.py +2 -2
  295. mindspore/train/metrics/metric.py +3 -3
  296. mindspore/train/mind_ir_pb2.py +22 -17
  297. mindspore/train/model.py +15 -15
  298. mindspore/train/serialization.py +18 -18
  299. mindspore/train/summary/summary_record.py +7 -7
  300. mindspore/train/train_thor/convert_utils.py +3 -3
  301. mindspore/version.py +1 -1
  302. {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +1 -1
  303. {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +307 -260
  304. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/tiling_data.h +0 -59
  305. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/FlashAttentionScore_bf16_BNSD_mix.o +0 -0
  306. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/FlashAttentionScore_bf16_BSH_mix.o +0 -0
  307. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/FlashAttentionScore_fp16_BNSD_mix.o +0 -0
  308. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/FlashAttentionScore_fp16_BSH_mix.o +0 -0
  309. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/PagedAttention_bf16_BNSD_mix.o +0 -0
  310. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/PagedAttention_bf16_BSH_mix.o +0 -0
  311. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/PagedAttention_fp16_BNSD_mix.o +0 -0
  312. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/PagedAttention_fp16_BSH_mix.o +0 -0
  313. /mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention/bs_attention_mix_hwsync.h → flash_attention_score/kernel/flash_attention_score_mix_hwsync.h} +0 -0
  314. {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
  315. {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
  316. {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
@@ -65,6 +65,19 @@ class _ParallelOptimizerConfig:
     OPTIMIZER_WEIGHT_SHARD_SIZE = "optimizer_weight_shard_size"
 
 
+class _PipelineConfig:
+    """
+    The key of the Pipeline parallelism.
+    """
+    PIPELINE_INTERLEAVE = "pipeline_interleave"
+    PIPELINE_SCHEDULER = "pipeline_scheduler"
+
+
+class _PipelineScheduler:
+    PIPELINE_1F1B = "1f1b"
+    PIPELINE_GPIPE = "gpipe"
+
+
 class _AutoParallelContext:
     """
     _AutoParallelContext is the environment in which operations are executed
@@ -248,6 +261,16 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_pipeline_result_broadcast()
 
+    def get_pipeline_interleave(self):
+        """Get pipeline interleave flag"""
+        self.check_context_handle()
+        return self._context_handle.get_pipeline_interleave()
+
+    def get_pipeline_scheduler(self):
+        """Get pipeline scheduler"""
+        self.check_context_handle()
+        return self._context_handle.get_pipeline_scheduler()
+
     def set_pipeline_segments(self, segments):
         """Set the segments of the pipeline"""
         if isinstance(segments, bool) or not isinstance(segments, int):
@@ -796,16 +819,87 @@ class _AutoParallelContext:
                             .format(type(enable_parallel_optimizer)))
         self._context_handle.set_enable_parallel_optimizer(enable_parallel_optimizer)
 
+    def set_force_fp32_communication(self, force_fp32_communication):
+        """
+        Set enable/disable force fp32 communication.
+
+        Args:
+            set_force_fp32_communication (bool): Enable/disable force fp32 communication.
+        """
+        self.check_context_handle()
+        if not isinstance(force_fp32_communication, bool):
+            raise TypeError("For 'set_auto_parallel_context', "
+                            "the argument 'force_fp32_communication' must be bool, but got the type : {}."
+                            .format(type(force_fp32_communication)))
+        self._context_handle.set_force_fp32_communication(force_fp32_communication)
+
     def get_enable_fold_pipeline(self):
         """Get parallel optimizer flag."""
         self.check_context_handle()
         return self._context_handle.get_enable_fold_pipeline()
 
+    def set_pipeline_config(self, pipeline_config):
+        r"""
+        Set the configuration for pipeline parallelism. The configuration provides more detailed behavior control about
+        parallel training when pipeline parallelism is enabled.
+
+        Args:
+            pipeline_config (dict): The configuration for pipeline parallelism. It supports following keys:
+
+                - pipeline_interleave(bool): Setting true enable interleave scheduler for pipeline parallelism. This
+                  scheduler requires more memory but less bubble.
+                - pipeline_scheduler(string): There are two choices, "1f1b" and "gpipe". default is "1f1b"
+
+                  - 1f1b: It requires less memory and bubble ratio, for it run backward pass when corresponding forward pass
+                    finished.
+                  - gpipe: It requires more memory and bubble ratio, for it run backward pass after all forward pass
+                    finished.
+
+        Raises:
+            TypeError: If the type of `pipeline_config` is not `dict`.
+            ValueError: If the key in `pipeline_config` not in ["pipeline_interleave", "pipeline_scheduler"].
+            ValueError: If pipeline interleave is False, pipeline scheduler is not `1f1b`.
+        """
+        self.check_context_handle()
+
+        if not isinstance(pipeline_config, dict):
+            raise TypeError("For 'set_pipeline_config', the argument 'pipeine_config' "
+                            "must be dict, but got the type : {}.".format(type(pipeline_config)))
+
+        pp_interleave = _PipelineConfig.PIPELINE_INTERLEAVE
+        pp_scheduler = _PipelineConfig.PIPELINE_SCHEDULER
+
+        for config_name in pipeline_config:
+            unknown_config = []
+            if config_name not in [pp_interleave, pp_scheduler]:
+                unknown_config.append(config_name)
+
+            if unknown_config:
+                raise ValueError("Unknown config: {}".format(unknown_config))
+
+        Validator.check_bool(
+            pipeline_config[pp_interleave], pp_interleave, pp_interleave)
+        self._context_handle.set_pipeline_interleave(
+            pipeline_config[pp_interleave])
+
+        Validator.check_string(pipeline_config[pp_scheduler], [_PipelineScheduler.PIPELINE_1F1B,
+                                                               _PipelineScheduler.PIPELINE_GPIPE])
+        if not pipeline_config[pp_interleave] and pipeline_config[pp_scheduler] != _PipelineScheduler.PIPELINE_1F1B:
+            raise ValueError(f"When pipeline_interleave is False, {pp_scheduler} is not supported")
+
+        self._context_handle.set_pipeline_scheduler(pipeline_config[pp_scheduler])
+
     def get_enable_parallel_optimizer(self):
         """Get parallel optimizer flag."""
         self.check_context_handle()
         return self._context_handle.get_enable_parallel_optimizer()
 
+    def get_force_fp32_communication(self):
+        """Get force fp32 communication flag."""
+        self.check_context_handle()
+        return self._context_handle.get_force_fp32_communication()
+
+
     def set_parallel_optimizer_config(self, parallel_optimizer_config):
         r"""
         Set the configure for parallel optimizer. The configure provides more detailed behavior control about parallel
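Taken together with the `pipeline_config` and `force_fp32_communication` entries registered in `_set_auto_parallel_context_func_map` further below, a minimal usage sketch could look like the following. This assumes the public `mindspore.set_auto_parallel_context` forwards these keyword arguments unchanged, as it does for the existing keys; the stage count and scheduler choice are only illustrative:

>>> import mindspore as ms
>>> ms.set_auto_parallel_context(parallel_mode="semi_auto_parallel", pipeline_stages=2,
...                              pipeline_config={"pipeline_interleave": True, "pipeline_scheduler": "1f1b"},
...                              force_fp32_communication=True)

Per the validation shown above, any key other than "pipeline_interleave" and "pipeline_scheduler" raises ValueError, "gpipe" is only accepted when pipeline_interleave is True, and the checks read both keys, so the dict appears to require both entries.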
@@ -1087,6 +1181,7 @@ class _AutoParallelContext:
         self.set_enable_all_gather_fusion(openstate)
         self.set_enable_reduce_scatter_fusion(openstate)
 
+
 def _set_ops_strategy_json_config(type="SAVE", path="", mode="all"):
     """
     Set strategy json configuration.
@@ -1110,6 +1205,7 @@ def _set_ops_strategy_json_config(type="SAVE", path="", mode="all"):
     else:
         raise KeyError("Type must be 'SAVE' or 'LOAD' and mode must be 'all' or 'principal'")
 
+
 _AUTO_PARALLEL_CONTEXT = None
 
 
@@ -1145,7 +1241,9 @@ _set_auto_parallel_context_func_map = {
     "full_batch": auto_parallel_context().set_full_batch,
     "dataset_strategy": auto_parallel_context().set_dataset_strategy,
     "enable_parallel_optimizer": auto_parallel_context().set_enable_parallel_optimizer,
+    "force_fp32_communication": auto_parallel_context().set_force_fp32_communication,
     "parallel_optimizer_config": auto_parallel_context().set_parallel_optimizer_config,
+    "pipeline_config": auto_parallel_context().set_pipeline_config,
     "grad_accumulation_step": auto_parallel_context().set_grad_accumulation_step,
     "all_reduce_fusion_config": auto_parallel_context().set_all_reduce_fusion_split_indices,
     "communi_parallel_mode": auto_parallel_context().set_communi_parallel_mode,
@@ -1164,6 +1262,8 @@ _get_auto_parallel_context_func_map = {
     "loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean,
     "pipeline_stages": auto_parallel_context().get_pipeline_stages,
     "pipeline_result_broadcast": auto_parallel_context().get_pipeline_result_broadcast,
+    "pipeline_interleave": auto_parallel_context().get_pipeline_interleave,
+    "pipeline_scheduler": auto_parallel_context().get_pipeline_scheduler,
     "parallel_mode": auto_parallel_context().get_parallel_mode,
     "search_mode": auto_parallel_context().get_strategy_search_mode,
     "auto_parallel_search_mode": auto_parallel_context().get_auto_parallel_search_mode,
@@ -1173,6 +1273,7 @@ _get_auto_parallel_context_func_map = {
     "full_batch": auto_parallel_context().get_full_batch,
     "dataset_strategy": auto_parallel_context().get_dataset_strategy,
     "enable_parallel_optimizer": auto_parallel_context().get_enable_parallel_optimizer,
+    "force_fp32_communication": auto_parallel_context().get_force_fp32_communication,
     "grad_accumulation_step": auto_parallel_context().get_grad_accumulation_step,
     "all_reduce_fusion_config": auto_parallel_context().get_all_reduce_fusion_split_indices,
     "communi_parallel_mode": auto_parallel_context().get_communi_parallel_mode,
@@ -1192,7 +1293,7 @@ _get_auto_parallel_context_func_map = {
                 grad_accumulation_step=int, all_reduce_fusion_config=list, group_ckpt_save_file=str,
                 communi_parallel_mode=str, optimizer_weight_shard_size=int, sharding_propagation=bool,
                 optimizer_weight_shard_aggregated_save=bool, enable_alltoall=bool, comm_fusion=dict,
-                strategy_ckpt_config=dict)
+                strategy_ckpt_config=dict, force_fp32_communication=bool)
 def _set_auto_parallel_context(**kwargs):
     """
     Set auto parallel context.
@@ -1240,6 +1341,9 @@ def _set_auto_parallel_context(**kwargs):
         full_batch (bool): Whether to load the whole batch on each device. Default: ``False``.
         dataset_strategy Union[str, tuple]: Dataset sharding strategy. Default: "data_parallel".
         enable_parallel_optimizer (bool): Enable using optimizer segmentation or not. Default: ``False``.
+        force_fp32_communication (bool): A switch that determines whether reduce operators (AllReduce, ReduceScatter)
+            are forced to use the fp32 data type for communication during communication. True is the enable
+            switch. Default: ``False`` .
         all_reduce_fusion_config (list): Set allreduce fusion strategy by parameters indices.
         pipeline_stages (int): Set the stage information for pipeline parallel. This indicates how
             the devices are distributed alone the pipeline. The total devices will be divided into
@@ -1330,6 +1434,7 @@ def _reset_auto_parallel_context():
     - strategy_ckpt_load_file: ""
     - strategy_ckpt_save_file: ""
     - enable_parallel_optimizer: False
+    - force_fp32_communication: False
     - search_mode: 'recursive_programming
     - auto_parallel_search_mode: 'recursive_programming
     - sharding_propagation: False
@@ -259,6 +259,33 @@ def _extract_pipeline_stage_num(strategy_file):
     return pipeline_stage_num
 
 
+def _extract_src_dst_layout_map_by_src(src_strategy_file=None, dst_strategy_file=None):
+    """Extract strategy list by src strategy"""
+    src_layout_map = _extract_layout_map(src_strategy_file)
+    dst_layout_map = _extract_layout_map(dst_strategy_file)
+    if dst_layout_map is None:
+        return src_layout_map, dst_layout_map
+    for param_name in list(dst_layout_map.keys()):
+        if param_name in src_layout_map.keys():
+            continue
+        dst_layout_map.pop(param_name)
+    stage_id = 0
+    if src_strategy_file[-5:] == ".json":
+        with open(src_strategy_file, 'r') as f:
+            json_content = json.load(f)
+            strategy_items = json_content.get("parallel_strategy_item")
+            if not strategy_items:
+                raise ValueError("The strategy file {} if empty.".format(src_strategy_file))
+            stage_id = strategy_items.get(list(strategy_items.keys())[0]).get('stage')
+    else:
+        src_parallel_strategy_map = _load_protobuf_strategy(src_strategy_file)
+        strategy_items = src_parallel_strategy_map.parallel_strategy_item
+        if not strategy_items:
+            raise ValueError("The strategy file {} if empty.".format(src_strategy_file))
+        stage_id = strategy_items[0].parallel_strategys.stage
+    return src_layout_map, dst_layout_map, stage_id
+
+
 def _extract_src_dst_layout_map(rank_id, src_strategy_file=None, dst_strategy_file=None):
     """Extract strategy list"""
     src_layout_map = _extract_layout_map(src_strategy_file, None)
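The pruning step in `_extract_src_dst_layout_map_by_src` keeps only the destination parameters that also exist in the source layout map before the stage id is read; a self-contained sketch of that filtering with plain dicts (hypothetical parameter names and layout values, not real strategy-file content):

>>> src_layout_map = {"dense.weight": "src_layout", "dense.bias": "src_layout"}
>>> dst_layout_map = {"dense.weight": "dst_layout", "head.weight": "dst_layout"}
>>> for param_name in list(dst_layout_map.keys()):
...     if param_name not in src_layout_map:
...         dst_layout_map.pop(param_name)
>>> dst_layout_map
{'dense.weight': 'dst_layout'}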
@@ -357,6 +384,7 @@ def _transform_parallel_checkpoint(rank_id, param_total_dict, param_attr_dict, s
     Transform model parallel dimension for distributed checkpoint files.
     """
     transform_param_dict = {}
+    device_num = -1
     for param_name, _ in param_total_dict.items():
         tensor_shape = list(param_total_dict[param_name].values())[0].shape
         from_dev_matrix = [1]
@@ -410,14 +438,18 @@ def _transform_parallel_checkpoint(rank_id, param_total_dict, param_attr_dict, s
         to_info_tuple = (to_opt_shard_size, to_dev_matrix_origin, to_tensor_map_origin, origin_tensor_shape)
         _insert_opt_shard_reshape(param_rank_map, from_info_tuple, to_info_tuple)
         transform_operator_stack = _generate_transform_operator_stack(param_rank_map, rank_id)
-        _apply_tensor_transform_operators(transform_operator_stack, param_total_dict[param_name], device_num)
-        transform_tensor = ms.Tensor(param_total_dict[param_name][rank_id % device_num])
+        param_total_dict_copy = param_total_dict[param_name].copy()
+        _apply_tensor_transform_operators(transform_operator_stack, param_total_dict_copy, device_num)
+        transform_tensor = ms.Tensor(param_total_dict_copy[rank_id % device_num])
         requires_grad = param_attr_dict[param_name][rank_id % device_num][0]
         layerwise_parallel = param_attr_dict[param_name][rank_id % device_num][1]
         transform_para = ms.Parameter(transform_tensor, param_name, requires_grad, layerwise_parallel)
         if param_type_dict[param_name][rank_id % device_num] == "BFloat16":
             transform_para.set_dtype(ms.bfloat16)
         transform_param_dict[param_name] = transform_para
+    if device_num < 1:
+        raise ValueError("None of the parameters in checkpoint file are in either src strategy or "
+                         "dst strategy. Please check correctness of strategy files.")
 
     # Handle those parameter like learning_rate, global_step which not in strategy_file.
     for param_name, _ in param_total_dict.items():
@@ -191,6 +191,22 @@ def _origin_shapes(shapes):
     return new_shapes
 
 
+def _dynamic_shape_for_dataset(dataset_shapes, dynamic_shapes):
+    """convert static dataset shapes to dynamic shape"""
+    if len(dataset_shapes) != len(dynamic_shapes):
+        raise ValueError("The dataset shapes size of {} is not equal to "
+                         "dynamic shapes size of {}".format(dataset_shapes, dynamic_shapes))
+    ret = dataset_shapes
+    for i in range(len(dynamic_shapes)):
+        if len(dataset_shapes[i]) != len(dynamic_shapes[i]):
+            raise ValueError("The dataset shapes size of {} is not equal to "
+                             "dynamic shapes size of {}".format(dataset_shapes, dynamic_shapes))
+        for j in range(len(dynamic_shapes[i])):
+            if dynamic_shapes[i][j] == -1:
+                ret[i][j] = -1
+    return ret
+
+
 def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
     """Convert numpy to tensor, expanding batch dimension according to device_num, adapt to feed the data
     from host solution.
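As a quick illustration of the helper added above: it keeps the static sizes from the dataset shapes and only overwrites the positions that the user-declared dynamic shapes mark with -1 (note that it writes into `dataset_shapes` in place, so the inputs are assumed to be nested lists). A hypothetical call:

>>> dataset_shapes = [[32, 3, 224, 224], [32]]
>>> dynamic_shapes = [[-1, 3, 224, 224], [-1]]
>>> _dynamic_shape_for_dataset(dataset_shapes, dynamic_shapes)
[[-1, 3, 224, 224], [-1]]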
@@ -229,7 +229,7 @@ def set_algo_parameters(**kwargs):
     """
     Set parameters in the algorithm for parallel strategy searching. See a typical use in
     `test_auto_parallel_resnet.py
-    <https://gitee.com/mindspore/mindspore/blob/r2.3.q1/tests/ut/python/parallel/test_auto_parallel_resnet.py>`_.
+    <https://gitee.com/mindspore/mindspore/blob/master/tests/ut/python/parallel/test_auto_parallel_resnet.py>`_.
 
     Note:
         The attribute name is required. This interface works ONLY in AUTO_PARALLEL mode.
@@ -266,14 +266,14 @@ def set_algo_parameters(**kwargs):
 
     For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
     Please see the `rank table startup
-    <https://www.mindspore.cn/tutorials/experts/en/r2.3.q1/parallel/rank_table.html>`_
+    <https://www.mindspore.cn/tutorials/experts/en/master/parallel/rank_table.html>`_
     for more details.
 
     For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun startup
-    <https://www.mindspore.cn/tutorials/experts/en/r2.3.q1/parallel/mpirun.html>`_ .
+    <https://www.mindspore.cn/tutorials/experts/en/master/parallel/mpirun.html>`_ .
 
     For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
-    Startup <https://www.mindspore.cn/tutorials/experts/en/r2.3.q1/parallel/dynamic_cluster.html>`_ .
+    Startup <https://www.mindspore.cn/tutorials/experts/en/master/parallel/dynamic_cluster.html>`_ .
 
     >>> import numpy as np
     >>> import mindspore as ms