mindspore 2.3.0__cp310-cp310-win_amd64.whl → 2.4.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (275)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +3 -1
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +50 -9
  7. mindspore/_extends/parse/compile_config.py +41 -0
  8. mindspore/_extends/parse/parser.py +9 -7
  9. mindspore/_extends/parse/standard_method.py +52 -14
  10. mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
  11. mindspore/amp.py +24 -10
  12. mindspore/common/__init__.py +6 -4
  13. mindspore/common/_pijit_context.py +190 -0
  14. mindspore/common/_register_for_tensor.py +2 -1
  15. mindspore/common/_tensor_overload.py +139 -0
  16. mindspore/common/api.py +102 -87
  17. mindspore/common/dump.py +5 -6
  18. mindspore/common/generator.py +1 -7
  19. mindspore/common/hook_handle.py +14 -26
  20. mindspore/common/initializer.py +51 -15
  21. mindspore/common/mindir_util.py +2 -2
  22. mindspore/common/parameter.py +62 -15
  23. mindspore/common/recompute.py +39 -9
  24. mindspore/common/sparse_tensor.py +7 -3
  25. mindspore/common/tensor.py +183 -37
  26. mindspore/communication/__init__.py +1 -1
  27. mindspore/communication/_comm_helper.py +38 -3
  28. mindspore/communication/comm_func.py +315 -60
  29. mindspore/communication/management.py +14 -14
  30. mindspore/context.py +132 -22
  31. mindspore/dataset/__init__.py +1 -1
  32. mindspore/dataset/audio/__init__.py +1 -1
  33. mindspore/dataset/core/config.py +7 -0
  34. mindspore/dataset/core/validator_helpers.py +7 -0
  35. mindspore/dataset/engine/cache_client.py +1 -1
  36. mindspore/dataset/engine/datasets.py +72 -44
  37. mindspore/dataset/engine/datasets_audio.py +7 -7
  38. mindspore/dataset/engine/datasets_standard_format.py +53 -3
  39. mindspore/dataset/engine/datasets_text.py +20 -20
  40. mindspore/dataset/engine/datasets_user_defined.py +174 -104
  41. mindspore/dataset/engine/datasets_vision.py +33 -33
  42. mindspore/dataset/engine/iterators.py +29 -0
  43. mindspore/dataset/engine/obs/util.py +7 -0
  44. mindspore/dataset/engine/queue.py +114 -60
  45. mindspore/dataset/engine/serializer_deserializer.py +2 -2
  46. mindspore/dataset/engine/validators.py +34 -14
  47. mindspore/dataset/text/__init__.py +1 -4
  48. mindspore/dataset/transforms/__init__.py +0 -3
  49. mindspore/dataset/utils/line_reader.py +2 -0
  50. mindspore/dataset/vision/__init__.py +1 -4
  51. mindspore/dataset/vision/utils.py +1 -1
  52. mindspore/dataset/vision/validators.py +2 -1
  53. mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
  54. mindspore/experimental/es/embedding_service.py +883 -0
  55. mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
  56. mindspore/experimental/llm_boost/__init__.py +21 -0
  57. mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
  58. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  59. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  60. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  61. mindspore/experimental/llm_boost/register.py +129 -0
  62. mindspore/experimental/llm_boost/utils.py +31 -0
  63. mindspore/experimental/optim/adamw.py +85 -0
  64. mindspore/experimental/optim/optimizer.py +3 -0
  65. mindspore/hal/__init__.py +3 -3
  66. mindspore/hal/contiguous_tensors_handle.py +175 -0
  67. mindspore/hal/stream.py +18 -0
  68. mindspore/include/api/model_group.h +13 -1
  69. mindspore/include/api/types.h +10 -10
  70. mindspore/include/dataset/config.h +2 -2
  71. mindspore/include/dataset/constants.h +2 -2
  72. mindspore/include/dataset/execute.h +2 -2
  73. mindspore/include/dataset/vision.h +4 -0
  74. mindspore/log.py +1 -1
  75. mindspore/mindrecord/filewriter.py +68 -51
  76. mindspore/mindspore_backend.dll +0 -0
  77. mindspore/mindspore_common.dll +0 -0
  78. mindspore/mindspore_core.dll +0 -0
  79. mindspore/mindspore_np_dtype.dll +0 -0
  80. mindspore/mindspore_ops.dll +0 -0
  81. mindspore/mint/__init__.py +983 -46
  82. mindspore/mint/distributed/__init__.py +31 -0
  83. mindspore/mint/distributed/distributed.py +254 -0
  84. mindspore/mint/nn/__init__.py +268 -23
  85. mindspore/mint/nn/functional.py +125 -19
  86. mindspore/mint/nn/layer/__init__.py +39 -0
  87. mindspore/mint/nn/layer/activation.py +133 -0
  88. mindspore/mint/nn/layer/normalization.py +477 -0
  89. mindspore/mint/nn/layer/pooling.py +110 -0
  90. mindspore/mint/optim/adamw.py +26 -13
  91. mindspore/mint/special/__init__.py +63 -0
  92. mindspore/multiprocessing/__init__.py +2 -1
  93. mindspore/nn/__init__.py +0 -1
  94. mindspore/nn/cell.py +276 -96
  95. mindspore/nn/layer/activation.py +211 -44
  96. mindspore/nn/layer/basic.py +137 -10
  97. mindspore/nn/layer/embedding.py +137 -2
  98. mindspore/nn/layer/normalization.py +101 -5
  99. mindspore/nn/layer/padding.py +34 -48
  100. mindspore/nn/layer/pooling.py +161 -7
  101. mindspore/nn/layer/transformer.py +3 -3
  102. mindspore/nn/loss/__init__.py +2 -2
  103. mindspore/nn/loss/loss.py +84 -6
  104. mindspore/nn/optim/__init__.py +2 -1
  105. mindspore/nn/optim/adadelta.py +1 -1
  106. mindspore/nn/optim/adam.py +1 -1
  107. mindspore/nn/optim/lamb.py +1 -1
  108. mindspore/nn/optim/tft_wrapper.py +124 -0
  109. mindspore/nn/wrap/cell_wrapper.py +12 -23
  110. mindspore/nn/wrap/grad_reducer.py +5 -5
  111. mindspore/nn/wrap/loss_scale.py +17 -3
  112. mindspore/numpy/__init__.py +1 -1
  113. mindspore/numpy/array_creations.py +65 -68
  114. mindspore/numpy/array_ops.py +64 -60
  115. mindspore/numpy/fft.py +610 -75
  116. mindspore/numpy/logic_ops.py +11 -10
  117. mindspore/numpy/math_ops.py +85 -84
  118. mindspore/numpy/utils_const.py +4 -4
  119. mindspore/opencv_core452.dll +0 -0
  120. mindspore/opencv_imgcodecs452.dll +0 -0
  121. mindspore/opencv_imgproc452.dll +0 -0
  122. mindspore/ops/__init__.py +6 -4
  123. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  124. mindspore/ops/_grad_experimental/grad_comm_ops.py +67 -4
  125. mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
  126. mindspore/ops/_vmap/vmap_array_ops.py +2 -4
  127. mindspore/ops/_vmap/vmap_math_ops.py +17 -1
  128. mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
  129. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +91 -7
  130. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
  131. mindspore/ops/auto_generate/gen_extend_func.py +767 -13
  132. mindspore/ops/auto_generate/gen_ops_def.py +2452 -364
  133. mindspore/ops/auto_generate/gen_ops_prim.py +5442 -1756
  134. mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
  135. mindspore/ops/composite/base.py +85 -48
  136. mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
  137. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
  138. mindspore/ops/function/__init__.py +22 -0
  139. mindspore/ops/function/array_func.py +492 -153
  140. mindspore/ops/function/debug_func.py +113 -1
  141. mindspore/ops/function/fft_func.py +15 -2
  142. mindspore/ops/function/grad/grad_func.py +3 -2
  143. mindspore/ops/function/math_func.py +564 -207
  144. mindspore/ops/function/nn_func.py +817 -383
  145. mindspore/ops/function/other_func.py +3 -2
  146. mindspore/ops/function/random_func.py +402 -12
  147. mindspore/ops/function/reshard_func.py +13 -11
  148. mindspore/ops/function/sparse_unary_func.py +1 -1
  149. mindspore/ops/function/vmap_func.py +3 -2
  150. mindspore/ops/functional.py +24 -14
  151. mindspore/ops/op_info_register.py +3 -3
  152. mindspore/ops/operations/__init__.py +7 -2
  153. mindspore/ops/operations/_grad_ops.py +2 -76
  154. mindspore/ops/operations/_infer_ops.py +1 -1
  155. mindspore/ops/operations/_inner_ops.py +71 -94
  156. mindspore/ops/operations/array_ops.py +14 -146
  157. mindspore/ops/operations/comm_ops.py +63 -53
  158. mindspore/ops/operations/custom_ops.py +83 -19
  159. mindspore/ops/operations/debug_ops.py +42 -10
  160. mindspore/ops/operations/manually_defined/_inner.py +12 -0
  161. mindspore/ops/operations/manually_defined/ops_def.py +273 -20
  162. mindspore/ops/operations/math_ops.py +12 -223
  163. mindspore/ops/operations/nn_ops.py +20 -114
  164. mindspore/ops/operations/other_ops.py +7 -4
  165. mindspore/ops/operations/random_ops.py +46 -1
  166. mindspore/ops/primitive.py +18 -6
  167. mindspore/ops_generate/arg_dtype_cast.py +2 -0
  168. mindspore/ops_generate/gen_aclnn_implement.py +11 -11
  169. mindspore/ops_generate/gen_constants.py +36 -0
  170. mindspore/ops_generate/gen_ops.py +67 -52
  171. mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
  172. mindspore/ops_generate/gen_pyboost_func.py +131 -47
  173. mindspore/ops_generate/op_proto.py +10 -3
  174. mindspore/ops_generate/pyboost_utils.py +14 -1
  175. mindspore/ops_generate/template.py +43 -21
  176. mindspore/parallel/__init__.py +3 -1
  177. mindspore/parallel/_auto_parallel_context.py +31 -9
  178. mindspore/parallel/_cell_wrapper.py +85 -0
  179. mindspore/parallel/_parallel_serialization.py +47 -19
  180. mindspore/parallel/_tensor.py +127 -13
  181. mindspore/parallel/_utils.py +53 -22
  182. mindspore/parallel/algo_parameter_config.py +5 -5
  183. mindspore/parallel/checkpoint_transform.py +46 -39
  184. mindspore/parallel/cluster/process_entity/__init__.py +1 -1
  185. mindspore/parallel/cluster/process_entity/_api.py +31 -23
  186. mindspore/parallel/cluster/process_entity/_utils.py +2 -27
  187. mindspore/parallel/parameter_broadcast.py +3 -4
  188. mindspore/parallel/shard.py +162 -31
  189. mindspore/parallel/transform_safetensors.py +1146 -0
  190. mindspore/profiler/__init__.py +2 -1
  191. mindspore/profiler/common/constant.py +29 -0
  192. mindspore/profiler/common/registry.py +47 -0
  193. mindspore/profiler/common/util.py +28 -0
  194. mindspore/profiler/dynamic_profiler.py +694 -0
  195. mindspore/profiler/envprofiling.py +17 -19
  196. mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
  197. mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
  198. mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
  199. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
  200. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
  201. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
  202. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  203. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
  204. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
  205. mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
  206. mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
  207. mindspore/profiler/parser/base_timeline_generator.py +19 -25
  208. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
  209. mindspore/profiler/parser/framework_parser.py +1 -391
  210. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  211. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  212. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  213. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  214. mindspore/profiler/parser/memory_usage_parser.py +0 -154
  215. mindspore/profiler/parser/profiler_info.py +78 -6
  216. mindspore/profiler/profiler.py +153 -0
  217. mindspore/profiler/profiling.py +285 -413
  218. mindspore/rewrite/__init__.py +1 -2
  219. mindspore/rewrite/common/namespace.py +4 -4
  220. mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
  221. mindspore/run_check/_check_version.py +39 -104
  222. mindspore/safeguard/rewrite_obfuscation.py +591 -247
  223. mindspore/train/__init__.py +4 -3
  224. mindspore/train/_utils.py +105 -19
  225. mindspore/train/amp.py +171 -53
  226. mindspore/train/callback/__init__.py +2 -2
  227. mindspore/train/callback/_callback.py +4 -4
  228. mindspore/train/callback/_checkpoint.py +97 -31
  229. mindspore/train/callback/_cluster_monitor.py +1 -1
  230. mindspore/train/callback/_flops_collector.py +1 -0
  231. mindspore/train/callback/_loss_monitor.py +3 -3
  232. mindspore/train/callback/_on_request_exit.py +145 -31
  233. mindspore/train/callback/_summary_collector.py +5 -5
  234. mindspore/train/callback/_tft_register.py +375 -0
  235. mindspore/train/dataset_helper.py +15 -3
  236. mindspore/train/metrics/metric.py +3 -3
  237. mindspore/train/metrics/roc.py +4 -4
  238. mindspore/train/mind_ir_pb2.py +44 -39
  239. mindspore/train/model.py +154 -58
  240. mindspore/train/serialization.py +342 -128
  241. mindspore/utils/__init__.py +21 -0
  242. mindspore/utils/utils.py +60 -0
  243. mindspore/version.py +1 -1
  244. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +13 -7
  245. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +248 -242
  246. mindspore/include/c_api/ms/abstract.h +0 -67
  247. mindspore/include/c_api/ms/attribute.h +0 -197
  248. mindspore/include/c_api/ms/base/handle_types.h +0 -43
  249. mindspore/include/c_api/ms/base/macros.h +0 -32
  250. mindspore/include/c_api/ms/base/status.h +0 -33
  251. mindspore/include/c_api/ms/base/types.h +0 -283
  252. mindspore/include/c_api/ms/context.h +0 -102
  253. mindspore/include/c_api/ms/graph.h +0 -160
  254. mindspore/include/c_api/ms/node.h +0 -606
  255. mindspore/include/c_api/ms/tensor.h +0 -161
  256. mindspore/include/c_api/ms/value.h +0 -84
  257. mindspore/mindspore_shared_lib.dll +0 -0
  258. mindspore/nn/extend/basic.py +0 -140
  259. mindspore/nn/extend/embedding.py +0 -143
  260. mindspore/nn/extend/layer/normalization.py +0 -109
  261. mindspore/nn/extend/pooling.py +0 -117
  262. mindspore/nn/layer/embedding_service.py +0 -531
  263. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
  264. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
  265. mindspore/ops/extend/__init__.py +0 -53
  266. mindspore/ops/extend/array_func.py +0 -218
  267. mindspore/ops/extend/math_func.py +0 -76
  268. mindspore/ops/extend/nn_func.py +0 -308
  269. mindspore/ops/silent_check.py +0 -162
  270. mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
  271. mindspore/profiler/parser/msadvisor_parser.py +0 -240
  272. mindspore/train/callback/_mindio_ttp.py +0 -443
  273. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +0 -0
  274. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
  275. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/train/__init__.py CHANGED
@@ -27,10 +27,10 @@ from mindspore.train.loss_scale_manager import LossScaleManager, FixedLossScaleM
  from mindspore.train.serialization import save_checkpoint, load_checkpoint, load_param_into_net, export, \
  load, parse_print, build_searched_strategy, merge_sliced_parameter, load_distributed_checkpoint, \
  async_ckpt_thread_status, restore_group_info_list, convert_model, obfuscate_model, export_split_mindir, \
- load_checkpoint_async, check_checkpoint
+ load_checkpoint_async, check_checkpoint, get_ckpt_path_with_strategy
  from mindspore.train.callback import Callback, LossMonitor, TimeMonitor, ModelCheckpoint, SummaryCollector, \
  CheckpointConfig, RunContext, LearningRateScheduler, SummaryLandscape, FlopsUtilizationCollector, \
- History, LambdaCallback, ReduceLROnPlateau, EarlyStopping, OnRequestExit, BackupAndRestore, MindIOTTPAdapter
+ History, LambdaCallback, ReduceLROnPlateau, EarlyStopping, OnRequestExit, BackupAndRestore, TFTRegister
  from mindspore.train.summary import SummaryRecord
  from mindspore.train.train_thor import ConvertNetUtils, ConvertModelUtils
  from mindspore.train.metrics import *
@@ -40,7 +40,8 @@ __all__ = ["Model", "DatasetHelper", "connect_network_with_dataset", "build_trai
  "FixedLossScaleManager", "DynamicLossScaleManager", "save_checkpoint", "load_checkpoint", "check_checkpoint",
  "load_param_into_net", "export", "load", "export_split_mindir", "parse_print", "build_searched_strategy",
  "merge_sliced_parameter", "load_distributed_checkpoint", "async_ckpt_thread_status",
- "restore_group_info_list", "convert_model", "data_sink", "obfuscate_model", "load_checkpoint_async"]
+ "restore_group_info_list", "convert_model", "data_sink", "obfuscate_model", "load_checkpoint_async",
+ "get_ckpt_path_with_strategy"]
  __all__.extend(callback.__all__)
  __all__.extend(summary.__all__)
  __all__.extend(train_thor.__all__)
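The two hunks above change the public surface of mindspore.train: get_ckpt_path_with_strategy is newly exported, and the MindIO TTP callback MindIOTTPAdapter is replaced by TFTRegister. A minimal import check against 2.4.1, sketched from the export changes shown above rather than taken from the package's own tests, would be:

# Sketch: assumes mindspore 2.4.1 is installed; names come from the hunks above.
from mindspore.train import get_ckpt_path_with_strategy   # new public helper in 2.4.1
from mindspore.train.callback import TFTRegister           # replaces MindIOTTPAdapter from 2.3.0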
mindspore/train/_utils.py CHANGED
@@ -16,6 +16,8 @@
  from __future__ import absolute_import

  import os
+ import threading
+ from datetime import datetime
  import json
  from collections.abc import Iterable

@@ -25,15 +27,18 @@ from mindspore.common.tensor import Tensor
  from mindspore._c_expression import Tensor as Tensor_
  from mindspore.common.dtype import dtype_to_nptype, pytype_to_dtype
  from mindspore.common import dtype as mstype
+ from mindspore import context
  from mindspore import log as logger
  from mindspore import _checkparam as Validator
  from mindspore.common.api import _cell_graph_executor
+ from mindspore.communication import get_group_size
  from mindspore.train.mind_ir_pb2 import ModelProto as mindir_model
  from mindspore.train.checkpoint_pb2 import Checkpoint
  from mindspore.train.node_strategy_pb2 import ParallelStrategyMap as ckpt_strategy
  from mindspore.train.lineage_pb2 import DatasetGraph, TrainLineage, EvaluationLineage, UserDefinedInfo
  from mindspore.parallel._parallel_serialization import _make_dir
  from mindspore.ops.operations import debug_ops
+ from mindspore.nn.cell import Cell


  def _convert_type(types):
@@ -71,6 +76,18 @@ def _exec_datagraph(exec_dataset, dataset_size, phase='dataset', create_data_inf
  queue_name = _cell_graph_executor.get_queue_name(phase)
  if queue_name is None:
  queue_name = str("")
+
+ use_pipeline_parallel = (context.get_auto_parallel_context("pipeline_stages") > 1)
+
+ # temp env to disable dynamic feature of sink size 1
+ dynamic_sink1_env = os.getenv("MS_DEV_DYNAMIC_SINK1", None)
+ dynamic_sink1 = True
+ if dynamic_sink1_env and dynamic_sink1_env.strip() in ['False', 'false']:
+ dynamic_sink1 = False
+
+ if use_pipeline_parallel or not dynamic_sink1:
+ create_data_info_queue = False
+
  exec_dataset = exec_dataset.device_que(send_epoch_end=send_epoch_end,
  create_data_info_queue=create_data_info_queue, queue_name=queue_name)
  _cell_graph_executor.init_dataset(exec_dataset.queue_name,
@@ -295,10 +312,68 @@ def parse_strategy_ckpt(file_name):

  for ele in param.parallel_layouts.tensor_map[0].ListFields()[0][1]:
  tensor_map.append(ele)
- layout_dict[param.param_name] = [dev_matrix, tensor_map]
+ layout_dict[param.param_name] = [dev_matrix, tensor_map, param.parallel_layouts.opt_weight_shard_step,
+ param.parallel_layouts.opt_weight_shard_size]
  return layout_dict


+ def _get_strategy_opt_shard(param_redundancy_dict, parameter_layout_opt_shard):
+ """Strategy ckpt append opt shard."""
+ for key, value in parameter_layout_opt_shard.items():
+ if value[1] not in (-1, 0):
+ opt_para_num = value[1]
+ param_redundancy_ranks = param_redundancy_dict.get(key)
+ res = []
+ for param_ranks in param_redundancy_ranks:
+ if len(param_ranks) % opt_para_num == 0:
+ for i in range(0, opt_para_num):
+ res.append(param_ranks[i::opt_para_num])
+ param_redundancy_dict[key] = tuple(res)
+
+
+ def _get_layout_opt_shard(layout_obj, param_redundancy_dict):
+ """Layout ckpt append opt shard."""
+ for key, value in layout_obj.items():
+ if value[5]:
+ world_groups = ("hccl_world_group", "nccl_world_group", "mccl_world_group")
+ if value[5] in world_groups:
+ opt_para_num = get_group_size()
+ elif "-" in value[5]:
+ opt_para_str = value[5].split("-")[0]
+ opt_para_num = int(opt_para_str)
+ else:
+ raise ValueError(f"For get_parameter_redundancy, the format of the parallel communication domain for "
+ f"the optimizer is incorrect.")
+ param_redundancy_ranks = param_redundancy_dict.get(key)
+ res = []
+ for param_ranks in param_redundancy_ranks:
+ if len(param_ranks) % opt_para_num == 0:
+ for i in range(0, opt_para_num):
+ res.append(param_ranks[i::opt_para_num])
+ param_redundancy_dict[key] = tuple(res)
+
+
+ def _get_parameter_redundancy_without_opt_shard(parameter_layout, param_redundancy_dict, initial_rank):
+ """Get parameter redundancy without opt shard."""
+ for key, (slices, deploy_loc, *_) in parameter_layout.items():
+ redundancy_matrix = np.zeros(shape=slices + [len(slices)], dtype=np.int8)
+ for i in deploy_loc:
+ internal_slice = tuple(slice(None) for _ in range(i))
+ for j in range(slices[-i - 1]):
+ if i == -1:
+ continue
+ else:
+ redundancy_matrix[(..., j) + internal_slice + (i,)] = j
+ locate_list = redundancy_matrix.reshape((-1, len(slices))).tolist()
+ redundancy_dict = {}
+ for index, locate in enumerate(locate_list):
+ redundancy_dict.setdefault(tuple(locate), []).append(index + initial_rank)
+ redundancy_list = []
+ for _, indices in sorted(redundancy_dict.items()):
+ redundancy_list.append(tuple(indices))
+ param_redundancy_dict[key] = tuple(redundancy_list)
+
+
  def get_parameter_redundancy(layout_obj, initial_rank=0):
  """
  Get parameter redundancy map.
@@ -319,31 +394,31 @@ def get_parameter_redundancy(layout_obj, initial_rank=0):
  'param4': ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15))}
  """
  if isinstance(layout_obj, str):
- parameter_layout = parse_strategy_ckpt(layout_obj)
+ parameter_layout_total = parse_strategy_ckpt(layout_obj)
+ parameter_layout = {}
+ parameter_layout_opt_shard = {}
+ for key, value in parameter_layout_total.items():
+ parameter_layout[key] = value[0:2]
+ parameter_layout_opt_shard[key] = value[2:]
+ elif isinstance(layout_obj, Cell):
+ from mindspore.communication.management import get_process_group_ranks
+ groups_ranks = (tuple(get_process_group_ranks()),)
+ param_redundancy_dict = {param.name: groups_ranks for _, param in layout_obj.parameters_and_names()}
+ return param_redundancy_dict
  else:
  parameter_layout = {}
  for k, v in layout_obj.items():
  parameter_layout[k] = v[:2]

  param_redundancy_dict = {}
- for key, (slices, deploy_loc, *_) in parameter_layout.items():
- redundancy_matrix = np.zeros(shape=slices + [len(slices)], dtype=np.int8)
- for i in deploy_loc:
- internal_slice = tuple(slice(None) for _ in range(i))
- for j in range(slices[-i - 1]):
- if i == -1:
- continue
- else:
- redundancy_matrix[(..., j) + internal_slice + (i,)] = j
- locate_list = redundancy_matrix.reshape((-1, len(slices))).tolist()
- redundancy_dict = {}
- for index, locate in enumerate(locate_list):
- redundancy_dict.setdefault(tuple(locate), []).append(index+initial_rank)
- redundancy_list = []
- for _, indices in sorted(redundancy_dict.items()):
- redundancy_list.append(tuple(indices))

- param_redundancy_dict[key] = tuple(redundancy_list)
+ _get_parameter_redundancy_without_opt_shard(parameter_layout, param_redundancy_dict, initial_rank)
+
+ if isinstance(layout_obj, str):
+ _get_strategy_opt_shard(param_redundancy_dict, parameter_layout_opt_shard)
+ else:
+ _get_layout_opt_shard(layout_obj, param_redundancy_dict)
+
  return param_redundancy_dict


@@ -437,3 +512,14 @@ def parse_hccl_file(hccl_file_path):
  rankid_dict[int(device["rank_id"])] = device["device_ip"]

  return rankid_dict
+
+
+ def vlog_print(level, module, file, line, message):
+ '''Read environment variable VLOG_v and print to log'''
+ if os.environ.get("VLOG_v") == level:
+ now = datetime.now()
+ formatted_time = now.strftime("%Y-%m-%d-%H:%M:%S.%f")[:-3] + f".{now.microsecond // 1000}"
+ path = 'mindspore' + file.split("mindspore")[-1]
+ pid = os.getpid()
+ thread_id = threading.get_ident()
+ print(f"[V{level}] {module}({pid},{thread_id},python):{formatted_time} [{path}:{line}] {message}", flush=True)
mindspore/train/amp.py CHANGED
@@ -16,6 +16,9 @@
  from __future__ import absolute_import
  import inspect
  import types
+ from typing import Any
+ import functools
+ import collections

  import mindspore as ms
  from mindspore import nn
@@ -29,8 +32,9 @@ from mindspore.train.loss_scale_manager import DynamicLossScaleManager, LossScal
  from mindspore import boost, context
  from mindspore.ops import operations as P
  from mindspore.ops import Primitive
+ from mindspore.ops import auto_generate as gen
  from mindspore import log as logger
-
+ from mindspore._c_expression.amp import pop_amp_strategy, push_amp_strategy, create_amp_strategy, AmpLevel

  AMP_WHITE_LIST = [
  nn.Conv1d,
@@ -52,17 +56,67 @@ AMP_WHITE_LIST = [
  P.BatchMatMul,
  P.PReLU,
  P.ReLU,
- P.Ger
+ P.Ger,
  ]

-
  AMP_BLACK_LIST = [
  nn.BatchNorm1d,
  nn.BatchNorm2d,
  nn.BatchNorm3d,
- nn.LayerNorm
+ nn.LayerNorm,
  ]

+ AMP_AUTO_WHITE_LIST = [
+ P.Conv2D,
+ P.Conv3D,
+ P.Conv2DTranspose,
+ P.Conv3DTranspose,
+ gen.Convolution,
+ P.MatMul,
+ gen.MatMulExt,
+ P.BatchMatMul,
+ gen.BatchMatMulExt,
+ gen.PReLU,
+ P.Einsum,
+ gen.Dense,
+ gen.Addmm,
+ ]
+
+ AMP_AUTO_BLACK_LIST = [
+ gen.Pow,
+ gen.ACos,
+ gen.Asin,
+ gen.Cosh,
+ P.Erfinv,
+ P.Exp,
+ P.Expm1,
+ P.Log,
+ P.Log1p,
+ P.Reciprocal,
+ P.Rsqrt,
+ P.Sinh,
+ P.Tan,
+ P.Softplus,
+ gen.SoftplusExt,
+ P.LayerNorm,
+ gen.LayerNormExt,
+ P.BatchNorm,
+ gen.GroupNorm,
+ P.KLDivLoss,
+ P.SmoothL1Loss,
+ P.MultilabelMarginLoss,
+ P.SoftMarginLoss,
+ P.TripletMarginLoss,
+ P.MultiMarginLoss,
+ P.BCEWithLogitsLoss,
+ P.Pdist,
+ P.Cdist,
+ P.Renorm,
+ ]
+
+ # Indicates which inputs of primitives need to be converted
+ AMP_PRIM_ARG_TABLE = collections.defaultdict(list, {})
+
  # Primitives in inner amp black list will not be converted in O2/O3
  _INNER_AMP_BLACK_LIST = []

@@ -302,6 +356,42 @@ def _auto_black_list(network, black_list, dtype):
  return network


+ class amp_decorator:
+ """
+ Auto mixed precision decorator.
+ Type of lists: List[Tuple[str, List[int]]]
+ """
+ def __init__(self, amp_level, amp_dtype, white_list, black_list):
+ self.amp_level = amp_level
+ self.amp_dtype = amp_dtype
+ self.white_list = white_list
+ self.black_list = black_list
+
+ def __enter__(self):
+ push_amp_strategy(self.amp_level, self.amp_dtype, self.white_list, self.black_list)
+
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any):
+ pop_amp_strategy()
+
+
+ def _set_amp_decorator(obj, amp_level, amp_dtype, white_list, black_list):
+ """
+ Set auto mixed precision context decorator for object.
+ Type of lists: List[Tuple[str, List[int]]]
+ """
+ if inspect.isfunction(obj) or inspect.ismethod(obj):
+ @functools.wraps(obj)
+ def wrapper(*args, **kwargs):
+ with amp_decorator(amp_level, amp_dtype, white_list, black_list):
+ return obj(*args, **kwargs)
+ return wrapper
+ if isinstance(obj, nn.Cell):
+ obj.construct = types.MethodType(
+ _set_amp_decorator(obj.construct.__func__, amp_level, amp_dtype, white_list, black_list), obj)
+ return obj
+ raise TypeError(f"For amp_level '{amp_level}', the network type should be Cell or function, bot got {type(obj)}.")
+
+
  def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
  """
  Returns a network processed with auto mixed precision.
@@ -312,26 +402,44 @@ def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
  converted to lower precision float, and calculation results are converted back to full precision float,
  i.e. ``mstype.float32`` .

- The framework has a set of built-in blacklists and whitelists, and the `amp_level` determines which cells and
- operators are specifically converted.
+ The `amp_level` and its corresponding lists determine which cells and operators are converted.

- The current built-in whitelist contents are:
+ When `amp_level` is set to ``O0``, no cells and operators are converted.

- [:class:`mindspore.nn.Conv1d`, :class:`mindspore.nn.Conv2d`, :class:`mindspore.nn.Conv3d`,
- :class:`mindspore.nn.Conv1dTranspose`, :class:`mindspore.nn.Conv2dTranspose`,
- :class:`mindspore.nn.Conv3dTranspose`, :class:`mindspore.nn.Dense`, :class:`mindspore.nn.LSTMCell`,
- :class:`mindspore.nn.RNNCell`, :class:`mindspore.nn.GRUCell`, :class:`mindspore.ops.Conv2D`,
- :class:`mindspore.ops.Conv3D`, :class:`mindspore.ops.Conv2DTranspose`,
- :class:`mindspore.ops.Conv3DTranspose`, :class:`mindspore.ops.MatMul`, :class:`mindspore.ops.BatchMatMul`,
- :class:`mindspore.ops.PReLU`, :class:`mindspore.ops.ReLU`, :class:`mindspore.ops.Ger`]
+ When `amp_level` is set to ``O1``, cells and operators in whitelist will be converted to lower precision
+ operations. For details on whitelist, refer to :func:`mindspore.amp.get_white_list`.

- The current built-in blacklist contents are:
+ When `amp_level` is set to ``O2``, cells in blacklist will maintain full precision, and cells outside the
+ list will be converted to low precision. For details on blacklist, refer to :func:`mindspore.amp.get_black_list`.

- [:class:`mindspore.nn.BatchNorm1d`, :class:`mindspore.nn.BatchNorm2d`, :class:`mindspore.nn.BatchNorm3d`,
- :class:`mindspore.nn.LayerNorm`]
+ When `amp_level` is set to ``O3``, all cells will be converted to low precision.
+
+ When `amp_level` is set to ``auto``, operators in `auto_whitelist` will be converted to lower precision
+ operations, operators in `auto_blacklist` will be converted to full precision operations, operators in
+ `promote_list` will be converted to the higher accuracy float type of the operator inputs, and operators
+ not listed will run in the type defined by their inputs.
+
+ Operators in `auto_whitelist` are:
+
+ ``Conv2D``, ``Conv3D``, ``Conv2DTranspose``, ``Conv3DTranspose``, ``Convolution``, ``MatMul``, ``MatMulExt``,
+ ``BatchMatMul``, ``BatchMatMulExt``, ``PReLU``, ``Einsum``, ``Dense``, ``Addmm``
+
+ Operators in `auto_blacklist` are:
+
+ ``Pow``, ``ACos``, ``Asin``, ``Cosh``, ``Erfinv``, ``Exp``, ``Expm1``, ``Log``, ``Log1p``, ``Reciprocal``,
+ ``Rsqrt``, ``Sinh``, ``Tan``, ``Softplus``, ``SoftplusExt``, ``LayerNorm``, ``LayerNormExt``, ``BatchNorm``,
+ ``GroupNorm``, ``KLDivLoss``, ``SmoothL1Loss``, ``MultilabelMarginLoss``, ``SoftMarginLoss``,
+ ``TripletMarginLoss``, ``MultiMarginLoss``, ``BCEWithLogitsLoss``, ``Pdist``, ``Cdist``, ``Renorm``,
+ ``ReduceProd``, ``Softmax``, ``LogSoftmax``, ``CumProd``, ``CumSum``, ``CumsumExt``, ``ProdExt``, ``SumExt``,
+ ``Norm``
+
+ Operators in `promote_list` are:
+
+ ``Addcdiv``, ``Addcmul``, ``Cross``, ``_PyboostCrossPrim``, ``Dot``, ``GridSampler2D``, ``GridSampler3D``,
+ ``BiasAdd``

  For details on automatic mixed precision, refer to
- `Automatic Mix Precision <https://www.mindspore.cn/tutorials/en/master/advanced/mixed_precision.html>`_ .
+ `Automatic Mix Precision <https://www.mindspore.cn/tutorials/en/master/beginner/mixed_precision.html>`_ .

  Note:
  - Repeatedly calling mixed-precision interfaces, such as `custom_mixed_precision` and `auto_mixed_precision`,
@@ -339,10 +447,18 @@ def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
  - If interfaces like `Model` and `build_train_network` is used to train the network which is converted by
  mixed-precision interfaces such as `custom_mixed_precision` and `auto_mixed_precision`, `amp_level`
  need to be configured to ``O0`` to avoid the duplicated accuracy conversion.
+ - When `amp_level` is set to ``auto``, the output of the network may be lower precision. In this case, you
+ may need to manually convert the type to avoid type inconsistency errors of the loss function.
+ - When `amp_level` is set to ``auto``, and cells in the network are configured with `to_float`, the accuracy
+ specified by `to_float` takes effect first.
+
+ .. warning::
+ ``auto`` level of `amp_level` is an experimental API that is subject to change or deletion.

  Args:
- network (Cell): Definition of the network.
- amp_level (str): Supports ["O0", "O1", "O2", "O3"]. Default: ``"O0"`` .
+ network (Union[Cell, function]): Definition of the network. Function type is supported only when `amp_level`
+ is set to ``auto`` .
+ amp_level (str): Supports ["O0", "O1", "O2", "O3", "auto"]. Default: ``"O0"`` .

  - "O0": Do not change.
  - "O1": Convert cells and operators in whitelist to lower precision operations, and keep full
@@ -350,12 +466,16 @@ def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
  - "O2": Keep full precision operations for cells and operators in blacklist, and convert the rest
  to lower precision operations.
  - "O3": Cast network to lower precision.
+ - "auto": Operators in `auto_whitelist` will be converted to lower precision operations, operators in
+ `auto_blacklist` will be converted to full precision, operators in `promote_list` will be converted
+ to the higher accuracy float type of the operator inputs, and operators not listed will run in the
+ type defined by their inputs.

  dtype (Type): The type used in lower precision calculations, can be ``mstype.float16`` or ``mstype.bfloat16`` ,
  default: ``mstype.float16`` .

  Raises:
- TypeError: If `network` is not a Cell.
+ TypeError: If `network` is not a Cell or a function.
  ValueError: If `dtype` is not one of ``mstype.float16`` , ``mstype.bfloat16`` .
  ValueError: If `amp_level` is not within the supported range.

@@ -368,7 +488,12 @@ def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
  >>> net = amp.auto_mixed_precision(network, amp_level)
  """
  if not isinstance(network, nn.Cell):
- raise TypeError("The network type should be Cell.")
+ if amp_level == "auto":
+ if not inspect.isfunction(network) and not inspect.ismethod(network):
+ raise TypeError("For amp_level 'auto', the network type should be Cell or function.")
+ # function is supported for amp_level 'auto'
+ else:
+ raise TypeError(f"For amp_level '{amp_level}', the network type should be Cell.")

  if dtype not in (mstype.float16, mstype.bfloat16):
  raise ValueError(f"The dtype should be one of (mstype.float16, mstype.bfloat16), but got {dtype}.")
@@ -377,7 +502,7 @@ def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
  return network

  # Return network if the same amp level has already been configurated
- if getattr(network, "_amp_level") in ("O1", "O2", "O3"):
+ if hasattr(network, "_amp_level") and getattr(network, "_amp_level") in ("O1", "O2", "O3", "auto"):
  logger.warning(f"The network's auto mixed-precision level is adjusted from {getattr(network, '_amp_level')} "
  f"to {amp_level}, and repeated calls to mixed-precision interfaces can cause performance "
  f"degradation.")
@@ -396,8 +521,16 @@ def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
  else:
  network.to_float(dtype)
  network = _OutputTo32(network)
+ elif amp_level == "auto":
+ white_list = [(prim.__name__, AMP_PRIM_ARG_TABLE[prim]) for prim in AMP_AUTO_WHITE_LIST]
+ black_list = [(prim.__name__, AMP_PRIM_ARG_TABLE[prim]) for prim in AMP_AUTO_BLACK_LIST]
+ # set amp_strategy attribute for the object
+ amp_strategy = create_amp_strategy(AmpLevel.AmpAuto, dtype, white_list, black_list)
+ setattr(network, "amp_strategy", amp_strategy)
+ # set amp_strategy context decorator for the object
+ network = _set_amp_decorator(network, AmpLevel.AmpAuto, dtype, white_list, black_list)
  else:
- raise ValueError("The amp level {} is not supported".format(amp_level))
+ raise ValueError(f"The amp level {amp_level} is not supported")

  setattr(network, "_amp_level", amp_level)

@@ -437,6 +570,10 @@ _config_level = {
  "O3": {
  "keep_batchnorm_fp32": False,
  "cast_model_type": mstype.float16,
+ "loss_scale_manager": None},
+ "auto": {
+ "keep_batchnorm_fp32": False,
+ "cast_model_type": mstype.float32,
  "loss_scale_manager": None}}


@@ -461,20 +598,11 @@ def _check_kwargs(key_words):
  def _check_level(level, boost_level):
  """Check level."""
  if not isinstance(level, str):
- raise TypeError("The argument `level` must be a string in ['O0', 'O1', 'O2', 'O3', 'auto'], \
- but got type {}.".format(type(level)))
+ raise TypeError(f"The argument `level` must be a string in ['O0', 'O1', 'O2', 'O3', 'auto'],"
+ f"but got type {type(level)}.")
  validator.check('level', level, "", ['O0', 'O1', 'O2', 'O3', 'auto'], validator.IN)
  validator.check('boost_level', boost_level, "", ['O0', 'O1', 'O2'], validator.IN)

- if level == "auto":
- device_target = context.get_context('device_target')
- if device_target == "GPU":
- level = "O2"
- elif device_target == "Ascend":
- level = "O3"
- else:
- raise ValueError("Level `auto` only support when `device_target` is GPU or Ascend.")
-
  enable_boost = False
  if boost_level in ["O1", "O2"]:
  enable_boost = True
@@ -499,7 +627,8 @@ def _add_loss_network(network, loss_fn, cast_model_type):
  return self._loss_fn(F.mixed_precision_cast(mstype.float32, out), label)

  validator.check_value_type('loss_fn', loss_fn, nn.Cell)
- if cast_model_type == mstype.float16:
+ if cast_model_type in (mstype.float16, mstype.bfloat16) or \
+ (hasattr(network, "_amp_level") and getattr(network, "_amp_level") in ("O2", "O3", "auto")):
  network = WithLossCell(network, loss_fn)
  else:
  network = nn.WithLossCell(network, loss_fn)
@@ -555,20 +684,10 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', boost_leve
  Default: ``None`` .
  level (str): Supports ['O0', 'O1', 'O2', 'O3', 'auto']. Default: ``'O0'`` .

- - 'O0': Do not change.
- - 'O1': Cast the operators in white_list to float16, the remaining operators are kept in float32.
- The operators in the whitelist: [Conv1d, Conv2d, Conv3d, Conv1dTranspose, Conv2dTranspose,
- Conv3dTranspose, Dense, LSTMCell, RNNCell, GRUCell, MatMul, BatchMatMul, PReLU, ReLU, Ger].
- - 'O2': Cast network to float16, keep `mindspore.nn.BatchNorm` series interface,
- :class:`mindspore.nn.LayerNorm` and `loss_fn` (if set) run in float32, using dynamic loss scale.
- - 'O3': Cast network to float16, with additional property `keep_batchnorm_fp32=False` .
- - 'auto': Set to level to recommended level in different devices. Set level to 'O2' on GPU, Set
- level to 'O3' Ascend. The recommended level is chosen by the export experience, not applicable to all
- scenarios. User should specify the level for special network.
-
- 'O2' is recommended on GPU, 'O3' is recommended on Ascend. Property of `keep_batchnorm_fp32`,
- `cast_model_type` and `loss_scale_manager` determined by `level` setting may be overwritten by settings in
- `kwargs`.
+ For details on amp level, refer to :func:`mindspore.amp.auto_mixed_precision`.
+
+ Property of `keep_batchnorm_fp32`, `cast_model_type` and `loss_scale_manager` determined by `level`
+ setting may be overwritten by settings in `kwargs`.

  boost_level (str): Option for argument `level` in `mindspore.boost` , level for boost mode
  training. Supports ['O0', 'O1', 'O2']. Default: ``'O0'`` .
@@ -649,7 +768,7 @@

  def get_white_list():
  """
- Provide a copy of internal white list used by auto mixed precision.
+ Provide a copy of internal white list used by auto mixed precision with `amp_level` set to ``O1``.

  The current built-in whitelist contents are:

@@ -687,7 +806,7 @@

  def get_black_list():
  """
- Provide a copy of internal black list used by auto mixed precision.
+ Provide a copy of internal black list used by auto mixed precision with `amp_level` set to ``O2``.

  The current built-in blacklist contents are:

@@ -710,7 +829,6 @@

  def custom_mixed_precision(network, *, white_list=None, black_list=None, dtype=mstype.float16):
  """
- Custom mixed precision by setting whitelist or blacklist.
  When the `white_list` is provided, primitives and cells in `white_list` will perform the precision conversion.
  When the `black_list` is provided, cells that are not in `black_list` will perform the pereision conversion.
  Only one of `white_list` and `black_list` should be provided.
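Putting the amp.py changes together, the new ``auto`` level attaches an amp strategy to the network and wraps its construct call in the amp_decorator context shown above. A rough usage sketch based on the updated docstring (the toy Net cell below is invented purely for illustration):

import mindspore as ms
from mindspore import amp, nn

class Net(nn.Cell):
    # hypothetical toy network used only to illustrate the call
    def __init__(self):
        super().__init__()
        self.dense = nn.Dense(16, 8)

    def construct(self, x):
        return self.dense(x)

# 'auto': whitelist ops run in float16, blacklist ops stay in float32,
# promote-list ops follow the highest-precision input type.
net = amp.auto_mixed_precision(Net(), amp_level="auto", dtype=ms.float16)
# Per the note in the docstring above, the output may now be lower precision, so a manual
# cast back to float32 may be needed before a float32-only loss function.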
mindspore/train/callback/__init__.py CHANGED
@@ -36,9 +36,9 @@ from mindspore.train.callback._reduce_lr_on_plateau import ReduceLROnPlateau
  from mindspore.train.callback._on_request_exit import OnRequestExit
  from mindspore.train.callback._backup_and_restore import BackupAndRestore
  from mindspore.train.callback._flops_collector import FlopsUtilizationCollector
- from mindspore.train.callback._mindio_ttp import MindIOTTPAdapter
+ from mindspore.train.callback._tft_register import TFTRegister

  __all__ = ["Callback", "LossMonitor", "TimeMonitor", "ModelCheckpoint", "FlopsUtilizationCollector",
  "SummaryCollector", "CheckpointConfig", "RunContext", "LearningRateScheduler", "SummaryLandscape",
  "History", "LambdaCallback", "ReduceLROnPlateau", "EarlyStopping", "OnRequestExit", "BackupAndRestore",
- "MindIOTTPAdapter"]
+ "TFTRegister"]
mindspore/train/callback/_callback.py CHANGED
@@ -123,7 +123,7 @@ class Callback:
  recording current attributes. Users can add custimized attributes to the information.
  Training process can also be stopped by calling `request_stop` method. For details
  of custom Callback, please check
- `Callback tutorial <https://www.mindspore.cn/tutorials/en/master/advanced/model/
+ `Callback tutorial <https://www.mindspore.cn/docs/en/master/model_train/train_process/model/
  callback.html#customized-callback-mechanism>`_.

  Examples:
@@ -493,7 +493,7 @@ class RunContext:
  `RunContext.original_args()` and add extra attributes to the information, but also can stop the
  training process by calling `request_stop` method. For details of custom Callback,
  please check
- `Callback Mechanism <https://www.mindspore.cn/tutorials/en/master/advanced/model/callback.html>`_.
+ `Callback Mechanism <https://www.mindspore.cn/docs/en/master/model_train/train_process/model/callback.html>`_.

  `RunContext.original_args()` holds the model context information as a dictionary variable, and
  different attributes of the dictionary are stored in training or eval process. Details are as follows:
@@ -575,7 +575,7 @@

  Tutorial Examples:
  - `Callback Mechanism - Customized Callback Mechanism
- <https://mindspore.cn/tutorials/en/master/advanced/model/callback.html#customized-callback-mechanism>`_
+ <https://mindspore.cn/docs/en/master/model_train/train_process/model/callback.html#customized-callback-mechanism>`_
  """
  return self._original_args

@@ -588,7 +588,7 @@

  Tutorial Examples:
  - `Callback Mechanism - Customized Training Termination Time
- <https://mindspore.cn/tutorials/en/master/advanced/model/callback.html#
+ <https://mindspore.cn/docs/en/master/model_train/train_process/model/callback.html#
  customized-training-termination-time>`_
  """
  self._stop_requested = True