mindspore-2.7.0rc1-cp310-cp310-win_amd64.whl → mindspore-2.7.1-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +5 -2
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +2 -2
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +24 -1
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
- mindspore/_extends/parse/parser.py +28 -22
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +23 -2
- mindspore/_extends/parse/trope.py +2 -1
- mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
- mindspore/amp.py +0 -18
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/base.py +29 -2
- mindspore/common/__init__.py +18 -12
- mindspore/common/_decorator.py +3 -2
- mindspore/common/_grad_function.py +3 -1
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +371 -96
- mindspore/common/_utils.py +7 -43
- mindspore/common/api.py +434 -135
- mindspore/common/dtype.py +98 -57
- mindspore/common/dump.py +7 -108
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/hook_handle.py +82 -3
- mindspore/common/jit_config.py +5 -1
- mindspore/common/jit_trace.py +27 -12
- mindspore/common/lazy_inline.py +5 -3
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +17 -127
- mindspore/common/recompute.py +4 -13
- mindspore/common/tensor.py +50 -217
- mindspore/communication/_comm_helper.py +11 -1
- mindspore/communication/comm_func.py +138 -4
- mindspore/communication/management.py +85 -1
- mindspore/config/op_info.config +0 -15
- mindspore/context.py +20 -106
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +35 -1
- mindspore/dataset/engine/datasets.py +338 -319
- mindspore/dataset/engine/datasets_user_defined.py +38 -22
- mindspore/dataset/engine/datasets_vision.py +1 -1
- mindspore/dataset/engine/validators.py +1 -15
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/transforms.py +3 -3
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
- mindspore/graph/custom_pass.py +55 -0
- mindspore/include/api/cell.h +28 -4
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +0 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +5 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +6 -1
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/__init__.py +3 -3
- mindspore/mindrecord/common/exceptions.py +1 -0
- mindspore/mindrecord/config.py +1 -1
- mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
- mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
- mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
- mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
- mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
- mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
- mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
- mindspore/mindrecord/filereader.py +4 -4
- mindspore/mindrecord/filewriter.py +5 -5
- mindspore/mindrecord/mindpage.py +2 -2
- mindspore/mindrecord/tools/cifar10.py +4 -3
- mindspore/mindrecord/tools/cifar100.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
- mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
- mindspore/mindrecord/tools/csv_to_mr.py +1 -1
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_cluster.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_hardware_abstract.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mindspore_runtime_utils.dll +0 -0
- mindspore/mindspore_tools.dll +0 -0
- mindspore/mint/__init__.py +15 -10
- mindspore/mint/distributed/__init__.py +4 -0
- mindspore/mint/distributed/distributed.py +392 -69
- mindspore/mint/nn/__init__.py +2 -16
- mindspore/mint/nn/functional.py +4 -110
- mindspore/mint/nn/layer/__init__.py +0 -2
- mindspore/mint/nn/layer/_functions.py +1 -2
- mindspore/mint/nn/layer/activation.py +0 -6
- mindspore/mint/nn/layer/basic.py +0 -47
- mindspore/mint/nn/layer/conv.py +10 -10
- mindspore/mint/nn/layer/normalization.py +11 -16
- mindspore/mint/nn/layer/pooling.py +0 -4
- mindspore/nn/__init__.py +1 -3
- mindspore/nn/cell.py +231 -239
- mindspore/nn/layer/activation.py +4 -2
- mindspore/nn/layer/basic.py +56 -14
- mindspore/nn/layer/container.py +16 -0
- mindspore/nn/layer/embedding.py +4 -169
- mindspore/nn/layer/image.py +1 -1
- mindspore/nn/layer/normalization.py +2 -1
- mindspore/nn/layer/thor_layer.py +4 -85
- mindspore/nn/optim/ada_grad.py +0 -1
- mindspore/nn/optim/adafactor.py +0 -1
- mindspore/nn/optim/adam.py +32 -127
- mindspore/nn/optim/adamax.py +0 -1
- mindspore/nn/optim/asgd.py +0 -1
- mindspore/nn/optim/ftrl.py +8 -102
- mindspore/nn/optim/lamb.py +1 -4
- mindspore/nn/optim/lars.py +0 -3
- mindspore/nn/optim/lazyadam.py +25 -218
- mindspore/nn/optim/momentum.py +5 -43
- mindspore/nn/optim/optimizer.py +6 -55
- mindspore/nn/optim/proximal_ada_grad.py +0 -1
- mindspore/nn/optim/rmsprop.py +0 -1
- mindspore/nn/optim/rprop.py +0 -1
- mindspore/nn/optim/sgd.py +0 -1
- mindspore/nn/optim/tft_wrapper.py +2 -4
- mindspore/nn/optim/thor.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -8
- mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
- mindspore/nn/probability/bijector/power_transform.py +20 -21
- mindspore/nn/probability/bijector/scalar_affine.py +5 -5
- mindspore/nn/probability/bijector/softplus.py +13 -14
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +39 -5
- mindspore/nn/wrap/grad_reducer.py +4 -89
- mindspore/numpy/array_creations.py +4 -4
- mindspore/numpy/fft.py +9 -9
- mindspore/numpy/utils_const.py +1 -1
- mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
- mindspore/onnx/onnx_export.py +137 -0
- mindspore/opencv_core4110.dll +0 -0
- mindspore/opencv_imgcodecs4110.dll +0 -0
- mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
- mindspore/ops/__init__.py +2 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
- mindspore/ops/_op_impl/cpu/__init__.py +1 -5
- mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
- mindspore/ops/auto_generate/gen_extend_func.py +6 -11
- mindspore/ops/auto_generate/gen_ops_def.py +385 -154
- mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
- mindspore/ops/communication.py +97 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +16 -2
- mindspore/ops/composite/multitype_ops/__init__.py +3 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
- mindspore/ops/function/__init__.py +2 -0
- mindspore/ops/function/array_func.py +24 -18
- mindspore/ops/function/comm_func.py +3883 -0
- mindspore/ops/function/debug_func.py +7 -6
- mindspore/ops/function/grad/grad_func.py +4 -12
- mindspore/ops/function/math_func.py +89 -86
- mindspore/ops/function/nn_func.py +92 -313
- mindspore/ops/function/random_func.py +9 -18
- mindspore/ops/functional.py +4 -1
- mindspore/ops/functional_overload.py +377 -30
- mindspore/ops/operations/__init__.py +2 -5
- mindspore/ops/operations/_custom_ops_utils.py +7 -9
- mindspore/ops/operations/_inner_ops.py +12 -50
- mindspore/ops/operations/_rl_inner_ops.py +0 -933
- mindspore/ops/operations/array_ops.py +5 -50
- mindspore/ops/operations/comm_ops.py +95 -17
- mindspore/ops/operations/custom_ops.py +237 -22
- mindspore/ops/operations/debug_ops.py +33 -35
- mindspore/ops/operations/manually_defined/ops_def.py +39 -318
- mindspore/ops/operations/math_ops.py +5 -5
- mindspore/ops/operations/nn_ops.py +3 -3
- mindspore/ops/operations/sparse_ops.py +0 -83
- mindspore/ops/primitive.py +4 -27
- mindspore/ops/tensor_method.py +88 -10
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
- mindspore/ops_generate/api/functions_cc_generator.py +53 -4
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
- mindspore/ops_generate/common/gen_constants.py +11 -10
- mindspore/ops_generate/common/op_proto.py +18 -1
- mindspore/ops_generate/common/template.py +102 -245
- mindspore/ops_generate/common/template_utils.py +212 -0
- mindspore/ops_generate/gen_custom_ops.py +69 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
- mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
- mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
- mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
- mindspore/ops_generate/resources/yaml_loader.py +13 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
- mindspore/parallel/_auto_parallel_context.py +5 -15
- mindspore/parallel/_cell_wrapper.py +1 -1
- mindspore/parallel/_parallel_serialization.py +4 -6
- mindspore/parallel/_ps_context.py +2 -2
- mindspore/parallel/_utils.py +34 -17
- mindspore/parallel/auto_parallel.py +23 -9
- mindspore/parallel/checkpoint_transform.py +20 -2
- mindspore/parallel/cluster/process_entity/_api.py +28 -33
- mindspore/parallel/cluster/process_entity/_utils.py +9 -5
- mindspore/parallel/cluster/run.py +5 -3
- mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
- mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
- mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
- mindspore/parallel/function/reshard_func.py +6 -5
- mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
- mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
- mindspore/parallel/shard.py +7 -21
- mindspore/parallel/strategy.py +336 -0
- mindspore/parallel/transform_safetensors.py +127 -20
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
- mindspore/profiler/common/constant.py +5 -0
- mindspore/profiler/common/file_manager.py +9 -0
- mindspore/profiler/common/msprof_cmd_tool.py +40 -4
- mindspore/profiler/common/path_manager.py +65 -24
- mindspore/profiler/common/profiler_context.py +27 -14
- mindspore/profiler/common/profiler_info.py +3 -3
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +10 -6
- mindspore/profiler/common/profiler_path_manager.py +13 -0
- mindspore/profiler/common/util.py +30 -3
- mindspore/profiler/dynamic_profiler.py +91 -46
- mindspore/profiler/envprofiler.py +30 -5
- mindspore/profiler/experimental_config.py +18 -2
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +34 -7
- mindspore/profiler/profiler.py +193 -145
- mindspore/profiler/profiler_action_controller.py +1 -1
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +108 -24
- mindspore/runtime/__init__.py +9 -6
- mindspore/runtime/executor.py +35 -0
- mindspore/runtime/memory.py +113 -0
- mindspore/runtime/thread_bind_core.py +1 -1
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
- mindspore/tools/data_dump.py +130 -0
- mindspore/tools/sdc_detect.py +91 -0
- mindspore/tools/stress_detect.py +63 -0
- mindspore/train/__init__.py +6 -6
- mindspore/train/_utils.py +8 -21
- mindspore/train/amp.py +6 -7
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +1 -17
- mindspore/train/callback/_flops_collector.py +10 -6
- mindspore/train/callback/_train_fault_tolerance.py +72 -25
- mindspore/train/data_sink.py +5 -9
- mindspore/train/dataset_helper.py +5 -5
- mindspore/train/model.py +41 -230
- mindspore/train/serialization.py +160 -401
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dlpack.py +92 -0
- mindspore/utils/dryrun.py +1 -1
- mindspore/utils/runtime_execution_order_check.py +10 -0
- mindspore/utils/sdc_detect.py +14 -12
- mindspore/utils/stress_detect.py +43 -0
- mindspore/utils/utils.py +152 -16
- mindspore/version.py +1 -1
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
- mindspore/experimental/llm_boost/atb/__init__.py +0 -23
- mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
- mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
- mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
- mindspore/experimental/llm_boost/register.py +0 -130
- mindspore/experimental/llm_boost/utils.py +0 -31
- mindspore/include/OWNERS +0 -7
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
- mindspore/nn/reinforcement/_batch_read_write.py +0 -142
- mindspore/nn/reinforcement/_tensors_queue.py +0 -152
- mindspore/nn/reinforcement/tensor_array.py +0 -145
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
- mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
- mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
- mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
- mindspore/ops/operations/_tensor_array.py +0 -359
- mindspore/ops/operations/rl_ops.py +0 -288
- mindspore/parallel/_offload_context.py +0 -275
- mindspore/parallel/_recovery_context.py +0 -115
- mindspore/parallel/_transformer/__init__.py +0 -35
- mindspore/parallel/_transformer/layers.py +0 -765
- mindspore/parallel/_transformer/loss.py +0 -251
- mindspore/parallel/_transformer/moe.py +0 -693
- mindspore/parallel/_transformer/op_parallel_config.py +0 -222
- mindspore/parallel/_transformer/transformer.py +0 -3124
- mindspore/parallel/mpi/_mpi_config.py +0 -116
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/train/memory_profiling_pb2.py +0 -298
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
mindspore/common/parameter.py
CHANGED

@@ -21,7 +21,6 @@ from copy import copy
 import time
 import os
 import sys
-import math
 import numbers
 import numpy as np
 
@@ -29,19 +28,14 @@ from mindspore import log as logger
 from mindspore.log import _LogActionOnce
 from mindspore._c_expression import ParamInfo
 from mindspore.common import dtype as mstype
-from mindspore import context
-from mindspore.common._utils import get_slice_num, get_slice_shape
 from mindspore.common.initializer import initializer
 from mindspore.common.tensor import Tensor, _TensorMeta
+from mindspore.common.hook_handle import _update_hook_version
 from mindspore import _checkparam as Validator
 from mindspore._check_jit_forbidden_api import jit_forbidden_register
 from mindspore._c_expression import TensorPy as Tensor_
 from mindspore.parallel._tensor import _get_slice_index
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
-from mindspore.parallel._ps_context import _is_role_worker, _is_role_pserver, _is_role_sched, _clone_hash_table, \
-    _is_ps_mode
-from mindspore.parallel._ps_context import _reinsert_hash_table_size, _insert_accumu_init_info, _cache_enable
-from mindspore.common._decorator import deprecated
 from mindspore.communication._comm_helper import _is_initialized
 from mindspore.communication import get_group_size, get_rank
 import mindspore.common._monad as monad
@@ -71,19 +65,6 @@ def _is_parameter_generated(param_name):
 # Global variable for parameter unique key.
 _GLOBAL_PARAMETER_KEY = -1
 
-# Global variable to mark the hook of parameter is updated
-_parameter_hook_updated = True
-
-
-def set_parameter_hook_updated(value):
-    global _parameter_hook_updated
-    _parameter_hook_updated = value
-
-
-def parameter_hook_updated():
-    global _parameter_hook_updated
-    return _parameter_hook_updated
-
 
 def _is_in_auto_parallel_mode():
     """Get parallel mode."""
@@ -92,7 +73,7 @@ def _is_in_auto_parallel_mode():
 
 def _is_parallel_mode():
     """ Whether is parallel mode """
-    if not _is_initialized()
+    if not _is_initialized():
         return False
     if os.getenv("RUN_MODE") != "predict":
         return False
@@ -150,11 +131,7 @@ def _offload_if_config(data):
     Args:
         data: The parameter data to offload.
     """
-    if
-        return
-
-    offload_context = context.get_offload_context()
-    if offload_context.get("offload_param", None) != "disk":
+    if data is None:
         return
 
     data_size_threshold = 512
@@ -231,7 +208,10 @@ class Parameter(Tensor_):
                self.param_a = Parameter(Tensor([1], ms.float32), name="name_a")
                self.param_tuple = (self.param_a, self.param_a)
 
-        requires_grad (bool):
+        requires_grad (bool): It is Used to filter parameters in :func:`mindspore.nn.Cell.trainable_params()`.
+            If it is ``False``, the filter parameters will not be returned in
+            :func:`mindspore.nn.Cell.trainable_params()`.
+            Default: ``True`` .
         layerwise_parallel (bool): When `layerwise_parallel` is true in data/hybrid parallel mode,
            broadcast and gradients communication would not be applied to the `Parameter`. Default: ``False`` .
        parallel_optimizer (bool): It is used to filter the weight shard operation in parallel mode. It works only when
@@ -242,10 +222,8 @@ class Parameter(Tensor_):
        device(str): Only Ascend device target is supported. It is used to specify the device which the parameter is
            stored. By default, the parameter will be stored on NPU while computing. When the device is specified as
            ``"CPU"``, the parameter will be loaded into the device when it needs to be used, and unloaded to the CPU
-            after use. It takes effext only when `
-
-            Less device memory is needed when device is
-            specified as ``"CPU"``.
+            after use. It takes effext only when `jit_level` is not ``"O2"`` and `memory_optimize_level` is ``O0``
+            in :func:`mindspore.set_context`. Less device memory is needed when device is specified as ``"CPU"``.
 
    Examples:
        >>> import numpy as np
@@ -284,8 +262,6 @@ class Parameter(Tensor_):
        obj.is_default_input_init = init_data_flag
        if obj.has_init:
            obj.init_mode = default_input
-        else:
-            _offload_if_config(obj)
        return obj
 
    def __reduce_ex__(self, _):
@@ -301,7 +277,6 @@ class Parameter(Tensor_):
    def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False, parallel_optimizer=True,
                 storage_format="", device=None):
        self.param_info = ParamInfo()
-        self.init_in_server = False
        self.name = name
        self.requires_grad = requires_grad
        self.layerwise_parallel = layerwise_parallel
@@ -312,32 +287,15 @@ class Parameter(Tensor_):
        self.is_init = False
        self._inited_param = None
        self._sliced = False
-        self.is_param_ps = False
-        self.push_weight_to_server = False
-        self.pull_weight_from_server = False
        self.requires_aggr = True
        self._cast_type = None
        self._unique = False
        self.is_in_parallel = _is_in_auto_parallel_mode()
        self._pipeline_stage_list = []
-        self.slice_num = 1
        if -1 in self.shape:
            raise ValueError(f"All shape elements of the Parameter must be positive. But got None.")
        if isinstance(default_input, (Tensor_, Tensor)):
-
-            # And save out range data to persistent storage to support TB-Level size parameter.
-            slice_num_of_persistent_data = get_slice_num(default_input.dtype, default_input.shape)
-            if slice_num_of_persistent_data > 1:
-                data_shape = list(default_input.shape)
-                slice_first_dim = math.ceil(data_shape[0] / slice_num_of_persistent_data)
-                data_shape[0] = slice_first_dim
-                self.param_info.use_persistent_storage = True
-                self.param_info.origin_shape = default_input.shape
-                self.slice_num = slice_num_of_persistent_data
-                Tensor_.__init__(self, dtype=default_input.dtype, shape=tuple(data_shape))
-            else:
-                Tensor_.__init__(self, dtype=default_input.dtype, shape=default_input.shape)
-
+            Tensor_.__init__(self, dtype=default_input.dtype, shape=default_input.shape)
        elif isinstance(default_input, int):
            Tensor_.__init__(self, dtype=mstype.int64, shape=())
        elif isinstance(default_input, float):
@@ -399,11 +357,10 @@ class Parameter(Tensor_):
                return (Tensor, data.asnumpy(), mstype.qint4x2)
            return (Tensor, data.asnumpy())
 
-            not_init_data = not init_param or
-                or _is_in_auto_parallel_mode() or _is_parallel_mode()
+            not_init_data = not init_param or _is_in_auto_parallel_mode() or _is_parallel_mode()
            if not_init_data:
                # do not init data while in auto parallel.
-                return (Tensor, None, data.dtype,
+                return (Tensor, None, data.dtype, data.shape, data.init)
            return (Tensor, data.init_data())
        if isinstance(data, int):
            return (Tensor, data, mstype.int32)
@@ -411,33 +368,6 @@ class Parameter(Tensor_):
            return (Tensor, data, mstype.float32)
        return (Tensor, data)
 
-    def set_param_ps(self, init_in_server=False):
-        """
-        Set whether the trainable parameter is updated by parameter server and whether the
-        trainable parameter is initialized on server.
-
-        Note:
-            It only works when a running task is in the parameter server mode.
-            It is supported only in graph mode.
-
-        Args:
-            init_in_server (bool): Whether trainable parameter updated by parameter server is
-                initialized on server. Default: ``False``.
-
-        """
-        if not _is_ps_mode() or not (_is_role_worker() or _is_role_pserver() or _is_role_sched()):
-            raise RuntimeError("Must complete following two steps before calling set_param_ps: \n"
-                               "1. context.set_ps_context(enable_ps=True) \n"
-                               "2. export MS_ROLE environment variable \n"
-                               "Please refer to the official website for detailed usage.")
-
-        if context.get_context("mode") == context.PYNATIVE_MODE:
-            raise RuntimeError("Parameter server training is not supported in pynative mode currently."
-                               "Please switch to graph mode and retry.")
-        self.is_param_ps = True
-        self.init_in_server = init_in_server
-        self.param_info.init_in_server = init_in_server
-
    def copy(self):
        """
        Copy the parameter.
@@ -453,16 +383,6 @@ class Parameter(Tensor_):
        """
        return self.clone(init='same')
 
-    @deprecated("1.8", "set_param_fl")
-    def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True):
-        if push_to_server:
-            self.push_weight_to_server = True
-        if pull_from_server:
-            self.pull_weight_from_server = True
-        if not requires_aggr:
-            self.requires_aggr = False
-            self.param_info.requires_aggr = False
-
    @property
    def inited_param(self):
        """
@@ -528,8 +448,6 @@ class Parameter(Tensor_):
            raise ValueError("The type of the Parameter's name should be 'string' or 'None', "
                             "but got {}.".format(type(name_)))
 
-        if _is_role_worker() and self.cache_enable:
-            _reinsert_hash_table_size(name_, self.param_info.name)
        self.param_info.name = name_
 
    @property
@@ -658,8 +576,6 @@ class Parameter(Tensor_):
        x.param_info = param_info_clone
        x.is_init = False
        x.init = self.init
-        x.is_param_ps = self.is_param_ps
-        x.init_in_server = self.init_in_server
        x.cache_enable = self.cache_enable
        if x.cache_enable:
            x.key = _get_unique_parameter_key()
@@ -667,7 +583,7 @@ class Parameter(Tensor_):
        if self.cache_shape:
            x.cache_shape = self.cache_shape
        if init != 'same':
-            shape = self.shape
+            shape = self.shape
            dtype = self.dtype
            tensor = initializer(init, shape=shape, dtype=dtype)
            x.set_data(tensor)
@@ -812,6 +728,7 @@ class Parameter(Tensor_):
            raise TypeError("The argument `requires_grad` must be bool type")
        Tensor_.wait_pipeline(self)
        self.param_info.requires_grad = value
+        self._requires_grad = value
 
    @property
    def data(self):
@@ -878,20 +795,6 @@ class Parameter(Tensor_):
                raise TypeError("The original tensor data is initialized, but the argument 'data' is not initialized."
                                "Please initialize 'data' before call this method.")
 
-    @staticmethod
-    def _from_tensor(tensor, *args, **kwargs):
-        """Create a `Parameter` that data is shared from a `Tensor`."""
-        if not isinstance(tensor, Tensor_):
-            raise TypeError(f"The type of input must be Tensor, but got {type(tensor)}.")
-        param = Tensor_.__new__(Parameter)
-        Tensor_.__init__(param, tensor)
-        param.init = None
-        param.init_mode = None
-        param.has_init = False
-        param.is_default_input_init = False
-        Parameter.__init__(param, tensor, *args, **kwargs)
-        return param
-
    @jit_forbidden_register
    def set_data(self, data, slice_shape=False):
        """
@@ -997,16 +900,7 @@ class Parameter(Tensor_):
 
        init_data_args = self._get_init_data_args(layout)
 
-
-            return self
-        if self.init_in_server and self.is_param_ps and isinstance(self.init_mode, Tensor) and \
-                self.init_mode.init is not None and _is_role_worker():
-            if self.cache_enable:
-                data = self.init_mode.init_data(*init_data_args)
-            else:
-                data = self.init_mode.init_data(0, [1])
-        else:
-            data = self.init_mode.init_data(*init_data_args)
+        data = self.init_mode.init_data(*init_data_args)
        origin_dtype = self.dtype
        obj = self._update_tensor_data(data)
        if self.dtype != origin_dtype:
@@ -1015,7 +909,6 @@ class Parameter(Tensor_):
        self._inited_param = obj
        obj.init_mode = None
        obj.sliced = set_sliced
-        _offload_if_config(obj)
        return obj
 
    def register_hook(self, hook_fn):
@@ -1023,11 +916,11 @@ class Parameter(Tensor_):
        For details, please refer to :func:`mindspore.Tensor.register_hook`.
        """
        handle = Tensor.register_hook(self, hook_fn)
-
+        _update_hook_version()
        return handle
 
    def _remove_hook(self):
-
+        _update_hook_version()
 
    def _offload(self):
        r"""
@@ -1170,9 +1063,6 @@ class ParameterTuple(tuple):
            if not x1.cache_enable:
                continue
 
-            if _is_role_worker():
-                _clone_hash_table(x.name, x.key, x1.name, x1.key)
-                _insert_accumu_init_info(x1.name, init_to_value(init))
        return ParameterTuple(new)
 
    def __parameter_tuple__(self):
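Aside: the new `requires_grad` docstring above describes how the flag interacts with `mindspore.nn.Cell.trainable_params()`. The sketch below illustrates that behaviour only; the network, parameter names, and shapes are assumptions for illustration and are not taken from the diff.

# Illustrative only: parameters created with requires_grad=False are filtered out
# of Cell.trainable_params(), as the updated docstring states.
import numpy as np
import mindspore as ms
from mindspore import nn, Parameter, Tensor

class Net(nn.Cell):
    def __init__(self):
        super().__init__()
        self.weight = Parameter(Tensor(np.ones((2, 2), np.float32)), name="weight")
        self.frozen = Parameter(Tensor(np.zeros((2, 2), np.float32)), name="frozen",
                                requires_grad=False)  # excluded from trainable_params()

    def construct(self, x):
        return ms.ops.matmul(x, self.weight) + self.frozen

net = Net()
print([p.name for p in net.trainable_params()])  # expected: ['weight']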
mindspore/common/recompute.py
CHANGED

@@ -18,12 +18,11 @@ from collections import OrderedDict
 from types import MethodType
 from mindspore import log as logger
 from mindspore.nn.cell import Cell
-from mindspore import context
 from mindspore.common.tensor import Tensor
 from mindspore import ops
 from mindspore.ops.composite import GradOperation
 from mindspore.common._register_for_recompute import recompute_registry
-from mindspore.common.api import _pynative_executor, _no_grad
+from mindspore.common.api import _pynative_executor, _no_grad, _run_in_jit
 from mindspore.common.generator import get_rng_state, set_rng_state
 from mindspore.train.amp import AmpDecorator
 from mindspore._c_expression.amp import get_curr_amp_strategy
@@ -211,12 +210,6 @@ def _detach_input(input_arg):
 def _check_validation(block):
     if not isinstance(block, Cell):
         raise TypeError("Recompute function now only support block which inherited from Cell!")
-    if context.get_context("mode") != context.PYNATIVE_MODE:
-        raise AssertionError("Recompute function now only support pynative mode, you can use "
-                             "Cell.recompute() in graph mode.")
-    if block.construct.__code__.co_name == "staging_specialize":
-        logger.warning('Block\'s construct method decorated by @jit that recompute '
-                       'function will not come into effect.')
 
 
 def recompute(block, *args, **kwargs):
@@ -225,10 +218,7 @@ def recompute(block, *args, **kwargs):
    storing the intermediate activation computed in forward pass, we will recompute it in backward pass.
 
    Note:
-
-        - This function interface now only support pynative mode. you can use Cell.recompute interface
-          in graph mode.
-        - When use recompute function, block object should not decorated by @jit.
+        Recompute function only support block which inherited from Cell object.
 
    Args:
        block (Cell): Block to be recompute.
@@ -240,7 +230,6 @@ def recompute(block, *args, **kwargs):
 
    Raises:
        TypeError: If `block` is not Cell object.
-        AssertionError: If execute mode is not PYNATIVE_MODE.
 
    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``
@@ -274,6 +263,8 @@ def recompute(block, *args, **kwargs):
    """
 
    _check_validation(block)
+    if _run_in_jit():  # @jit.cond: True
+        return ops.recompute_block(block)(*args, **kwargs)
    return _RecomputeCell(block)(*args, **kwargs)
 
 
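For context, a minimal usage sketch of the functional recompute interface whose docstring and jit handling change above. The block definition, shapes, and data are assumptions for illustration and are not taken from the diff.

# Illustrative only: recompute the block's forward activations during the backward
# pass instead of storing them in memory.
import numpy as np
import mindspore as ms
from mindspore import nn, Tensor

class Block(nn.Cell):
    def __init__(self):
        super().__init__()
        self.dense = nn.Dense(4, 4)
        self.relu = nn.ReLU()

    def construct(self, x):
        return self.relu(self.dense(x))

block = Block()
x = Tensor(np.random.randn(2, 4).astype(np.float32))
out = ms.recompute(block, x)  # activations of `block` are recomputed in the backward pass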