mindspore-2.0.0rc1-cp38-cp38-manylinux1_x86_64.whl → mindspore-2.2.0-cp38-cp38-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu10.1/libnvidia_collective.so +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libnvidia_collective.so +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libnvidia_collective.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +647 -818
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
mindspore/nn/optim/ftrl.py
CHANGED

@@ -191,8 +191,7 @@ class FTRL(Optimizer):
 
 FTRL is an online convex optimization algorithm that adaptively chooses its regularization function
 based on the loss functions. Refer to paper `Adaptive Bound Optimization for Online Convex Optimization
-<https://arxiv.org/abs/1002.4908>`_.
-<https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf>`_ for engineering document.
+<https://arxiv.org/abs/1002.4908>`_.
 
 The updating formulas are as follows,
 

@@ -250,20 +249,21 @@ class FTRL(Optimizer):
 If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
 one group of `params`.
 
-initial_accum (float): The starting value for accumulators `m`, must be zero or positive values.
+initial_accum (float): The starting value for accumulators `m`, must be zero or positive values.
+    Default: ``0.1`` .
 learning_rate (float): The learning rate value, must be zero or positive, dynamic learning rate is currently
-    not supported. Default: 0.001.
+    not supported. Default: ``0.001`` .
 lr_power (float): Learning rate power controls how the learning rate decreases during training, must be less
-    than or equal to zero. Use fixed learning rate if lr_power is zero. Default:
-l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0.
-l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0.
-use_locking (bool): If true, use locks for updating operation. Default: False.
+    than or equal to zero. Use fixed learning rate if lr_power is zero. Default: ``-0.5`` .
+l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: ``0.0`` .
+l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: ``0.0`` .
+use_locking (bool): If true, use locks for updating operation. Default: ``False`` .
 loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value.
     Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
-    `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+    `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
     `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
-    Default: 1.0.
-weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+    Default: ``1.0`` .
+weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .
 
 - float: The fixed weight decay value. Must be equal to or greater than 0.
 

@@ -295,7 +295,9 @@ class FTRL(Optimizer):
 >>> import mindspore as ms
 >>> from mindspore import nn
 >>>
->>>
+>>> # Define the network structure of LeNet5. Refer to
+>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+>>> net = LeNet5()
 >>> #1) All parameters use the same learning rate and weight decay
 >>> optim = nn.FTRL(params=net.trainable_params())
 >>>

@@ -313,7 +315,7 @@ class FTRL(Optimizer):
 >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
 >>>
 >>> loss = nn.SoftmaxCrossEntropyWithLogits()
->>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
+>>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
 """
 
 @opt_init_args_register

@@ -357,6 +359,7 @@ class FTRL(Optimizer):
 grads = self.scale_grad(grads)
 grads = self._grad_sparse_indices_deduplicate(grads)
 lr = self.get_lr()
+self.assignadd(self.global_step, self.global_step_increase_tensor)
 
 if self.use_dist_optimizer:
     success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self._ps_push, self._ps_pull,

@@ -377,12 +380,12 @@ class FTRL(Optimizer):
 optimizer operation.
 """
 if not isinstance(value, str):
-    raise TypeError("For 'FTRL', the property 'target' must be string type, "
-                    "but got type {
+    raise TypeError(f"For 'FTRL', the property 'target' must be string type, "
+                    f"but got type {type(value)}.")
 
 if value not in ('CPU', 'Ascend', 'GPU'):
-    raise ValueError("For 'FTRL', the property 'target' must be 'CPU', 'Ascend' or 'GPU', "
-                     "but got {}"
+    raise ValueError(f"For 'FTRL', the property 'target' must be 'CPU', 'Ascend' or 'GPU', "
+                     f"but got {value}.")
 
 if value == 'CPU':
     self.sparse_opt = P.FusedSparseFtrl(self.lr, self.l1, self.l2, self.lr_power, self.use_locking)
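For readers skimming the hunks above, here is a minimal, illustrative sketch (not part of the diff) of constructing nn.FTRL with the defaults the 2.2.0 docstring documents, and handing it to ms.train.Model as the updated example does. LeNet5 is the placeholder network used by the MindSpore docs and is assumed to be defined elsewhere (e.g. the linked lenet.py); it is not provided here.

import mindspore as ms
from mindspore import nn

# Hypothetical network: LeNet5 must be defined elsewhere (see the lenet.py
# link in the docstring hunk above); any nn.Cell with trainable parameters works.
net = LeNet5()

# Defaults written out explicitly; the values match those documented above.
optim = nn.FTRL(params=net.trainable_params(),
                initial_accum=0.1,    # starting value for the accumulators `m`
                learning_rate=0.001,  # fixed learning rate (dynamic lr not supported)
                lr_power=-0.5,        # must be <= 0; 0 means a fixed learning rate
                l1=0.0, l2=0.0,       # regularization strengths
                use_locking=False,
                loss_scale=1.0,
                weight_decay=0.0)

loss = nn.SoftmaxCrossEntropyWithLogits()
model = ms.train.Model(net, loss_fn=loss, optimizer=optim)  # ms.train.Model, per the 2.2.0 example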
mindspore/nn/optim/lamb.py
CHANGED

@@ -132,7 +132,7 @@ class Lamb(Optimizer):
  There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
  As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means. Refer
- document `LossScale <https://www.mindspore.cn/tutorials/en/r2.
+ document `LossScale <https://www.mindspore.cn/tutorials/en/r2.2/advanced/mixed_precision.html>`_ to
  process `loss_scale` correctly.

  If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
@@ -140,6 +140,10 @@ class Lamb(Optimizer):
  parameters are grouped, each group can set `weight_decay`. If not, the `weight_decay` in optimizer will be
  applied.

+ .. warning::
+     The update process of the Lamb optimizer is not completely elementwise, and the sharding of weights in
+     distributed parallel may affect the update result.
+
  Args:
      params (Union[list[Parameter], list[dict]]): Must be list of `Parameter` or list of `dict`. When the
          `params` is a list of `dict`, the string "params", "lr", "weight_decay", "grad_centralization" and
@@ -181,14 +185,14 @@ class Lamb(Optimizer):
      - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
        LearningRateSchedule with step as the input to get the learning rate of current step.

-     beta1 (float): The exponential decay rate for the 1st moment estimations. Default: 0.9.
+     beta1 (float): The exponential decay rate for the 1st moment estimations. Default: ``0.9`` .
          Should be in range (0.0, 1.0).
-     beta2 (float): The exponential decay rate for the 2nd moment estimations. Default: 0.999.
+     beta2 (float): The exponential decay rate for the 2nd moment estimations. Default: ``0.999`` .
          Should be in range (0.0, 1.0).
-     eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6.
+     eps (float): Term added to the denominator to improve numerical stability. Default: ``1e-6`` .
          Should be greater than 0.

-     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

      - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -201,7 +205,7 @@ class Lamb(Optimizer):
      - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.

  Outputs:
-     tuple[bool], all elements are True.
+     tuple[bool], all elements are ``True`` .

  Raises:
      TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
@@ -218,14 +222,15 @@ class Lamb(Optimizer):
  Examples:
      >>> import mindspore as ms
      >>> from mindspore import nn
-     >>> from mindspore.nn import learning_rate_schedule
      >>>
-     >>>
+     >>> # Define the network structure of LeNet5. Refer to
+     >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+     >>> net = LeNet5()
      >>> #1) All parameters use the same learning rate and weight decay
      >>> optim = nn.Lamb(params=net.trainable_params(), learning_rate=0.1)
      >>>
      >>> #2) Use parameter groups and set different values
-     >>> poly_decay_lr =
+     >>> poly_decay_lr = nn.PolynomialDecayLR(learning_rate=0.1, end_learning_rate=0.01,
      ...                                      decay_steps=4, power = 0.5)
      >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
      >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
@@ -240,7 +245,7 @@ class Lamb(Optimizer):
      >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
      >>>
      >>> loss = nn.SoftmaxCrossEntropyWithLogits()
-     >>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
+     >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
  """
  _support_parallel_optimizer = True

@@ -262,8 +267,7 @@ class Lamb(Optimizer):
  def construct(self, gradients):
      weight_decay = self.get_weight_decay()
      lr = self.get_lr()
-
-     self.assignadd(self.global_step, self.global_step_increase_tensor)
+     self.assignadd(self.global_step, self.global_step_increase_tensor)
      lamb_opt = _lamb_opt
      gradients = self.flatten_gradients(gradients)
      gradients = self.gradients_centralization(gradients)
mindspore/nn/optim/lars.py
CHANGED

@@ -87,10 +87,10 @@ class LARS(Optimizer):
  :math:`\gamma` represents `learning_rate` in `optimizer`, :math:`\eta` represents `coefficient`.

  Args:
-     optimizer (Optimizer): MindSpore optimizer for which to wrap and modify gradients.
-     epsilon (float): Term added to the denominator to improve numerical stability. Default: 1e-05.
-     coefficient (float): Trust coefficient for calculating the local learning rate. Default: 0.001.
-     use_clip (bool): Whether to use clip operation for calculating the local learning rate. Default: False.
+     optimizer (:class:`mindspore.nn.Optimizer`): MindSpore optimizer for which to wrap and modify gradients.
+     epsilon (float): Term added to the denominator to improve numerical stability. Default: ``1e-05`` .
+     coefficient (float): Trust coefficient for calculating the local learning rate. Default: ``0.001`` .
+     use_clip (bool): Whether to use clip operation for calculating the local learning rate. Default: ``False`` .
      lars_filter (Function): A function to determine which of the network parameters to use LARS algorithm. Default:
          lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.

@@ -108,11 +108,13 @@ class LARS(Optimizer):
      >>> import mindspore as ms
      >>> from mindspore import nn
      >>>
-     >>>
+     >>> # Define the network structure of LeNet5. Refer to
+     >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+     >>> net = LeNet5()
      >>> loss = nn.SoftmaxCrossEntropyWithLogits()
      >>> opt = nn.Momentum(net.trainable_params(), 0.1, 0.9)
      >>> opt_lars = nn.LARS(opt, epsilon=1e-08, coefficient=0.02)
-     >>> model = ms.Model(net, loss_fn=loss, optimizer=opt_lars, metrics=None)
+     >>> model = ms.train.Model(net, loss_fn=loss, optimizer=opt_lars, metrics=None)
  """

  @opt_init_args_register
mindspore/nn/optim/lazyadam.py
CHANGED

@@ -270,14 +270,15 @@ class LazyAdam(Optimizer):
      \begin{array}{ll} \\
          m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
          v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
-
-
+         \widehat{m_{t+1}} = \frac{m_{t+1}}{1-\beta_1^t} \\
+         \widehat{v_{t+1}} = \frac{v_{t+1}}{1-\beta_2^t} \\
+         w_{t+1} = w_{t} - \gamma * \frac{\widehat{m_{t+1}}}{\sqrt{\widehat{v_{t+1}}} + \epsilon}
      \end{array}

  :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
- :math:`g` represents `gradients`, :math
+ :math:`g` represents `gradients`, :math:`\gamma` represents `learning_rate`, :math:`\beta_1, \beta_2` represent
  `beta1` and `beta2`, :math:`t` represents the current step while :math:`beta_1^t` and :math:`beta_2^t` represent
- `beta1_power` and `beta2_power`, :math
+ `beta1_power` and `beta2_power`, :math:`w` represents `params`,
  :math:`\epsilon` represents `eps`.

  Note:
@@ -320,7 +321,7 @@ class LazyAdam(Optimizer):
      If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
      one group of `params`.

-     learning_rate (Union[float, int, Tensor, Iterable,
+     learning_rate (Union[float, int, Tensor, Iterable, :class:`~.train.LearningRateScheduler`]): Default: ``1e-3`` .

      - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -335,19 +336,19 @@ class LazyAdam(Optimizer):
        LearningRateSchedule with step as the input to get the learning rate of current step.

      beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
-         Default: 0.9.
+         Default: ``0.9`` .
      beta2 (float): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
-         Default: 0.999.
-     eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
-         1e-8.
+         Default: ``0.999`` .
+     eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
+         Default: ``1e-8`` .
      use_locking (bool): Whether to enable a lock to protect the updating process of variable tensors.
-         If true, updates of the `w`, `m`, and `v` tensors will be protected by a lock.
-         If false, the result is unpredictable. Default: False.
+         If ``true`` , updates of the `w`, `m`, and `v` tensors will be protected by a lock.
+         If ``false`` , the result is unpredictable. Default: ``False`` .
      use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
-         If true, update the gradients using NAG.
-         If false, update the gradients without using NAG. Default: False.
+         If ``true`` , update the gradients using NAG.
+         If ``false`` , update the gradients without using NAG. Default: ``False`` .

-     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

      - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -358,18 +359,19 @@ class LazyAdam(Optimizer):

      loss_scale (float): A floating point value for the loss scale. Should be equal to or greater than 1. In general,
          use the default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update`
-         in `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+         in `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
          `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
-         Default: 1.0.
+         Default: ``1.0`` .

  Inputs:
      - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.

  Outputs:
-     Tensor[bool], the value is True.
+     Tensor[bool], the value is ``True`` .

  Raises:
-     TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable,
+     TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable,
+         :class:`~.train.LearningRateScheduler`.
      TypeError: If element of `parameters` is neither Parameter nor dict.
      TypeError: If `beta1`, `beta2`, `eps` or `loss_scale` is not a float.
      TypeError: If `weight_decay` is neither float nor int.
@@ -385,7 +387,9 @@ class LazyAdam(Optimizer):
      >>> import mindspore as ms
      >>> from mindspore import nn
      >>>
-     >>>
+     >>> # Define the network structure of LeNet5. Refer to
+     >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+     >>> net = LeNet5()
      >>> #1) All parameters use the same learning rate and weight decay
      >>> optim = nn.LazyAdam(params=net.trainable_params())
      >>>
@@ -403,7 +407,7 @@ class LazyAdam(Optimizer):
      >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
      >>>
      >>> loss = nn.SoftmaxCrossEntropyWithLogits()
-     >>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
+     >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
  """

  @deprecated("2.0", "Adam", False)
@@ -442,6 +446,7 @@ class LazyAdam(Optimizer):
      gradients = self.scale_grad(gradients)
      gradients = self._grad_sparse_indices_deduplicate(gradients)
      lr = self.get_lr()
+     self.assignadd(self.global_step, self.global_step_increase_tensor)

      beta1_power = self.beta1_power * self.beta1
      self.beta1_power = beta1_power
mindspore/nn/optim/momentum.py
CHANGED

@@ -134,7 +134,7 @@ class Momentum(Optimizer):
      momentum (float): Hyperparameter of type float, means momentum for the moving average.
          It must be at least 0.0.

-     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

      - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -145,16 +145,16 @@ class Momentum(Optimizer):

      loss_scale (float): A floating point value for the loss scale. It must be greater than 0.0. In general, use the
          default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
-         `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+         `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
          `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
-         Default: 1.0.
-     use_nesterov (bool): Enable Nesterov momentum. Default: False.
+         Default: ``1.0`` .
+     use_nesterov (bool): Enable Nesterov momentum. Default: ``False`` .

  Inputs:
      - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.

  Outputs:
-     tuple[bool]. All elements are True.
+     tuple[bool]. All elements are ``True`` .

  Raises:
      TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
@@ -172,7 +172,9 @@ class Momentum(Optimizer):
      >>> import mindspore as ms
      >>> from mindspore import nn
      >>>
-     >>>
+     >>> # Define the network structure of LeNet5. Refer to
+     >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+     >>> net = LeNet5()
      >>> #1) All parameters use the same learning rate and weight decay
      >>> optim = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
      >>>
@@ -190,7 +192,7 @@ class Momentum(Optimizer):
      >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
      >>>
      >>> loss = nn.SoftmaxCrossEntropyWithLogits()
-     >>> model = ms.Model(net, loss_fn=loss, optimizer=optim, metrics=None)
+     >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim, metrics=None)
  """
  @opt_init_args_register
  def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0, use_nesterov=False):
@@ -218,6 +220,7 @@ class Momentum(Optimizer):
      gradients = self.gradients_centralization(gradients)
      gradients = self.scale_grad(gradients)
      lr = self.get_lr()
+     self.assignadd(self.global_step, self.global_step_increase_tensor)
      if self.use_dist_optimizer:
          if self.is_group_lr:
              success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum),
mindspore/nn/optim/optimizer.py
CHANGED

@@ -121,13 +121,13 @@ class Optimizer(Cell):

      weight_decay (Union[float, int]): An int or a floating point value for the weight decay.
          It must be equal to or greater than 0.
-         If the type of `weight_decay` input is int, it will be converted to float. Default: 0.0.
+         If the type of `weight_decay` input is int, it will be converted to float. Default: ``0.0`` .
      loss_scale (float): A floating point value for the loss scale. It must be greater than 0. If the
          type of `loss_scale` input is int, it will be converted to float. In general, use the default value. Only
          when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
-         `FixedLossScaleManager` is set to False, this value needs to be the same as the `loss_scale` in
+         `FixedLossScaleManager` is set to ``False`` , this value needs to be the same as the `loss_scale` in
          `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
-         Default: 1.0.
+         Default: ``1.0`` .

  Raises:
      TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
@@ -140,6 +140,57 @@ class Optimizer(Cell):

  Supported Platforms:
      ``Ascend`` ``GPU`` ``CPU``
+
+ Examples:
+     >>> import mindspore as ms
+     >>> from mindspore import nn
+     >>> import numpy as np
+     >>> import mindspore
+     >>> from mindspore import nn, ops, Tensor
+     >>>
+     >>> class MyMomentum(nn.Optimizer):
+     ...     def __init__(self, params, learning_rate, momentum=0.9):
+     ...         super(MyMomentum, self).__init__(learning_rate, params)
+     ...         self.moments = self.parameters.clone(prefix="moments", init="zeros")
+     ...         self.momentum = momentum
+     ...         self.opt = ops.ApplyMomentum()
+     ...
+     ...     def construct(self, gradients):
+     ...         params = self.parameters
+     ...         lr = self.get_lr()
+     ...         gradients = self.flatten_gradients(gradients)
+     ...         gradients = self.decay_weight(gradients)
+     ...         gradients = self.gradients_centralization(gradients)
+     ...         gradients = self.scale_grad(gradients)
+     ...
+     ...         success = None
+     ...         for param, mom, grad in zip(params, self.moments, gradients):
+     ...             success = self.opt(param, mom, lr, grad, self.momentum)
+     ...         return success
+     >>>
+     >>> net = nn.Dense(2, 3)
+     >>> loss_fn = nn.MAELoss()
+     >>> opt = MyMomentum(net.trainable_params(), 0.01)
+     >>>
+     >>> device_target = opt.target
+     >>> opt_unique = opt.unique
+     >>> weight_decay_value = opt.get_weight_decay()
+     >>>
+     >>> def forward_fn(data, label):
+     ...     logits = net(data)
+     ...     loss = loss_fn(logits, label)
+     ...     return loss, logits
+     >>>
+     >>> grad_fn = mindspore.value_and_grad(forward_fn, None, opt.parameters, has_aux=True)
+     >>>
+     >>> def train_step(data, label):
+     ...     (loss, _), grads = grad_fn(data, label)
+     ...     opt(grads)
+     ...     return loss
+     >>>
+     >>> data = Tensor(np.random.rand(4, 10, 2), mindspore.dtype.float32)
+     >>> label = Tensor(np.random.rand(4, 10, 3), mindspore.dtype.float32)
+     >>> train_step(data, label)
  """
  _support_parallel_optimizer = False

@@ -148,6 +199,8 @@ class Optimizer(Cell):
      parameters = self._parameters_base_check(parameters, "parameters")
      self.param_rank = None
      self.optim_filter = None
+     if not isinstance(parameters, list):
+         raise TypeError(f"For 'Optimizer' argument 'parameters' must be 'list', but got {type(parameters)}.")
      if not all(isinstance(x, Parameter) for x in parameters) and not all(isinstance(x, dict) for x in parameters):
          raise TypeError("For 'Optimizer', all elements of the argument 'parameters' must be 'Parameter' or 'dict',"
                          " please check the 'parameters'.")
@@ -231,7 +284,7 @@ class Optimizer(Cell):
      self.cache_enable = tuple(cache_filter(x) for x in self._parameters)
      self.reciprocal_scale = Tensor(1.0 / self.loss_scale, mstype.float32)
      self.need_scale = self.loss_scale != 1.0
-     self.global_step_increase_tensor = Tensor(1, mstype.int32)
+     self.global_step_increase_tensor = Tensor([1], mstype.int32)
      self.param_length = len(self._parameters)
      self.map_ = C.Map()
      self.map_reverse = C.Map(None, True)
@@ -700,8 +753,6 @@
              lr += (current_dynamic_lr,)
          else:
              lr = self.learning_rate(self.global_step).reshape(())
-         if self._is_dynamic_lr_or_weight_decay():
-             self.assignadd(self.global_step, self.global_step_increase_tensor)
          return lr

  def get_lr_parameter(self, param):
@@ -719,8 +770,9 @@

  Examples:
      >>> from mindspore import nn
-     >>> #
-     >>>
+     >>> # Define the network structure of LeNet5. Refer to
+     >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+     >>> net = LeNet5()
      >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
      >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
      >>> group_params = [{'params': conv_params, 'lr': 0.05},
@@ -922,7 +974,7 @@ def rowtensor_deduplicate_indices_slices(grad):
      values = grad.values

      unique_indices, index_position = P.Unique()(indices)
-     summed_values = P.UnsortedSegmentSum()(values, index_position, P.
+     summed_values = P.UnsortedSegmentSum()(values, index_position, P.Shape()(unique_indices)[0])

      return RowTensorInner(unique_indices, summed_values, grad.dense_shape)
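The completed line in `rowtensor_deduplicate_indices_slices` above sums the value slices that share a row index before the sparse update is applied. A NumPy sketch of that deduplication (illustration only; `np.add.at` stands in for `P.UnsortedSegmentSum` and `np.unique` for `P.Unique`):

import numpy as np

indices = np.array([2, 0, 2, 1])
values = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])

unique_indices, index_position = np.unique(indices, return_inverse=True)
summed = np.zeros((unique_indices.shape[0], values.shape[1]))
np.add.at(summed, index_position, values)   # rows with the same index are accumulated

print(unique_indices)   # [0 1 2]
print(summed)           # [[2. 2.] [4. 4.] [4. 4.]]  -- the two slices for row 2 were merged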
mindspore/nn/optim/proximal_ada_grad.py
CHANGED

@@ -55,9 +55,7 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):

  class ProximalAdagrad(Optimizer):
      r"""
-     Implements the ProximalAdagrad algorithm.
-
-     ProximalAdagrad is an online Learning and Stochastic Optimization.
+     Implements the ProximalAdagrad algorithm that is an online Learning and Stochastic Optimization.
      Refer to paper `Efficient Learning using Forward-Backward Splitting
      <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_.

@@ -110,8 +108,8 @@ class ProximalAdagrad(Optimizer):
      If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
      one group of `params`.

-     accum (float): The starting value for accumulators `accum`, must be zero or positive values. Default: 0.1.
-     learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 0.001.
+     accum (float): The starting value for accumulators `accum`, must be zero or positive values. Default: ``0.1`` .
+     learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``0.001`` .

      - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -125,15 +123,15 @@ class ProximalAdagrad(Optimizer):
      - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
        LearningRateSchedule with step as the input to get the learning rate of the current step.

-     l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0.
-     l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0.
-     use_locking (bool): If true, use locks for updating operation. Default: False.
+     l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: ``0.0`` .
+     l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: ``0.0`` .
+     use_locking (bool): If true, use locks for updating operation. Default: ``False`` .
      loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value.
          Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
-         `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+         `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
          `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
-         Default: 1.0.
-     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+         Default: ``1.0`` .
+     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

      - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -164,7 +162,9 @@ class ProximalAdagrad(Optimizer):
      >>> import mindspore as ms
      >>> from mindspore import nn
      >>>
-     >>>
+     >>> # Define the network structure of LeNet5. Refer to
+     >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+     >>> net = LeNet5()
      >>> #1) All parameters use the same learning rate and weight decay
      >>> optim = nn.ProximalAdagrad(params=net.trainable_params())
      >>>
@@ -182,7 +182,7 @@ class ProximalAdagrad(Optimizer):
      >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
      >>>
      >>> loss = nn.SoftmaxCrossEntropyWithLogits()
-     >>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
+     >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
  """

  @opt_init_args_register
@@ -207,6 +207,7 @@ class ProximalAdagrad(Optimizer):
      grads = self.scale_grad(grads)
      grads = self._grad_sparse_indices_deduplicate(grads)
      lr = self.get_lr()
+     self.assignadd(self.global_step, self.global_step_increase_tensor)
      if self.is_group_lr:
          success = self.map_reverse(F.partial(_proximal_ada_grad_opt, self.opt, self.sparse_opt, self.l1, self.l2),
                                     lr, grads, params, accum)
mindspore/nn/optim/rmsprop.py
CHANGED

@@ -47,8 +47,8 @@ class RMSProp(Optimizer):
  Implements Root Mean Squared Propagation (RMSProp) algorithm.

  Update `params` according to the RMSProp algorithm.
- The 29th of the original presentation slide
-
+ The 29th of the original `presentation slide
+ <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_ proposes RMSProp.
  The equation is as follows:

  .. math::
@@ -120,7 +120,7 @@ class RMSProp(Optimizer):
      If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
      one group of `params`.

-     learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 0.1.
+     learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``0.1`` .

      - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -134,20 +134,21 @@ class RMSProp(Optimizer):
      - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
        LearningRateSchedule with step as the input to get the learning rate of the current step.

-     decay (float): Decay rate. Should be equal to or greater than 0. Default: 0.9.
+     decay (float): Decay rate. Should be equal to or greater than 0. Default: ``0.9`` .
      momentum (float): Hyperparameter of type float, means momentum for the moving average. Should be equal to or
-         greater than 0. Default: 0.0.
+         greater than 0. Default: ``0.0`` .
      epsilon (float): Term added to the denominator to improve numerical stability. Should be greater than
-         0. Default: 1e-10.
+         0. Default: ``1e-10`` .
      use_locking (bool): Whether to enable a lock to protect the updating process of variable tensors.
-         Default: False.
-     centered (bool): If True, gradients are normalized by the estimated variance of the gradient.
+         Default: ``False`` .
+     centered (bool): If True, gradients are normalized by the estimated variance of the gradient.
+         Default: ``False`` .
      loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the
          default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
-         `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+         `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
          `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
-         Default: 1.0.
-     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+         Default: ``1.0`` .
+     weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

      - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -178,7 +179,9 @@ class RMSProp(Optimizer):
      >>> import mindspore as ms
      >>> from mindspore import nn
      >>>
-     >>>
+     >>> # Define the network structure of LeNet5. Refer to
+     >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+     >>> net = LeNet5()
      >>> #1) All parameters use the same learning rate and weight decay
      >>> optim = nn.RMSProp(params=net.trainable_params(), learning_rate=0.1)
      >>>
@@ -196,7 +199,7 @@ class RMSProp(Optimizer):
      >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
      >>>
      >>> loss = nn.SoftmaxCrossEntropyWithLogits()
-     >>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
+     >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
  """

  @opt_init_args_register
@@ -233,6 +236,7 @@ class RMSProp(Optimizer):
      gradients = self.gradients_centralization(gradients)
      gradients = self.scale_grad(gradients)
      lr = self.get_lr()
+     self.assignadd(self.global_step, self.global_step_increase_tensor)
      if self.centered:
          if self.is_group_lr:
              success = self.hyper_map_reverse(F.partial(_centered_rmsprop_opt, self.opt, self.decay, self.epsilon,
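For reference, the non-centered RMSProp step that the Args above parameterize, written out in NumPy (illustration only, with `momentum=0.0` and the documented defaults `decay=0.9`, `epsilon=1e-10`):

import numpy as np

decay, eps, lr = 0.9, 1e-10, 0.1
w = np.array([0.5, -0.3])            # params
g = np.array([0.1, 0.2])             # gradients
s = np.zeros_like(w)                 # mean-square accumulator

s = decay * s + (1 - decay) * g * g          # s_{t+1}
w = w - lr * g / np.sqrt(s + eps)            # w_{t+1}, the update applied elementwise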