mindspore 2.0.0rc1__cp38-none-any.whl → 2.2.0__cp38-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +633 -804
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
mindspore/nn/optim/ada_grad.py
CHANGED

@@ -112,8 +112,8 @@ class Adagrad(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- accum (float): The starting value for :math:`h`, must be zero or positive values. Default: 0.1.
- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 0.001.
+ accum (float): The starting value for :math:`h`, must be zero or positive values. Default: ``0.1`` .
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``0.001`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -127,13 +127,13 @@ class Adagrad(Optimizer):
  - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
  LearningRateSchedule with step as the input to get the learning rate of current step.

- update_slots (bool): Whether the :math:`h` will be updated. Default: True.
+ update_slots (bool): Whether the :math:`h` will be updated. Default: ``True`` .
  loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value.
  Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
  `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
  `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
- Default: 1.0.
- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ Default: ``1.0`` .
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -147,7 +147,7 @@ class Adagrad(Optimizer):
  in optimizer.

  Outputs:
- Tensor[bool], the value is True.
+ Tensor[bool], the value is ``True`` .

  Raises:
  TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.

@@ -162,10 +162,12 @@ class Adagrad(Optimizer):
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
- >>> import mindspore
- >>>
+ >>> import mindspore
+ >>> import mindspore.nn as nn
  >>>
- >>>
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Adagrad(params=net.trainable_params())
  >>>

@@ -203,6 +205,7 @@ class Adagrad(Optimizer):
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
  success = self.map_reverse(F.partial(_ada_grad_opt, self.opt), lr, params, accum,
  grads)
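The hunks above mostly reformat the Adagrad docstring defaults (``0.001``, ``0.1``, ``True``) and add an explicit global-step increment in construct. Below is a minimal usage sketch of those documented defaults, assuming MindSpore 2.2 is installed; TinyNet is a hypothetical stand-in network, not the LeNet5 the docstring example refers to.

# Minimal usage sketch of the defaults named in the updated docstring
# (assumes MindSpore 2.2 is installed). TinyNet is a hypothetical stand-in,
# not the LeNet5 referenced in the docstring example.
import mindspore.nn as nn

class TinyNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.fc = nn.Dense(4, 2)   # small fully connected layer

    def construct(self, x):
        return self.fc(x)

net = TinyNet()
# Defaults called out above: learning_rate=0.001, accum=0.1, update_slots=True
optim = nn.Adagrad(params=net.trainable_params(), learning_rate=0.001, accum=0.1)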
mindspore/nn/optim/adadelta.py
CHANGED

@@ -84,8 +84,8 @@ class Adadelta(Optimizer):
  to get the weight decay value of current step.

  - grad_centralization: Optional. Must be Boolean. If "grad_centralization" is in the keys, the set value
- will be used. If not, the `grad_centralization` is False
- convolution layer.
+ will be used. If not, the `grad_centralization` is ``False`` by default. This configuration only works
+ on the convolution layer.

  - order_params: Optional. When parameters is grouped, this usually is used to maintain the order of
  parameters that appeared in the network to improve performance. The value should be parameters whose

@@ -93,7 +93,7 @@ class Adadelta(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 1.0.
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1.0`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -107,14 +107,14 @@ class Adadelta(Optimizer):
  - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
  LearningRateSchedule with step as the input to get the learning rate of current step.

- rho (float): Decay rate, must be in range [0.0, 1.0]. Default: 0.9.
- epsilon (float): A small value added for numerical stability, must be non-negative. Default: 1e-6.
+ rho (float): Decay rate, must be in range [0.0, 1.0]. Default: ``0.9`` .
+ epsilon (float): A small value added for numerical stability, must be non-negative. Default: ``1e-6`` .
  loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value.
  Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
- `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+ `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
  `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
- Default: 1.0.
- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ Default: ``1.0`` .
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -128,7 +128,7 @@ class Adadelta(Optimizer):
  the `params` in optimizer. With float16 or float32 data type.

  Outputs:
- Tensor[bool], the value is True.
+ Tensor[bool], the value is ``True`` .

  Raises:
  TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.

@@ -143,7 +143,9 @@ class Adadelta(Optimizer):
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
- >>>
+ >>> import mindspore as ms
+ >>> import mindspore.nn as nn
+ >>>
  >>> class Net(nn.Cell):
  ...     def __init__(self):
  ...         super(Net, self).__init__()

@@ -171,7 +173,7 @@ class Adadelta(Optimizer):
  >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
  >>>
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
- >>> model = Model(net, loss_fn=loss, optimizer=optim)
+ >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
  """

  @opt_init_args_register

@@ -192,6 +194,7 @@ class Adadelta(Optimizer):
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
  success = self.map_reverse(F.partial(_adadelta_opt, self.opt, self.rho, self.epsilon), lr, params,
  self.accum, self.accum_update, grads)
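Besides the same docstring default reformatting, the Adadelta example now builds the training wrapper through the mindspore.train namespace rather than a bare Model. A rough sketch of that wiring under MindSpore 2.2 follows; the one-layer Net here is a placeholder, not the Net from the real docstring example.

# Sketch of the updated example wiring (assumes MindSpore 2.2 is installed);
# this one-layer Net is a placeholder for the docstring's network.
import mindspore as ms
import mindspore.nn as nn

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Dense(16, 10)

    def construct(self, x):
        return self.fc(x)

net = Net()
# Defaults shown in the updated docstring: learning_rate=1.0, rho=0.9, epsilon=1e-6
optim = nn.Adadelta(params=net.trainable_params(), learning_rate=1.0, rho=0.9, epsilon=1e-6)
loss = nn.SoftmaxCrossEntropyWithLogits()
# The 2.2 example qualifies Model through mindspore.train instead of a bare Model name.
model = ms.train.Model(net, loss_fn=loss, optimizer=optim)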
mindspore/nn/optim/adafactor.py
CHANGED
|
@@ -216,27 +216,29 @@ class AdaFactor(Optimizer):
|
|
|
216
216
|
|
|
217
217
|
learning_rate (Union[float, Tensor]): A value or a graph for the learning rate.
|
|
218
218
|
When the learning_rate is a Tensor in a 1D dimension.
|
|
219
|
-
If the type of `learning_rate` is int, it will be converted to float. Default: None.
|
|
219
|
+
If the type of `learning_rate` is int, it will be converted to float. Default: ``None`` .
|
|
220
220
|
eps (tuple): The regularization constans for square gradient and parameter scale respectively.
|
|
221
|
-
default: (1e-30, 1e-3)
|
|
222
|
-
clip_threshold (Union[float, Tensor]): The threshold of root mean square of final gradient update.
|
|
221
|
+
default: ``(1e-30, 1e-3)`` .
|
|
222
|
+
clip_threshold (Union[float, Tensor]): The threshold of root mean square of final gradient update.
|
|
223
|
+
default: ``1.0``.
|
|
223
224
|
decay_rate (Union[float, Tensor]): The coefficient used to compute running averages of square gradient.
|
|
224
|
-
default: 0.8
|
|
225
|
+
default: ``0.8`` .
|
|
225
226
|
beta1 (float): The coefficient to computing running averages of gradient. Should be in range (0.0, 1.0).
|
|
226
|
-
|
|
227
|
-
weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0.
|
|
228
|
-
scale_parameter (bool): If True, learning rate is scaled by root mean square of parameter.
|
|
227
|
+
Default: ``None`` .
|
|
228
|
+
weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: ``0.0`` .
|
|
229
|
+
scale_parameter (bool): If True, learning rate is scaled by root mean square of parameter.
|
|
230
|
+
default: ``True`` .
|
|
229
231
|
relative_step (bool): If True, time-dependent learning rate is computed instead of external learning rate.
|
|
230
|
-
default: True
|
|
232
|
+
default: ``True`` .
|
|
231
233
|
warmup_init (bool): The time-dependent learning rate computation depends on whether warm-up
|
|
232
|
-
initialization is being used. default: False
|
|
234
|
+
initialization is being used. default: ``False`` .
|
|
233
235
|
compression (bool): If True, the data type of the running averages exponent will be compressed to float16.
|
|
234
|
-
default: False
|
|
236
|
+
default: ``False`` .
|
|
235
237
|
loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the
|
|
236
238
|
default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
|
|
237
|
-
`FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
|
|
239
|
+
`FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
|
|
238
240
|
`FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
|
|
239
|
-
Default: 1.0.
|
|
241
|
+
Default: ``1.0`` .
|
|
240
242
|
|
|
241
243
|
Inputs:
|
|
242
244
|
- **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
|
|
@@ -261,7 +263,9 @@ class AdaFactor(Optimizer):
|
|
|
261
263
|
>>> import mindspore as ms
|
|
262
264
|
>>> from mindspore import nn
|
|
263
265
|
>>>
|
|
264
|
-
>>>
|
|
266
|
+
>>> # Define the network structure of LeNet5. Refer to
|
|
267
|
+
>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
268
|
+
>>> net = LeNet5()
|
|
265
269
|
>>> #1) Parameters use the default learning rate with None and weight decay with 0.
|
|
266
270
|
>>> optim = nn.AdaFactor(params=net.trainable_params())
|
|
267
271
|
>>>
|
|
@@ -329,7 +333,6 @@ class AdaFactor(Optimizer):
|
|
|
329
333
|
self.weight_decay = trans_to_tensor(weight_decay)
|
|
330
334
|
self.weight_decay_flag = bool(weight_decay)
|
|
331
335
|
|
|
332
|
-
self.step = Parameter(Tensor(0, dtype=mstype.float32), name="train_step")
|
|
333
336
|
self.scale_parameter = scale_parameter
|
|
334
337
|
self.relative_step = relative_step
|
|
335
338
|
self.warmup_init = warmup_init
|
|
@@ -407,8 +410,8 @@ class AdaFactor(Optimizer):
|
|
|
407
410
|
def construct(self, gradients):
|
|
408
411
|
gradients = self.flatten_gradients(gradients)
|
|
409
412
|
lr = self.get_lr()
|
|
410
|
-
|
|
411
|
-
step = self.step
|
|
413
|
+
self.assignadd(self.global_step, self.global_step_increase_tensor)
|
|
414
|
+
step = F.assign_add(self.step, 1)
|
|
412
415
|
if self.scale_lr and self.relative_step:
|
|
413
416
|
if self.warmup_init:
|
|
414
417
|
min_step = 1e-6 * step
|
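The AdaFactor defaults documented above (`relative_step=True`, `scale_parameter=True`) mean the learning rate is derived from the step count, so `learning_rate` is left as ``None``. A minimal construction sketch with a stand-in network (AdaFactor itself is documented for Ascend):

    import mindspore as ms
    import mindspore.nn as nn

    net = nn.Dense(16, 10)                      # stand-in network
    # learning_rate stays None because relative_step=True computes a time-dependent rate
    optim = nn.AdaFactor(params=net.trainable_params(), learning_rate=None,
                         relative_step=True, warmup_init=False)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
    model = ms.train.Model(net, loss_fn=loss, optimizer=optim)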
mindspore/nn/optim/adam.py
CHANGED
|
@@ -499,7 +499,8 @@ def _run_opt_with_one_number_use_amsgrad(opt, sparse_opt, push, pull,
|
|
|
499
499
|
(op_shape(param), op_shape(moment1), op_shape(moment2),
|
|
500
500
|
op_shape(vhat))), param))
|
|
501
501
|
else:
|
|
502
|
-
success = F.depend(success, opt(param, moment1, moment2, vhat, beta1_power, beta2_power,
|
|
502
|
+
success = F.depend(success, opt(param, moment1, moment2, vhat, beta1_power, beta2_power,
|
|
503
|
+
lr, beta1, beta2, eps, gradient))
|
|
503
504
|
return success
|
|
504
505
|
|
|
505
506
|
|
|
@@ -634,7 +635,7 @@ class Adam(Optimizer):
|
|
|
634
635
|
If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
|
|
635
636
|
one group of `params`.
|
|
636
637
|
|
|
637
|
-
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 1e-3.
|
|
638
|
+
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1e-3`` .
|
|
638
639
|
|
|
639
640
|
- float: The fixed learning rate value. Must be equal to or greater than 0.
|
|
640
641
|
|
|
@@ -649,22 +650,22 @@ class Adam(Optimizer):
|
|
|
649
650
|
LearningRateSchedule with step as the input to get the learning rate of current step.
|
|
650
651
|
|
|
651
652
|
beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
|
|
652
|
-
Default: 0.9.
|
|
653
|
+
Default: ``0.9`` .
|
|
653
654
|
beta2 (float): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
|
|
654
|
-
Default: 0.999.
|
|
655
|
-
eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
|
|
656
|
-
1e-8.
|
|
655
|
+
Default: ``0.999`` .
|
|
656
|
+
eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
|
|
657
|
+
Default: ``1e-8`` .
|
|
657
658
|
use_locking (bool): Whether to enable a lock to protect the updating process of variable tensors.
|
|
658
|
-
If true, updates of the `w`, `m`, and `v` tensors will be protected by a lock.
|
|
659
|
-
If false, the result is unpredictable. Default: False.
|
|
659
|
+
If ``true`` , updates of the `w`, `m`, and `v` tensors will be protected by a lock.
|
|
660
|
+
If ``false`` , the result is unpredictable. Default: ``False`` .
|
|
660
661
|
use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
|
|
661
|
-
If true, update the gradients using NAG.
|
|
662
|
-
If false, update the gradients without using NAG. Default: False.
|
|
662
|
+
If ``true`` , update the gradients using NAG.
|
|
663
|
+
If ``false`` , update the gradients without using NAG. Default: ``False`` .
|
|
663
664
|
use_amsgrad (bool): Whether to use Amsgrad algorithm to update the gradients.
|
|
664
|
-
If true, update the gradients using Amsgrad.
|
|
665
|
-
If false, update the gradients without using Amsgrad. Default: False.
|
|
665
|
+
If ``true`` , update the gradients using Amsgrad.
|
|
666
|
+
If ``false`` , update the gradients without using Amsgrad. Default: ``False`` .
|
|
666
667
|
|
|
667
|
-
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
|
|
668
|
+
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .
|
|
668
669
|
|
|
669
670
|
- float: The fixed weight decay value. Must be equal to or greater than 0.
|
|
670
671
|
|
|
@@ -681,14 +682,14 @@ class Adam(Optimizer):
|
|
|
681
682
|
|
|
682
683
|
kwargs:
|
|
683
684
|
|
|
684
|
-
- use_lazy (bool): Whether to use Lazy Adam algorithm. Default: False.
|
|
685
|
-
If true, apply lazy adam algorithm.
|
|
686
|
-
If false, apply normal adam algorithm.
|
|
685
|
+
- use_lazy (bool): Whether to use Lazy Adam algorithm. Default: ``False`` .
|
|
686
|
+
If ``true`` , apply lazy adam algorithm.
|
|
687
|
+
If ``false`` , apply normal adam algorithm.
|
|
687
688
|
|
|
688
689
|
- use_offload (bool): Whether to offload adam optimizer to host CPU and keep parameters being updated on
|
|
689
|
-
the device in order to minimize the memory cost. Default: False.
|
|
690
|
-
If true, apply offload adam.
|
|
691
|
-
If false, apply normal adam.
|
|
690
|
+
the device in order to minimize the memory cost. Default: ``False`` .
|
|
691
|
+
If ``true`` , apply offload adam.
|
|
692
|
+
If ``false`` , apply normal adam.
|
|
692
693
|
|
|
693
694
|
Inputs:
|
|
694
695
|
- **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
|
|
@@ -697,6 +698,7 @@ class Adam(Optimizer):
|
|
|
697
698
|
Tensor[bool], the value is True.
|
|
698
699
|
|
|
699
700
|
Raises:
|
|
701
|
+
KeyError: If kwargs got keys other than 'use_lazy' or 'use_offload'.
|
|
700
702
|
TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
|
|
701
703
|
TypeError: If element of `parameters` is neither Parameter nor dict.
|
|
702
704
|
TypeError: If `beta1`, `beta2`, `eps` or `loss_scale` is not a float.
|
|
@@ -705,8 +707,8 @@ class Adam(Optimizer):
|
|
|
705
707
|
ValueError: If `loss_scale` or `eps` is less than or equal to 0.
|
|
706
708
|
ValueError: If `beta1`, `beta2` is not in range (0.0, 1.0).
|
|
707
709
|
ValueError: If `weight_decay` is less than 0.
|
|
708
|
-
ValueError: If `use_lazy` and `use_offload` are both true.
|
|
709
|
-
ValueError: If `use_amsgrad` is true and (`use_lazy` or `use_offload` is true).
|
|
710
|
+
ValueError: If `use_lazy` and `use_offload` are both ``true`` .
|
|
711
|
+
ValueError: If `use_amsgrad` is ``true`` and (`use_lazy` or `use_offload` is ``true`` ).
|
|
710
712
|
ValueError: If `use_amsgrad` is ``true`` while using distributed training.
|
|
711
713
|
|
|
712
714
|
Supported Platforms:
|
|
@@ -716,7 +718,9 @@ class Adam(Optimizer):
|
|
|
716
718
|
>>> import mindspore as ms
|
|
717
719
|
>>> from mindspore import nn
|
|
718
720
|
>>>
|
|
719
|
-
>>>
|
|
721
|
+
>>> # Define the network structure of LeNet5. Refer to
|
|
722
|
+
>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
723
|
+
>>> net = LeNet5()
|
|
720
724
|
>>> #1) All parameters use the same learning rate and weight decay
|
|
721
725
|
>>> optim = nn.Adam(params=net.trainable_params())
|
|
722
726
|
>>>
|
|
@@ -741,6 +745,10 @@ class Adam(Optimizer):
|
|
|
741
745
|
def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False,
|
|
742
746
|
use_nesterov=False, weight_decay=0.0, loss_scale=1.0, use_amsgrad=False, **kwargs):
|
|
743
747
|
super(Adam, self).__init__(learning_rate, params, weight_decay, loss_scale)
|
|
748
|
+
valid_keys = {'use_lazy', 'use_offload'}
|
|
749
|
+
if set(kwargs.keys()) - valid_keys:
|
|
750
|
+
raise KeyError(f"For 'Adam', invalid keys are passed as kwargs, supported keys are 'use_lazy' and"
|
|
751
|
+
f"'use_offload', but got {kwargs.keys()}.")
|
|
744
752
|
use_lazy = kwargs.get('use_lazy', False)
|
|
745
753
|
use_offload = kwargs.get('use_offload', False)
|
|
746
754
|
_check_param_value(beta1, beta2, eps, self.cls_name)
|
|
@@ -790,7 +798,7 @@ class Adam(Optimizer):
|
|
|
790
798
|
else:
|
|
791
799
|
self._is_device = True
|
|
792
800
|
if use_amsgrad:
|
|
793
|
-
self.opt = P.
|
|
801
|
+
self.opt = P.ApplyAdamWithAmsgradV2(use_locking)
|
|
794
802
|
else:
|
|
795
803
|
self.opt = P.Adam(use_locking, use_nesterov)
|
|
796
804
|
self.sparse_opt = P.FusedSparseAdam(use_locking, use_nesterov)
|
|
@@ -910,6 +918,7 @@ class Adam(Optimizer):
|
|
|
910
918
|
gradients = self.scale_grad(gradients)
|
|
911
919
|
gradients = self._grad_sparse_indices_deduplicate(gradients)
|
|
912
920
|
lr = self.get_lr()
|
|
921
|
+
self.assignadd(self.global_step, self.global_step_increase_tensor)
|
|
913
922
|
|
|
914
923
|
beta1_power = self.beta1_power * self.beta1
|
|
915
924
|
self.beta1_power = beta1_power
|
|
@@ -977,7 +986,7 @@ class AdamWeightDecay(Optimizer):
|
|
|
977
986
|
There is usually no connection between an optimizer and mixed precision. But when `FixedLossScaleManager` is used
|
|
978
987
|
and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
|
|
979
988
|
As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means, refer
|
|
980
|
-
document `LossScale <https://www.mindspore.cn/tutorials/en/r2.
|
|
989
|
+
document `LossScale <https://www.mindspore.cn/tutorials/en/r2.2/advanced/mixed_precision.html>`_ to
|
|
981
990
|
process `loss_scale` correctly.
|
|
982
991
|
|
|
983
992
|
If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
|
|
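Since `AdamWeightDecay` has no `loss_scale` argument, the docstring above points to handling loss scaling elsewhere. One common pattern, sketched here with a stand-in network and arbitrary scale values, is to attach a loss-scale cell to the training wrapper:

    import mindspore.nn as nn

    net = nn.Dense(16, 10)                      # stand-in network
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
    net_with_loss = nn.WithLossCell(net, loss)
    optim = nn.AdamWeightDecay(params=net.trainable_params(), learning_rate=1e-3, weight_decay=0.01)
    scale_sense = nn.DynamicLossScaleUpdateCell(loss_scale_value=2 ** 12, scale_factor=2, scale_window=1000)
    train_step = nn.TrainOneStepWithLossScaleCell(net_with_loss, optim, scale_sense=scale_sense)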
@@ -1008,7 +1017,7 @@ class AdamWeightDecay(Optimizer):
|
|
|
1008
1017
|
If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
|
|
1009
1018
|
one group of `params`.
|
|
1010
1019
|
|
|
1011
|
-
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 1e-3.
|
|
1020
|
+
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1e-3`` .
|
|
1012
1021
|
|
|
1013
1022
|
- float: The fixed learning rate value. Must be equal to or greater than 0.
|
|
1014
1023
|
|
|
@@ -1022,14 +1031,14 @@ class AdamWeightDecay(Optimizer):
|
|
|
1022
1031
|
- LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
|
|
1023
1032
|
LearningRateSchedule with step as the input to get the learning rate of current step.
|
|
1024
1033
|
|
|
1025
|
-
beta1 (float): The exponential decay rate for the 1st moment estimations. Default: 0.9.
|
|
1034
|
+
beta1 (float): The exponential decay rate for the 1st moment estimations. Default: ``0.9`` .
|
|
1026
1035
|
Should be in range (0.0, 1.0).
|
|
1027
|
-
beta2 (float): The exponential decay rate for the 2nd moment estimations. Default: 0.999.
|
|
1036
|
+
beta2 (float): The exponential decay rate for the 2nd moment estimations. Default: ``0.999`` .
|
|
1028
1037
|
Should be in range (0.0, 1.0).
|
|
1029
|
-
eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6.
|
|
1038
|
+
eps (float): Term added to the denominator to improve numerical stability. Default: ``1e-6`` .
|
|
1030
1039
|
Should be greater than 0.
|
|
1031
1040
|
|
|
1032
|
-
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
|
|
1041
|
+
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .
|
|
1033
1042
|
|
|
1034
1043
|
- float: The fixed weight decay value. Must be equal to or greater than 0.
|
|
1035
1044
|
|
|
@@ -1060,7 +1069,9 @@ class AdamWeightDecay(Optimizer):
|
|
|
1060
1069
|
>>> import mindspore as ms
|
|
1061
1070
|
>>> from mindspore import nn
|
|
1062
1071
|
>>>
|
|
1063
|
-
>>>
|
|
1072
|
+
>>> # Define the network structure of LeNet5. Refer to
|
|
1073
|
+
>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
1074
|
+
>>> net = LeNet5()
|
|
1064
1075
|
>>> #1) All parameters use the same learning rate and weight decay
|
|
1065
1076
|
>>> optim = nn.AdamWeightDecay(params=net.trainable_params())
|
|
1066
1077
|
>>>
|
|
@@ -1089,16 +1100,17 @@ class AdamWeightDecay(Optimizer):
|
|
|
1089
1100
|
self.moments1 = self._parameters.clone(prefix="adam_m", init='zeros')
|
|
1090
1101
|
self.moments2 = self._parameters.clone(prefix="adam_v", init='zeros')
|
|
1091
1102
|
self.fused_opt = P.AdamWeightDecay()
|
|
1092
|
-
if context.get_context("device_target") == "
|
|
1093
|
-
self.use_fused_opt = True
|
|
1094
|
-
else:
|
|
1103
|
+
if context.get_context("device_target") == "Ascend":
|
|
1095
1104
|
self.use_fused_opt = False
|
|
1105
|
+
else:
|
|
1106
|
+
self.use_fused_opt = True
|
|
1096
1107
|
|
|
1097
1108
|
@jit
|
|
1098
1109
|
def construct(self, gradients):
|
|
1099
1110
|
gradients = self.flatten_gradients(gradients)
|
|
1100
1111
|
weight_decay = self.get_weight_decay()
|
|
1101
1112
|
lr = self.get_lr()
|
|
1113
|
+
self.assignadd(self.global_step, self.global_step_increase_tensor)
|
|
1102
1114
|
|
|
1103
1115
|
if self.use_fused_opt:
|
|
1104
1116
|
if self.is_group:
|
|
@@ -1206,7 +1218,7 @@ class AdamOffload(Optimizer):
|
|
|
1206
1218
|
If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
|
|
1207
1219
|
one group of `params`.
|
|
1208
1220
|
|
|
1209
|
-
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 1e-3.
|
|
1221
|
+
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1e-3`` .
|
|
1210
1222
|
|
|
1211
1223
|
- float: The fixed learning rate value. Must be equal to or greater than 0.
|
|
1212
1224
|
|
|
@@ -1221,19 +1233,19 @@ class AdamOffload(Optimizer):
|
|
|
1221
1233
|
LearningRateSchedule with step as the input to get the learning rate of current step.
|
|
1222
1234
|
|
|
1223
1235
|
beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
|
|
1224
|
-
Default: 0.9.
|
|
1236
|
+
Default: ``0.9`` .
|
|
1225
1237
|
beta2 (float): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
|
|
1226
|
-
Default: 0.999.
|
|
1227
|
-
eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
|
|
1228
|
-
|
|
1238
|
+
Default: ``0.999`` .
|
|
1239
|
+
eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
|
|
1240
|
+
Default: ``1e-8`` .
|
|
1229
1241
|
use_locking (bool): Whether to enable a lock to protect the updating process of variable tensors.
|
|
1230
|
-
If true, updates of the `w`, `m`, and `v` tensors will be protected by a lock.
|
|
1231
|
-
If false, the result is unpredictable. Default: False.
|
|
1242
|
+
If ``true`` , updates of the `w`, `m`, and `v` tensors will be protected by a lock.
|
|
1243
|
+
If ``false`` , the result is unpredictable. Default: ``False`` .
|
|
1232
1244
|
use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
|
|
1233
|
-
If true, update the gradients using NAG.
|
|
1234
|
-
If false, update the gradients without using NAG. Default: False.
|
|
1245
|
+
If ``true`` , update the gradients using NAG.
|
|
1246
|
+
If ``false`` , update the gradients without using NAG. Default: ``False`` .
|
|
1235
1247
|
|
|
1236
|
-
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
|
|
1248
|
+
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .
|
|
1237
1249
|
|
|
1238
1250
|
- float: The fixed weight decay value. Must be equal to or greater than 0.
|
|
1239
1251
|
|
|
@@ -1244,15 +1256,15 @@ class AdamOffload(Optimizer):
|
|
|
1244
1256
|
|
|
1245
1257
|
loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the
|
|
1246
1258
|
default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
|
|
1247
|
-
`FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
|
|
1259
|
+
`FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
|
|
1248
1260
|
`FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
|
|
1249
|
-
Default: 1.0.
|
|
1261
|
+
Default: ``1.0`` .
|
|
1250
1262
|
|
|
1251
1263
|
Inputs:
|
|
1252
1264
|
- **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
|
|
1253
1265
|
|
|
1254
1266
|
Outputs:
|
|
1255
|
-
Tensor[bool], the value is True.
|
|
1267
|
+
Tensor[bool], the value is ``True`` .
|
|
1256
1268
|
|
|
1257
1269
|
Raises:
|
|
1258
1270
|
TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
|
|
@@ -1271,7 +1283,9 @@ class AdamOffload(Optimizer):
|
|
|
1271
1283
|
>>> import mindspore as ms
|
|
1272
1284
|
>>> from mindspore import nn
|
|
1273
1285
|
>>>
|
|
1274
|
-
>>>
|
|
1286
|
+
>>> # Define the network structure of LeNet5. Refer to
|
|
1287
|
+
>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
1288
|
+
>>> net = LeNet5()
|
|
1275
1289
|
>>> #1) All parameters use the same learning rate and weight decay
|
|
1276
1290
|
>>> optim = nn.AdamOffload(params=net.trainable_params())
|
|
1277
1291
|
>>>
|
|
@@ -1318,6 +1332,7 @@ class AdamOffload(Optimizer):
|
|
|
1318
1332
|
gradients = self.decay_weight(gradients)
|
|
1319
1333
|
gradients = self.scale_grad(gradients)
|
|
1320
1334
|
lr = self.get_lr()
|
|
1335
|
+
self.assignadd(self.global_step, self.global_step_increase_tensor)
|
|
1321
1336
|
|
|
1322
1337
|
beta1_power = self.beta1_power * self.beta1
|
|
1323
1338
|
self.beta1_power = beta1_power
|
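The added kwargs check above restricts `Adam` to the two documented extra keys. A minimal sketch of the resulting behaviour (the unsupported key name is made up for illustration):

    import mindspore.nn as nn

    net = nn.Dense(16, 10)                      # stand-in network
    # 'use_lazy' and 'use_offload' are the only accepted extra keyword arguments
    optim = nn.Adam(params=net.trainable_params(), learning_rate=1e-3, use_lazy=True)
    try:
        nn.Adam(params=net.trainable_params(), use_sparse=True)   # hypothetical unsupported key
    except KeyError as err:
        print(err)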
mindspore/nn/optim/adamax.py
CHANGED
|
@@ -66,7 +66,7 @@ class AdaMax(Optimizer):
|
|
|
66
66
|
:math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector,
|
|
67
67
|
:math:`g` represents `gradients`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
|
|
68
68
|
:math:`t` represents the current step, :math:`beta_1^t` represents `beta1_power`,
|
|
69
|
-
:math
|
|
69
|
+
:math:`l` represents `learning_rate`, :math:`w` represents `params`,
|
|
70
70
|
:math:`\epsilon` represents `eps`.
|
|
71
71
|
|
|
72
72
|
Note:
|
|
@@ -102,7 +102,7 @@ class AdaMax(Optimizer):
|
|
|
102
102
|
If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
|
|
103
103
|
one group of `params`.
|
|
104
104
|
|
|
105
|
-
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 0.001.
|
|
105
|
+
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``0.001`` .
|
|
106
106
|
|
|
107
107
|
- float: The fixed learning rate value. Must be equal to or greater than 0.
|
|
108
108
|
|
|
@@ -117,13 +117,13 @@ class AdaMax(Optimizer):
|
|
|
117
117
|
LearningRateSchedule with step as the input to get the learning rate of current step.
|
|
118
118
|
|
|
119
119
|
beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
|
|
120
|
-
Default: 0.9.
|
|
120
|
+
Default: ``0.9`` .
|
|
121
121
|
beta2 (float): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
|
|
122
|
-
Default: 0.999.
|
|
123
|
-
eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
|
|
124
|
-
1e-
|
|
122
|
+
Default: ``0.999`` .
|
|
123
|
+
eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
|
|
124
|
+
Default: ``1e-08`` .
|
|
125
125
|
|
|
126
|
-
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
|
|
126
|
+
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .
|
|
127
127
|
|
|
128
128
|
- float: The fixed weight decay value. Must be equal to or greater than 0.
|
|
129
129
|
|
|
@@ -134,9 +134,9 @@ class AdaMax(Optimizer):
|
|
|
134
134
|
|
|
135
135
|
loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the
|
|
136
136
|
default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
|
|
137
|
-
`FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
|
|
137
|
+
`FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
|
|
138
138
|
`FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
|
|
139
|
-
Default: 1.0.
|
|
139
|
+
Default: ``1.0`` .
|
|
140
140
|
|
|
141
141
|
Inputs:
|
|
142
142
|
- **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
|
|
@@ -160,7 +160,9 @@ class AdaMax(Optimizer):
|
|
|
160
160
|
>>> import mindspore as ms
|
|
161
161
|
>>> from mindspore import nn
|
|
162
162
|
>>>
|
|
163
|
-
>>>
|
|
163
|
+
>>> # Define the network structure of LeNet5. Refer to
|
|
164
|
+
>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
165
|
+
>>> net = LeNet5()
|
|
164
166
|
>>> #1) All parameters use the same learning rate and weight decay
|
|
165
167
|
>>> optim = nn.AdaMax(params=net.trainable_params())
|
|
166
168
|
>>>
|
|
@@ -202,6 +204,7 @@ class AdaMax(Optimizer):
|
|
|
202
204
|
gradients = self.gradients_centralization(gradients)
|
|
203
205
|
gradients = self.scale_grad(gradients)
|
|
204
206
|
lr = self.get_lr()
|
|
207
|
+
self.assignadd(self.global_step, self.global_step_increase_tensor)
|
|
205
208
|
|
|
206
209
|
self.beta1_power *= self.beta1
|
|
207
210
|
|
mindspore/nn/optim/adasum.py
CHANGED
|
@@ -442,12 +442,14 @@ class AdaSumByGradWrapCell(Cell):
|
|
|
442
442
|
``Ascend`` ``GPU``
|
|
443
443
|
|
|
444
444
|
Examples:
|
|
445
|
+
>>> import mindspore as ms
|
|
445
446
|
>>> from mindspore import nn
|
|
446
|
-
>>>
|
|
447
|
-
>>>
|
|
448
|
-
>>>
|
|
447
|
+
>>> # Define the network structure of LeNet5. Refer to
|
|
448
|
+
>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
449
|
+
>>> net = LeNet5()
|
|
450
|
+
>>> optim = nn.AdaSumByGradWrapCell(nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9))
|
|
449
451
|
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
|
|
450
|
-
>>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
|
|
452
|
+
>>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim, metrics=None)
|
|
451
453
|
"""
|
|
452
454
|
def __init__(self, optimizer):
|
|
453
455
|
super(AdaSumByGradWrapCell, self).__init__(auto_prefix=False)
|
|
@@ -509,13 +511,15 @@ class AdaSumByDeltaWeightWrapCell(Cell):
|
|
|
509
511
|
``Ascend`` ``GPU``
|
|
510
512
|
|
|
511
513
|
Examples:
|
|
514
|
+
>>> import mindspore as ms
|
|
512
515
|
>>> from mindspore import nn
|
|
513
|
-
>>>
|
|
514
|
-
>>>
|
|
515
|
-
>>>
|
|
516
|
+
>>> # Define the network structure of LeNet5. Refer to
|
|
517
|
+
>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
518
|
+
>>> net = LeNet5()
|
|
519
|
+
>>> optim = nn.AdaSumByDeltaWeightWrapCell(nn.Momentum(params=net.trainable_params(),
|
|
516
520
|
... learning_rate=0.1, momentum=0.9))
|
|
517
521
|
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
|
|
518
|
-
>>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
|
|
522
|
+
>>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim, metrics=None)
|
|
519
523
|
"""
|
|
520
524
|
def __init__(self, optimizer):
|
|
521
525
|
super(AdaSumByDeltaWeightWrapCell, self).__init__(auto_prefix=False)
|
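The revised AdaSum examples above wrap an inner optimizer and hand the result to `ms.train.Model`. A sketch mirroring that docstring example; it assumes the LeNet5 definition referenced there and an already-initialised multi-device parallel context, so it is not runnable standalone:

    import mindspore as ms
    import mindspore.nn as nn

    # Assumes LeNet5 is defined as in the referenced lenet.py and that a
    # distributed/parallel context has already been initialised (Ascend or GPU).
    net = LeNet5()
    inner = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
    optim = nn.AdaSumByGradWrapCell(inner)
    loss = nn.SoftmaxCrossEntropyWithLogits()
    model = ms.train.Model(net, loss_fn=loss, optimizer=optim, metrics=None)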
mindspore/nn/optim/asgd.py
CHANGED
|
@@ -81,7 +81,7 @@ class ASGD(Optimizer):
|
|
|
81
81
|
If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
|
|
82
82
|
one group of `params`.
|
|
83
83
|
|
|
84
|
-
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): learning_rate. Default: 0.1.
|
|
84
|
+
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): learning_rate. Default: ``0.1`` .
|
|
85
85
|
|
|
86
86
|
- float: The fixed learning rate value. Must be equal to or greater than 0.
|
|
87
87
|
|
|
@@ -95,10 +95,10 @@ class ASGD(Optimizer):
|
|
|
95
95
|
- LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
|
|
96
96
|
LearningRateSchedule with step as the input to get the learning rate of current step.
|
|
97
97
|
|
|
98
|
-
lambd (float): The decay term. Default: 1e-4.
|
|
99
|
-
alpha (float): The power for :math:`\eta` update. Default: 0.75.
|
|
100
|
-
t0 (float): The point of starting averaging. Default: 1e6.
|
|
101
|
-
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
|
|
98
|
+
lambd (float): The decay term. Default: ``1e-4`` .
|
|
99
|
+
alpha (float): The power for :math:`\eta` update. Default: ``0.75`` .
|
|
100
|
+
t0 (float): The point of starting averaging. Default: ``1e6`` .
|
|
101
|
+
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .
|
|
102
102
|
|
|
103
103
|
- float: The fixed weight decay value. Must be equal to or greater than 0.
|
|
104
104
|
|
|
@@ -127,7 +127,9 @@ class ASGD(Optimizer):
|
|
|
127
127
|
>>> import mindspore as ms
|
|
128
128
|
>>> from mindspore import nn
|
|
129
129
|
>>>
|
|
130
|
-
>>>
|
|
130
|
+
>>> # Define the network structure of LeNet5. Refer to
|
|
131
|
+
>>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
132
|
+
>>> net = LeNet5()
|
|
131
133
|
>>> #1) All parameters use the same learning rate and weight decay
|
|
132
134
|
>>> optim = nn.ASGD(params=net.trainable_params())
|
|
133
135
|
>>>
|
|
@@ -145,7 +147,7 @@ class ASGD(Optimizer):
|
|
|
145
147
|
>>> # The final order of the parameters that the optimizer follows is the value of 'order_params'.
|
|
146
148
|
>>>
|
|
147
149
|
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
|
|
148
|
-
>>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
|
|
150
|
+
>>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
|
|
149
151
|
"""
|
|
150
152
|
|
|
151
153
|
@opt_init_args_register
|
|
@@ -183,8 +185,7 @@ class ASGD(Optimizer):
|
|
|
183
185
|
gradients = self.gradients_centralization(gradients)
|
|
184
186
|
gradients = self.scale_grad(gradients)
|
|
185
187
|
lrs = self.get_lr()
|
|
186
|
-
|
|
187
|
-
self.assignadd(self.global_step, self.global_step_increase_tensor)
|
|
188
|
+
self.assignadd(self.global_step, self.global_step_increase_tensor)
|
|
188
189
|
success = True
|
|
189
190
|
params = self._parameters
|
|
190
191
|
for index, (grad, param, mu, eta, ax) in enumerate(zip(gradients, params, self.mu, self.eta, self.ax)):
|