mindspore 2.0.0a0__cp37-cp37m-win_amd64.whl → 2.0.0rc1__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -2
- mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +16 -1
- mindspore/_extends/parse/parser.py +107 -22
- mindspore/_extends/parse/resources.py +0 -7
- mindspore/_extends/parse/standard_method.py +885 -413
- mindspore/amp.py +52 -57
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +38 -20
- mindspore/boost/dim_reduce.py +3 -3
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +4 -6
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +41 -7
- mindspore/common/api.py +215 -141
- mindspore/common/dtype.py +8 -1
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +4 -2
- mindspore/common/jit_config.py +17 -13
- mindspore/common/mutable.py +33 -13
- mindspore/common/parameter.py +23 -21
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +62 -41
- mindspore/common/tensor.py +852 -1154
- mindspore/communication/__init__.py +2 -2
- mindspore/communication/_comm_helper.py +11 -4
- mindspore/communication/management.py +22 -21
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +201 -23
- mindspore/dataset/__init__.py +6 -6
- mindspore/dataset/audio/__init__.py +7 -7
- mindspore/dataset/audio/transforms.py +670 -30
- mindspore/dataset/audio/utils.py +47 -4
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +210 -14
- mindspore/dataset/core/validator_helpers.py +2 -2
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +322 -66
- mindspore/dataset/engine/datasets_audio.py +80 -76
- mindspore/dataset/engine/datasets_standard_format.py +51 -38
- mindspore/dataset/engine/datasets_text.py +232 -118
- mindspore/dataset/engine/datasets_user_defined.py +41 -17
- mindspore/dataset/engine/datasets_vision.py +746 -225
- mindspore/dataset/engine/graphdata.py +75 -10
- mindspore/dataset/engine/iterators.py +45 -5
- mindspore/dataset/engine/offload.py +48 -28
- mindspore/dataset/engine/validators.py +117 -8
- mindspore/dataset/text/__init__.py +6 -5
- mindspore/dataset/text/transforms.py +86 -3
- mindspore/dataset/text/utils.py +6 -4
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +3 -2
- mindspore/dataset/transforms/c_transforms.py +1 -1
- mindspore/dataset/transforms/transforms.py +2 -2
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +2 -3
- mindspore/dataset/vision/c_transforms.py +9 -9
- mindspore/dataset/vision/py_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +2 -0
- mindspore/dataset/vision/transforms.py +160 -161
- mindspore/dataset/vision/utils.py +3 -3
- mindspore/experimental/map_parameter.py +38 -26
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +44 -9
- mindspore/include/api/delegate.h +1 -1
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_parallel_runner.h +2 -2
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +19 -3
- mindspore/include/api/types.h +3 -3
- mindspore/include/dataset/constants.h +7 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filereader.py +18 -0
- mindspore/mindrecord/filewriter.py +197 -34
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
- mindspore/mindrecord/tools/csv_to_mr.py +3 -3
- mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/__init__.py +0 -4
- mindspore/nn/cell.py +204 -132
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +7 -6
- mindspore/nn/layer/__init__.py +5 -4
- mindspore/nn/layer/activation.py +40 -89
- mindspore/nn/layer/basic.py +255 -624
- mindspore/nn/layer/channel_shuffle.py +7 -6
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +41 -4
- mindspore/nn/layer/conv.py +64 -28
- mindspore/nn/layer/dense.py +9 -8
- mindspore/nn/layer/embedding.py +27 -25
- mindspore/nn/layer/image.py +53 -46
- mindspore/nn/layer/math.py +97 -105
- mindspore/nn/layer/normalization.py +117 -86
- mindspore/nn/layer/padding.py +185 -95
- mindspore/nn/layer/pooling.py +817 -414
- mindspore/nn/layer/rnn_cells.py +10 -15
- mindspore/nn/layer/rnns.py +37 -38
- mindspore/nn/layer/thor_layer.py +11 -12
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +5 -4
- mindspore/nn/loss/loss.py +334 -199
- mindspore/nn/optim/ada_grad.py +6 -6
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +4 -5
- mindspore/nn/optim/adam.py +126 -62
- mindspore/nn/optim/adamax.py +3 -4
- mindspore/nn/optim/adasum.py +6 -6
- mindspore/nn/optim/asgd.py +2 -2
- mindspore/nn/optim/ftrl.py +67 -38
- mindspore/nn/optim/lamb.py +4 -5
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +43 -4
- mindspore/nn/optim/momentum.py +6 -5
- mindspore/nn/optim/optimizer.py +3 -1
- mindspore/nn/optim/proximal_ada_grad.py +2 -2
- mindspore/nn/optim/rmsprop.py +1 -1
- mindspore/nn/optim/rprop.py +8 -9
- mindspore/nn/optim/sgd.py +19 -13
- mindspore/nn/optim/thor.py +10 -15
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +4 -4
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +9 -15
- mindspore/nn/probability/distribution/bernoulli.py +3 -3
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +5 -7
- mindspore/nn/probability/distribution/cauchy.py +3 -3
- mindspore/nn/probability/distribution/distribution.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +3 -3
- mindspore/nn/probability/distribution/half_normal.py +15 -11
- mindspore/nn/probability/distribution/laplace.py +16 -13
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/normal.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/student_t.py +20 -15
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +27 -10
- mindspore/nn/wrap/grad_reducer.py +2 -2
- mindspore/nn/wrap/loss_scale.py +40 -24
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +35 -30
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +22 -19
- mindspore/numpy/utils.py +1 -1
- mindspore/numpy/utils_const.py +108 -58
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +86 -117
- mindspore/ops/_grad/grad_base.py +23 -1
- mindspore/ops/_grad/grad_clip_ops.py +2 -3
- mindspore/ops/_grad/grad_comm_ops.py +34 -24
- mindspore/ops/_grad/grad_implementations.py +9 -45
- mindspore/ops/_grad/grad_inner_ops.py +47 -4
- mindspore/ops/_grad/grad_math_ops.py +142 -117
- mindspore/ops/_grad/grad_nn_ops.py +71 -165
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +7 -6
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
- mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
- mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
- mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -611
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_register_for_op.py +1 -0
- mindspore/ops/_utils/__init__.py +1 -2
- mindspore/ops/_utils/utils.py +19 -40
- mindspore/ops/_vmap/vmap_array_ops.py +116 -38
- mindspore/ops/_vmap/vmap_base.py +16 -9
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
- mindspore/ops/_vmap/vmap_image_ops.py +12 -5
- mindspore/ops/_vmap/vmap_math_ops.py +46 -5
- mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
- mindspore/ops/_vmap/vmap_random_ops.py +1 -1
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
- mindspore/ops/composite/__init__.py +7 -8
- mindspore/ops/composite/base.py +101 -47
- mindspore/ops/composite/math_ops.py +188 -158
- mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
- mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
- mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
- mindspore/ops/function/__init__.py +152 -8
- mindspore/ops/function/array_func.py +2555 -674
- mindspore/ops/function/clip_func.py +209 -13
- mindspore/ops/function/debug_func.py +2 -2
- mindspore/ops/function/grad/__init__.py +2 -1
- mindspore/ops/function/grad/grad_func.py +147 -62
- mindspore/ops/function/image_func.py +54 -38
- mindspore/ops/function/linalg_func.py +167 -16
- mindspore/ops/function/math_func.py +4849 -1492
- mindspore/ops/function/nn_func.py +2573 -988
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +3 -3
- mindspore/ops/function/random_func.py +790 -73
- mindspore/ops/function/sparse_func.py +98 -78
- mindspore/ops/function/sparse_unary_func.py +54 -53
- mindspore/ops/function/spectral_func.py +27 -24
- mindspore/ops/function/vmap_func.py +22 -2
- mindspore/ops/functional.py +97 -37
- mindspore/ops/op_info_register.py +70 -28
- mindspore/ops/operations/__init__.py +47 -14
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +5 -5
- mindspore/ops/operations/_grad_ops.py +276 -187
- mindspore/ops/operations/_inner_ops.py +319 -113
- mindspore/ops/operations/_ms_kernel.py +10 -8
- mindspore/ops/operations/_ocr_ops.py +9 -9
- mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
- mindspore/ops/operations/_quant_ops.py +137 -102
- mindspore/ops/operations/_rl_inner_ops.py +121 -60
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1004 -2
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +801 -466
- mindspore/ops/operations/comm_ops.py +51 -49
- mindspore/ops/operations/control_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +123 -44
- mindspore/ops/operations/debug_ops.py +24 -24
- mindspore/ops/operations/image_ops.py +240 -153
- mindspore/ops/operations/inner_ops.py +34 -50
- mindspore/ops/operations/linalg_ops.py +31 -9
- mindspore/ops/operations/math_ops.py +988 -757
- mindspore/ops/operations/nn_ops.py +965 -819
- mindspore/ops/operations/other_ops.py +51 -40
- mindspore/ops/operations/random_ops.py +204 -122
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +254 -93
- mindspore/ops/operations/spectral_ops.py +35 -3
- mindspore/ops/primitive.py +111 -9
- mindspore/parallel/_auto_parallel_context.py +189 -83
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +99 -7
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +7 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
- mindspore/parallel/_utils.py +1 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +37 -34
- mindspore/parallel/shard.py +17 -18
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +69 -47
- mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
- mindspore/profiler/parser/base_timeline_generator.py +49 -56
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
- mindspore/profiler/parser/hwts_log_parser.py +1 -1
- mindspore/profiler/parser/integrator.py +15 -14
- mindspore/profiler/parser/minddata_analyzer.py +2 -2
- mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +2 -1
- mindspore/profiler/profiling.py +218 -186
- mindspore/rewrite/__init__.py +3 -1
- mindspore/rewrite/api/node.py +1 -114
- mindspore/rewrite/api/node_type.py +3 -0
- mindspore/rewrite/api/pattern_engine.py +31 -1
- mindspore/rewrite/api/scoped_value.py +4 -4
- mindspore/rewrite/api/symbol_tree.py +3 -78
- mindspore/rewrite/api/tree_node_helper.py +1 -1
- mindspore/rewrite/ast_creator_register.py +1 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -2
- mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
- mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
- mindspore/rewrite/namespace.py +0 -2
- mindspore/rewrite/node.py +157 -11
- mindspore/rewrite/parsers/assign_parser.py +231 -53
- mindspore/rewrite/parsers/class_def_parser.py +187 -109
- mindspore/rewrite/parsers/for_parser.py +24 -14
- mindspore/rewrite/parsers/function_def_parser.py +21 -4
- mindspore/rewrite/parsers/if_parser.py +6 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +256 -133
- mindspore/rewrite/symbol_tree_builder.py +38 -1
- mindspore/run_check/_check_version.py +69 -63
- mindspore/run_check/run_check.py +2 -1
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +1 -1
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +273 -102
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +2 -2
- mindspore/train/callback/_checkpoint.py +3 -3
- mindspore/train/callback/_early_stop.py +3 -3
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +29 -31
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +3 -3
- mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
- mindspore/train/callback/_summary_collector.py +23 -16
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +15 -3
- mindspore/train/dataset_helper.py +10 -15
- mindspore/train/loss_scale_manager.py +8 -11
- mindspore/train/metrics/__init__.py +1 -1
- mindspore/train/metrics/bleu_score.py +1 -1
- mindspore/train/metrics/confusion_matrix.py +1 -1
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/dice.py +2 -2
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +4 -3
- mindspore/train/metrics/mean_surface_distance.py +2 -2
- mindspore/train/metrics/occlusion_sensitivity.py +1 -1
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +1 -1
- mindspore/train/metrics/recall.py +1 -1
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +45 -28
- mindspore/train/serialization.py +295 -188
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -13
- mindspore/train/train_thor/convert_utils.py +2 -2
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
- mindspore/compression/__init__.py +0 -19
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -515
- mindspore/compression/quant/__init__.py +0 -28
- mindspore/compression/quant/qat.py +0 -634
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -140
- mindspore/nn/probability/dpn/vae/vae.py +0 -124
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
- mindspore/ops/composite/array_ops.py +0 -241
- mindspore/ops/composite/clip_ops.py +0 -134
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
mindspore/nn/optim/ada_grad.py
CHANGED
@@ -16,7 +16,7 @@
 from __future__ import absolute_import
 
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore
+from mindspore import _checkparam as validator
 from mindspore.common.api import jit
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.nn.optim.optimizer import opt_init_args_register
@@ -53,7 +53,7 @@ class Adagrad(Optimizer):
     The updating Pseudo codes are as follows:
 
     .. math::
-
+        \begin{aligned} \\
             &\newline
             &\hline \\
             &\textbf{Parameters}: \text{learning rate } \gamma, \: \text{ params } w_0, \:
@@ -73,7 +73,7 @@ class Adagrad(Optimizer):
             &\bf{return} \: w_t \\[-1.ex]
             &\newline
             &\hline \\
-
+        \end{aligned}
 
     :math:`state\_sum` stands for the accumulated squared sum of the gradients :math:`accum`.
     :math:`g` stands for `grads`, :math:`\lambda` stands for `weight_decay`.
@@ -112,7 +112,7 @@ class Adagrad(Optimizer):
            If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
            one group of `params`.
 
-        accum (float): The starting value for
+        accum (float): The starting value for :math:`h`, must be zero or positive values. Default: 0.1.
        learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 0.001.
 
            - float: The fixed learning rate value. Must be equal to or greater than 0.
@@ -127,7 +127,7 @@ class Adagrad(Optimizer):
            - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
              LearningRateSchedule with step as the input to get the learning rate of current step.
 
-        update_slots (bool): Whether the
+        update_slots (bool): Whether the :math:`h` will be updated. Default: True.
        loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value.
            Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
            `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
@@ -159,7 +159,7 @@ class Adagrad(Optimizer):
        ValueError: If `accum` or `weight_decay` is less than 0.
 
    Supported Platforms:
-        ``Ascend`` ``
+        ``Ascend`` ``GPU`` ``CPU``
 
    Examples:
        >>> import mindspore as ms
mindspore/nn/optim/adadelta.py
CHANGED
@@ -16,8 +16,7 @@
 from __future__ import absolute_import
 
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore
-from mindspore._checkparam import Rel
+from mindspore import _checkparam as validator
 from mindspore.common.tensor import Tensor
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.nn.optim.optimizer import opt_init_args_register
@@ -37,7 +36,7 @@ def _check_param_value(rho, epsilon, prim_name=None):
     """Check inputs param."""
     validator.check_value_type("rho", rho, [float], prim_name)
     validator.check_value_type("epsilon", epsilon, [float], prim_name)
-    validator.check_float_range(rho, 0.0, 1.0,
+    validator.check_float_range(rho, 0.0, 1.0, validator.INC_BOTH, "rho", prim_name)
     validator.check_non_negative_float(epsilon, "epsilon", prim_name)
 
 
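
The same import change recurs across the optimizer files in this release: the private `Validator`/`Rel` helpers are replaced by the `mindspore._checkparam` module, with the `Rel.INC_*` bound flags now exposed as `validator.INC_*`. A minimal sketch of the new call style, using only the calls visible in these hunks ("MyOp" is a hypothetical prim_name):

from mindspore import _checkparam as validator

# Inclusive-bounds check, as in the adadelta.py hunk above.
validator.check_float_range(0.9, 0.0, 1.0, validator.INC_BOTH, "rho", "MyOp")
# Exclusive / left-inclusive variants appear in adafactor.py and adam.py below.
validator.check_float_range(0.9, 0.0, 1.0, validator.INC_NEITHER, "beta1", "MyOp")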
mindspore/nn/optim/adafactor.py
CHANGED
@@ -25,8 +25,7 @@ from mindspore.ops import composite as C
 from mindspore.ops import functional as F
 from mindspore.common.parameter import Parameter, ParameterTuple
 from mindspore.common.tensor import Tensor
-from mindspore
-from mindspore._checkparam import Rel
+from mindspore import _checkparam as validator
 from mindspore.nn.optim.optimizer import opt_init_args_register
 from mindspore.nn.optim.optimizer import Optimizer
 
@@ -153,7 +152,7 @@ class AdaFactor(Optimizer):
     Cost <https://arxiv.org/abs/1804.04235>`_.
 
     .. warning::
-        This is an experimental
+        This is an experimental API that is subject to change or deletion.
 
     Adafactor for weight vector are as follows,
 
@@ -316,8 +315,8 @@ class AdaFactor(Optimizer):
         validator.check_value_type("clip_threshold", clip_threshold, [float], self.cls_name)
         validator.check_non_negative_float(clip_threshold, "clip_threshold", self.cls_name)
         validator.check_value_type("decay_rate", decay_rate, [float], self.cls_name)
-        validator.check_float_range(decay_rate, 0, 1,
-        validator.check_float_range(weight_decay, 0, 1,
+        validator.check_float_range(decay_rate, 0, 1, validator.INC_NEITHER, "decay_rate", self.cls_name)
+        validator.check_float_range(weight_decay, 0, 1, validator.INC_LEFT, "weight_decay", self.cls_name)
         validator.check_value_type("scale_parameter", scale_parameter, [bool], self.cls_name)
         validator.check_value_type("relative_step", relative_step, [bool], self.cls_name)
         validator.check_value_type("compression", compression, [bool], self.cls_name)
mindspore/nn/optim/adam.py
CHANGED
@@ -26,8 +26,7 @@ from mindspore.ops import composite as C
 from mindspore.ops import functional as F
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
-from mindspore
-from mindspore._checkparam import Rel
+from mindspore import _checkparam as validator
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.nn.optim.optimizer import opt_init_args_register
 from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
@@ -93,6 +92,46 @@ def _run_lazy_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use
     return success
 
 
+@_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
+                         "Tensor", "Tensor", "Tensor", "Tensor", "MapTensor", "MapTensor", "MapTensor", "MapTensor",
+                         "Bool", "Bool", "Function", "Bool", "Function", "Bool")
+def _run_map_tensor_lazy_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use_nesterov, target,
+                                              beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params, m, v,
+                                              ps_parameter, cache_enable, distributed_opt, use_flag,
+                                              distributed_sparse_opt, use_sparse_flag):
+    """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
+    success = True
+    indices, values = gradient.get_data()
+    if use_sparse_flag:
+        # PS Mode.
+        success = F.depend(success, distributed_sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
+                                                           eps, values, indices))
+    else:
+        # PS Cache mode.
+        op_sqrt = P.Sqrt()
+
+        m_slice = m.get(indices)
+        v_slice = v.get(indices)
+
+        next_m = m_slice * beta1 + values * (1 - beta1)
+        next_v = v_slice * beta2 + values * values * (1 - beta2)
+
+        lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
+
+        if use_nesterov:
+            m_temp = beta1 * next_m + values * (1 - beta1)
+            param_update = m_temp / (op_sqrt(next_v) + eps)
+        else:
+            param_update = next_m / (op_sqrt(next_v) + eps)
+
+        params_need_update = params.get(indices)
+        params.put(indices, params_need_update - lr_t * param_update)
+        m.put(indices, next_m)
+        v.put(indices, next_v)
+
+    return success
+
+
 @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
                          "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool",
                          "Function", "Bool", "Function", "Bool")
@@ -360,18 +399,14 @@ def _run_opt_with_one_number_dist(opt, sparse_opt, push, pull, use_locking, use_
 
 
 @_adam_opt.register("Function", "Function", "Function", "Function",
-                    "Bool", "Bool", "Bool",
+                    "Bool", "Bool", "Bool",
                     "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor",
-                    "RowTensor", "Tensor", "Tensor", "Tensor", "
+                    "RowTensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
 def _run_opt_with_sparse(opt, sparse_opt, push, pull,
-                         use_locking, use_nesterov,
+                         use_locking, use_nesterov, target,
                          beta1_power, beta2_power, beta1, beta2, eps, lr,
-                         gradient, param, m, v,
+                         gradient, param, m, v, ps_parameter, cache_enable):
     """Apply sparse adam optimizer to the weight parameter when the gradient is sparse."""
-    if use_amsgrad:
-        raise Exception("""Adam with amsgrad is currently not supported when the gradients are sparse!
-                        Please set use_amsgrad=False for sparse gradients.""")
-
     success = True
     indices = gradient.indices
     values = gradient.values
@@ -429,30 +464,42 @@ def _run_opt_with_sparse(opt, sparse_opt, push, pull,
 
 
 @_adam_opt.register("Function", "Function", "Function", "Function",
-                    "Bool", "Bool", "Bool",
+                    "Bool", "Bool", "Bool",
                     "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor",
-                    "Tensor", "Tensor", "Tensor", "Tensor", "
+                    "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
 def _run_opt_with_one_number(opt, sparse_opt, push, pull,
-                             use_locking, use_nesterov,
+                             use_locking, use_nesterov, target,
                              beta1_power, beta2_power, beta1, beta2, eps, lr,
-                             gradient, param, moment1, moment2,
+                             gradient, param, moment1, moment2, ps_parameter, cache_enable):
     """Apply adam optimizer to the weight parameter using Tensor."""
     success = True
     if ps_parameter and not cache_enable:
         op_shape = P.Shape()
-
-
-                                              (op_shape(param), op_shape(moment1), op_shape(moment2),
-                                               op_shape(vhat))), param))
-    else:
-        success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
-                                              (op_shape(param), op_shape(moment1), op_shape(moment2))), param))
+        success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
+                                              (op_shape(param), op_shape(moment1), op_shape(moment2))), param))
     else:
-
-
-
-
-
+        success = F.depend(success, opt(param, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
+                                        eps, gradient))
+    return success
+
+
+@_adam_opt.register("Function", "Function", "Function", "Function",
+                    "Bool", "Bool", "Bool",
+                    "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor",
+                    "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
+def _run_opt_with_one_number_use_amsgrad(opt, sparse_opt, push, pull,
+                                         use_locking, use_nesterov, target,
+                                         beta1_power, beta2_power, beta1, beta2, eps, lr,
+                                         gradient, param, moment1, moment2, vhat, ps_parameter, cache_enable):
+    """Apply adam optimizer to the weight parameter using Tensor and use amsgrad."""
+    success = True
+    if ps_parameter and not cache_enable:
+        op_shape = P.Shape()
+        success = F.depend(success, pull(push((beta1_power, beta2_power, lr, gradient),
+                                              (op_shape(param), op_shape(moment1), op_shape(moment2),
+                                               op_shape(vhat))), param))
+    else:
+        success = F.depend(success, opt(param, moment1, moment2, vhat, beta1_power, beta2_power, lr, gradient))
     return success
 
 
@@ -484,8 +531,8 @@ def _check_param_value(beta1, beta2, eps, prim_name):
     validator.check_value_type("beta1", beta1, [float], prim_name)
     validator.check_value_type("beta2", beta2, [float], prim_name)
     validator.check_value_type("eps", eps, [float], prim_name)
-    validator.check_float_range(beta1, 0.0, 1.0,
-    validator.check_float_range(beta2, 0.0, 1.0,
+    validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
+    validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
     validator.check_positive_float(eps, "eps", prim_name)
 
 
@@ -723,7 +770,8 @@ class Adam(Optimizer):
         self.use_offload = use_offload
         self.moment1 = self._parameters.clone(prefix="moment1", init='zeros')
         self.moment2 = self._parameters.clone(prefix="moment2", init='zeros')
-
+        if use_amsgrad:
+            self.vhat = self._parameters.clone(prefix="vhat", init='zeros')
 
         if use_offload:
             self.opt = P.AdamNoUpdateParam(use_locking, use_nesterov)
@@ -756,25 +804,8 @@ class Adam(Optimizer):
 
         self._init_distributed_opts(use_locking, use_nesterov)
 
-
-
-        params = self._parameters
-        moment1 = self.moment1
-        moment2 = self.moment2
-        vhat = self.vhat
-        gradients = self.flatten_gradients(gradients)
-        gradients = self.decay_weight(gradients)
-        if not self.use_offload:
-            gradients = self.gradients_centralization(gradients)
-        gradients = self.scale_grad(gradients)
-        gradients = self._grad_sparse_indices_deduplicate(gradients)
-        lr = self.get_lr()
-
-        beta1_power = self.beta1_power * self.beta1
-        self.beta1_power = beta1_power
-        beta2_power = self.beta2_power * self.beta2
-        self.beta2_power = beta2_power
-
+    def _apply_adam(self, params, beta1_power, beta2_power, moment1, moment2, lr, gradients):
+        """Execute Adam optimizer and its variants."""
         if self.use_offload:
             if self.is_group_lr:
                 success = self.map_reverse(F.partial(_adam_opt, self.opt, beta1_power, beta2_power, self.beta1,
@@ -831,13 +862,19 @@ class Adam(Optimizer):
                                               self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
                                               self.eps), lr, gradients, params, moment1, moment2,
                                               self.ps_parameters, self.cache_enable)
-
             else:
-
-
-
-
-
+                if self.use_amsgrad:
+                    success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
+                                                  self._ps_pull, self.use_locking, self.use_nesterov,
+                                                  self._is_device, beta1_power, beta2_power,
+                                                  self.beta1, self.beta2, self.eps), lr, gradients, params,
+                                                  moment1, moment2, self.vhat, self.ps_parameters, self.cache_enable)
+                else:
+                    success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
+                                                  self._ps_pull, self.use_locking, self.use_nesterov,
+                                                  self._is_device, beta1_power, beta2_power,
+                                                  self.beta1, self.beta2, self.eps), lr, gradients, params,
+                                                  moment1, moment2, self.ps_parameters, self.cache_enable)
         else:
             if self.use_lazy:
                 success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt, self._ps_push,
@@ -845,15 +882,42 @@ class Adam(Optimizer):
                                               self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
                                               self.eps, lr), gradients, params, moment1, moment2,
                                               self.ps_parameters, self.cache_enable)
-
             else:
-
-
-
-
-
+                if self.use_amsgrad:
+                    success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
+                                                  self._ps_pull, self.use_locking, self.use_nesterov,
+                                                  self._is_device, beta1_power, beta2_power,
+                                                  self.beta1, self.beta2, self.eps, lr), gradients, params,
+                                                  moment1, moment2, self.vhat, self.ps_parameters, self.cache_enable)
+                else:
+                    success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self._ps_push,
+                                                  self._ps_pull, self.use_locking, self.use_nesterov,
+                                                  self._is_device, beta1_power, beta2_power,
+                                                  self.beta1, self.beta2, self.eps, lr), gradients, params,
+                                                  moment1, moment2, self.ps_parameters, self.cache_enable)
+
         return success
 
+    @jit
+    def construct(self, gradients):
+        params = self._parameters
+        moment1 = self.moment1
+        moment2 = self.moment2
+        gradients = self.flatten_gradients(gradients)
+        gradients = self.decay_weight(gradients)
+        if not self.use_offload:
+            gradients = self.gradients_centralization(gradients)
+        gradients = self.scale_grad(gradients)
+        gradients = self._grad_sparse_indices_deduplicate(gradients)
+        lr = self.get_lr()
+
+        beta1_power = self.beta1_power * self.beta1
+        self.beta1_power = beta1_power
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        return self._apply_adam(params, beta1_power, beta2_power, moment1, moment2, lr, gradients)
+
     @Optimizer.target.setter
     def target(self, value):
         """
@@ -907,13 +971,13 @@ class AdamWeightDecay(Optimizer):
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
     :math:`g` represents `gradients`, :math:`\gamma` represents `learning_rate`,
     :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, :math:`t` represents the current step,
-    :math:`w` represents `params`, :math:`\
+    :math:`w` represents `params`, :math:`\lambda` represents `weight_decay`.
 
     Note:
        There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
        and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
        As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means, refer
-        document `LossScale <https://www.mindspore.cn/tutorials/
+        document `LossScale <https://www.mindspore.cn/tutorials/en/r2.0/advanced/mixed_precision.html>`_ to
        process `loss_scale` correctly.
 
    If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
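
The adam.py changes above split the former monolithic update into `construct` plus `_apply_adam`, clone the extra `vhat` slot only when `use_amsgrad` is set, and add a dedicated `_run_opt_with_one_number_use_amsgrad` branch. A hedged usage sketch of what this enables, assuming `use_amsgrad` is the constructor flag the hunks suggest:

import mindspore.nn as nn

net = nn.Dense(4, 2)  # any Cell with trainable parameters will do
optimizer = nn.Adam(net.trainable_params(), learning_rate=1e-3, use_amsgrad=True)
# With use_amsgrad=True the constructor keeps an extra per-parameter slot (vhat),
# which the amsgrad-specific update branch in this diff consumes.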
mindspore/nn/optim/adamax.py
CHANGED
@@ -23,10 +23,9 @@ from mindspore.ops import composite as C
 from mindspore.ops import functional as F
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
-from mindspore
+from mindspore import _checkparam as validator
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.nn.optim.optimizer import opt_init_args_register
-from mindspore._checkparam import Rel
 
 _ada_max_opt = C.MultitypeFuncGraph("ada_max_opt")
 
@@ -44,8 +43,8 @@ def _check_param_value(beta1, beta2, eps, prim_name):
     validator.check_value_type("beta1", beta1, [float], prim_name)
     validator.check_value_type("beta2", beta2, [float], prim_name)
     validator.check_value_type("eps", eps, [float], prim_name)
-    validator.check_float_range(beta1, 0.0, 1.0,
-    validator.check_float_range(beta2, 0.0, 1.0,
+    validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
+    validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
     validator.check_positive_float(eps, "eps", prim_name)
 
 
mindspore/nn/optim/adasum.py
CHANGED
@@ -22,7 +22,7 @@ import math
 import mindspore.nn as nn
 import mindspore.log as logger
 from mindspore import context
-from mindspore
+from mindspore import _checkparam as validator
 from mindspore.nn.cell import Cell
 from mindspore.common.parameter import ParameterTuple, Parameter
 from mindspore.parallel._utils import _get_global_rank, _get_stage_device_num
@@ -114,7 +114,7 @@ def _adasum_opt_forward_process(left_send, allreduce, parameter_divisibility, al
     if parameter_divisibility:
         delta_w = P.Squeeze()(delta_w)
         ori_len = F.shape(delta_w)[0]
-        divide_len = ori_len
+        divide_len = ori_len // 2
         left_part = delta_w[:divide_len]
         right_part = delta_w[divide_len:]
     else:
@@ -412,8 +412,8 @@ class AdaSumByGradWrapCell(Cell):
     .. math::
         \begin{array}{ll}
           w_{t+1}=w_{t} - \alpha \cdot Adasum(g_{1}, g_{2}) \\
-          w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 +
-
+          w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 + (1 -
+          \frac{g_1^{T}\cdot g_2}{2\cdot \left \| g_2 \right \|^2 })\cdot g_2] \\
         \end{array}

     In this implementation, :math:`g` represents the gradient of the weights,
@@ -479,8 +479,8 @@ class AdaSumByDeltaWeightWrapCell(Cell):
     .. math::
         \begin{array}{ll}
           w_{t+1}=w_{t} - \alpha \cdot Adasum(g_{1}, g_{2}) \\
-          w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 +
-
+          w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 + (1 -
+          \frac{g_1^{T}\cdot g_2}{2\cdot \left \| g_2 \right \|^2 })\cdot g_2] \\
         \end{array}

     In this implementation, :math:`g` represents the weight difference before and after the updating of optimizer,
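The corrected docstring now closes the second scaling term of the Adasum rule. A small NumPy sketch of that combination for flat vectors, for illustration only (the real implementation exchanges halved parameter slices across devices, which is also why `divide_len` becomes `ori_len // 2`):

    import numpy as np

    def adasum_combine(g1, g2):
        # Adasum(g1, g2) = (1 - <g2, g1> / (2 * ||g1||^2)) * g1
        #                + (1 - <g1, g2> / (2 * ||g2||^2)) * g2
        dot = np.dot(g1, g2)
        return ((1 - dot / (2 * np.dot(g1, g1))) * g1
                + (1 - dot / (2 * np.dot(g2, g2))) * g2)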
mindspore/nn/optim/asgd.py
CHANGED
@@ -21,7 +21,7 @@ from mindspore.common.api import jit
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
 import mindspore
-from mindspore
+from mindspore import _checkparam as validator
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.nn.optim.optimizer import opt_init_args_register

@@ -96,7 +96,7 @@ class ASGD(Optimizer):
             LearningRateSchedule with step as the input to get the learning rate of current step.

         lambd (float): The decay term. Default: 1e-4.
-        alpha (float): The power for :math
+        alpha (float): The power for :math:`\eta` update. Default: 0.75.
         t0 (float): The point of starting averaging. Default: 1e6.
         weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.

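The completed line documents `alpha` as the exponent that drives the decay of :math:`\eta`. For orientation, the classical averaged-SGD schedule decays the step size polynomially; the sketch below states that common formulation and is an assumption for illustration, not code taken from asgd.py:

    def asgd_eta(lr, lambd, alpha, step):
        # Classical schedule (assumed here): eta_t = lr / (1 + lambd * lr * t) ** alpha
        return lr / (1.0 + lambd * lr * step) ** alpha

    eta_start = asgd_eta(0.1, 1e-4, 0.75, 0)           # 0.1 at the first step
    eta_late = asgd_eta(0.1, 1e-4, 0.75, 1_000_000)    # decayed value after 1e6 steps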
mindspore/nn/optim/ftrl.py
CHANGED
@@ -18,8 +18,7 @@ from __future__ import absolute_import
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
 from mindspore.common.api import jit
-from mindspore
-from mindspore._checkparam import Rel
+from mindspore import _checkparam as validator
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.nn.optim.optimizer import opt_init_args_register
 from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
@@ -48,6 +47,62 @@ def _tensor_run_opt_with_sparse_dist(opt, spars_opt, push, pull, l1, l2, lr_powe
     return success


+def _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values):
+    """Apllpy ftrl optimizer for map parameter"""
+    success = True
+    linear_slice = linear.get(indices)
+    moment_slice = moment.get(indices)
+    weight_slice = weight.get(indices)
+
+    op_pow = P.Pow()
+    op_sign = P.Sign()
+    op_greater = P.Greater()
+    op_select = P.Select()
+    op_abs = P.Abs()
+
+    lr_power_val = -lr_power
+    accu_pow = op_pow(moment_slice, lr_power_val)
+    moment_slice = F.depend(moment_slice, accu_pow)
+    cur_accu = moment_slice + values * values
+    cur_accu_pow = op_pow(cur_accu, lr_power_val)
+    sigma = (cur_accu_pow - accu_pow) / learning_rate
+
+    linear_slice = linear_slice + values - sigma * weight_slice
+
+    update_weight_cond = op_greater(op_abs(linear_slice), l1)
+    updated_weight = (l1 * op_sign(linear_slice) - linear_slice) / (cur_accu_pow / learning_rate + 2 * l2)
+    zeros = zeros_like(weight_slice)
+
+    weight_slice = op_select(update_weight_cond, updated_weight, zeros)
+    moment_slice = cur_accu
+
+    moment.put(indices, moment_slice)
+    linear.put(indices, linear_slice)
+    weight.put(indices, weight_slice)
+
+    return success
+
+
+@_ftrl_opt.register("Function", "Function", "Function", "Function", "Number", "Number", "Number", "Tensor", "MapTensor",
+                    "MapTensor", "MapTensor", "MapTensor", "Bool", "Bool",
+                    "Function", "Bool", "Function", "Bool")
+def _run_map_tensor_opt_with_sparse_dist(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear,
+                                         gradient, weight, moment, ps_parameter, cache_enable,
+                                         distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
+    """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
+    success = True
+    indices, values = gradient.get_data()
+    if use_sparse_flag:
+        # PS Mode.
+        success = F.depend(success, distributed_sparse_opt(weight, moment, linear, values, indices))
+    elif cache_enable:
+        # PS Cache mode.
+        _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values)
+    else:
+        raise Exception("Unexpected mode for distributed optimizer.")
+    return success
+
+
 @_ftrl_opt.register("Function", "Function", "Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
                     "Tensor", "Tensor", "Tensor", "Bool", "Bool",
                     "Function", "Bool", "Function", "Bool")
@@ -91,36 +146,7 @@ def _run_map_tensor_opt_with_sparse(opt, spars_opt, push, pull, l1, l2, lr_power
     """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
     success = True
     indices, values = gradient.get_data()
-
-    linear_slice = linear.get(indices)
-    moment_slice = moment.get(indices)
-    weight_slice = weight.get(indices)
-
-    op_pow = P.Pow()
-    op_sign = P.Sign()
-    op_greater = P.Greater()
-    op_select = P.Select()
-
-    lr_power_val = -lr_power
-    accu_pow = op_pow(moment_slice, lr_power_val)
-    moment_slice = F.depend(moment_slice, accu_pow)
-    cur_accu = moment_slice + values * values
-    cur_accu_pow = op_pow(cur_accu, lr_power_val)
-    sigma = (cur_accu_pow - accu_pow) / learning_rate
-
-    linear_slice = linear_slice + values - sigma * weight_slice
-
-    update_weight_cond = op_greater(linear_slice, l1)
-    updated_weight = (l1 * op_sign(linear_slice) - linear_slice) / (cur_accu_pow / learning_rate + 2 * l2)
-    zeros = zeros_like(weight_slice)
-
-    weight_slice = op_select(update_weight_cond, updated_weight, zeros)
-    moment_slice = cur_accu
-
-    moment.put(indices, moment_slice)
-    linear.put(indices, linear_slice)
-    weight.put(indices, weight_slice)
-
+    _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values)
     return success


@@ -139,19 +165,22 @@ def _tensor_run_opt(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate,
     return success


-def _check_param(initial_accum, lr_power, l1, l2, use_locking, prim_name=None):
+def _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, prim_name=None):
     """Check param."""
     validator.check_value_type("initial_accum", initial_accum, [float], prim_name)
-    validator.check_number("initial_accum", initial_accum, 0.0,
+    validator.check_number("initial_accum", initial_accum, 0.0, validator.GE, prim_name)
+
+    validator.check_value_type("learning_rate", learning_rate, [float], prim_name)
+    validator.check_positive_float(learning_rate, "learning_rate", prim_name)

     validator.check_value_type("lr_power", lr_power, [float], prim_name)
-    validator.check_number("lr_power", lr_power, 0.0,
+    validator.check_number("lr_power", lr_power, 0.0, validator.LE, prim_name)

     validator.check_value_type("l1", l1, [float], prim_name)
-    validator.check_number("l1", l1, 0.0,
+    validator.check_number("l1", l1, 0.0, validator.GE, prim_name)

     validator.check_value_type("l2", l2, [float], prim_name)
-    validator.check_number("l2", l2, 0.0,
+    validator.check_number("l2", l2, 0.0, validator.GE, prim_name)

     validator.check_value_type("use_locking", use_locking, [bool], prim_name)

@@ -295,7 +324,7 @@ class FTRL(Optimizer):
             raise ValueError(f"For 'FTRL', dynamic learning rate and group learning rate are currently not supported "
                              f"in FTRL, they should all be false, but got dynamic learning rate {self.dynamic_lr} and"
                              f" group learning rate {self.is_group_lr}.")
-        _check_param(initial_accum, lr_power, l1, l2, use_locking, self.cls_name)
+        _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, self.cls_name)
         self.moments = self._parameters.clone(prefix="moments", init=initial_accum)
         self.linear = self._parameters.clone(prefix="linear", init='zeros')
         self.l1 = l1
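The per-slice FTRL arithmetic that previously lived inline in `_run_map_tensor_opt_with_sparse` is now factored into `_apply_map_tensor_ftrl` and reused by the new distributed path. A dense NumPy restatement of that slice update, for readability only (plain arrays stand in for the `MapTensor` get/put calls):

    import numpy as np

    def ftrl_slice_update(weight, moment, linear, grad, l1, l2, lr, lr_power):
        accu_pow = moment ** (-lr_power)
        cur_accu = moment + grad * grad
        cur_accu_pow = cur_accu ** (-lr_power)
        sigma = (cur_accu_pow - accu_pow) / lr
        linear = linear + grad - sigma * weight
        # Weights shrink to zero unless |linear| exceeds the l1 threshold.
        updated = (l1 * np.sign(linear) - linear) / (cur_accu_pow / lr + 2 * l2)
        weight = np.where(np.abs(linear) > l1, updated, np.zeros_like(weight))
        return weight, cur_accu, linear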
mindspore/nn/optim/lamb.py
CHANGED
@@ -23,8 +23,7 @@ from mindspore.ops import functional as F
 from mindspore.ops.operations import _inner_ops as inner
 from mindspore.common.tensor import Tensor
 from mindspore.common.api import jit
-from mindspore
-from mindspore._checkparam import Rel
+from mindspore import _checkparam as validator
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.nn.optim.optimizer import opt_init_args_register

@@ -69,8 +68,8 @@ def _check_param_value(beta1, beta2, eps, prim_name):
     validator.check_value_type("beta1", beta1, [float], prim_name)
     validator.check_value_type("beta2", beta2, [float], prim_name)
     validator.check_value_type("eps", eps, [float], prim_name)
-    validator.check_float_range(beta1, 0.0, 1.0,
-    validator.check_float_range(beta2, 0.0, 1.0,
+    validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
+    validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
     validator.check_positive_float(eps, "eps", prim_name)


@@ -133,7 +132,7 @@ class Lamb(Optimizer):
     There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
     and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
     As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means. Refer
-    document `LossScale <https://www.mindspore.cn/tutorials/
+    document `LossScale <https://www.mindspore.cn/tutorials/en/r2.0/advanced/mixed_precision.html>`_ to
     process `loss_scale` correctly.

     If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
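As with `AdamWeightDecay`, the note above means `Lamb` takes no `loss_scale` argument, so when `FixedLossScaleManager` is used with `drop_overflow_update=False` the caller has to apply and remove the scale around the optimizer. A minimal sketch of that idea with hypothetical helper names (`compute_grads` is a stand-in, not a MindSpore API):

    LOSS_SCALE = 1024.0

    def train_step(net, optimizer, inputs, labels):
        loss = net(inputs, labels) * LOSS_SCALE            # scale the loss up
        grads = compute_grads(loss, net.trainable_params())
        grads = [g / LOSS_SCALE for g in grads]            # unscale before the update
        optimizer(tuple(grads))                            # optimizer sees true-scale gradients
        return loss / LOSS_SCALE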