mindspore 1.10.0__cp37-none-any.whl → 2.0.0rc1__cp37-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore has been flagged as potentially problematic; consult the package registry's advisory page for this release for details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +9064 -0
- mindspore/__init__.py +9 -4
- mindspore/_akg/akg/composite/build_module.py +11 -0
- mindspore/_akg/akg/config/repository_cuda.json +11 -0
- mindspore/_akg/akg/tvm/contrib/nvcc.py +4 -3
- mindspore/_c_dataengine.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/builtin_operations.py +32 -4
- mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +17 -2
- mindspore/_extends/parse/parser.py +193 -34
- mindspore/_extends/parse/resources.py +7 -8
- mindspore/_extends/parse/standard_method.py +1780 -435
- mindspore/_extends/parse/trope.py +3 -1
- mindspore/_mindspore_offline_debug.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +53 -58
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/adasum.py +3 -2
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +46 -26
- mindspore/boost/dim_reduce.py +6 -5
- mindspore/boost/grad_accumulation.py +2 -1
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +11 -10
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +57 -0
- mindspore/common/api.py +582 -297
- mindspore/common/dtype.py +66 -18
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +38 -1
- mindspore/common/jit_config.py +25 -13
- mindspore/common/mutable.py +53 -24
- mindspore/common/parameter.py +60 -37
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +927 -0
- mindspore/common/tensor.py +1627 -3900
- mindspore/communication/__init__.py +10 -5
- mindspore/communication/_comm_helper.py +78 -214
- mindspore/communication/_hccl_management.py +2 -1
- mindspore/communication/management.py +136 -47
- mindspore/config/op_info.config +501 -1008
- mindspore/config/super_bar_config.json +512 -0
- mindspore/context.py +291 -56
- mindspore/dataset/__init__.py +12 -8
- mindspore/dataset/audio/__init__.py +9 -9
- mindspore/dataset/audio/transforms.py +1090 -228
- mindspore/dataset/audio/utils.py +87 -39
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +17 -15
- mindspore/dataset/core/config.py +246 -17
- mindspore/dataset/core/py_util_helpers.py +4 -3
- mindspore/dataset/core/validator_helpers.py +10 -10
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +9 -9
- mindspore/dataset/engine/datasets.py +648 -477
- mindspore/dataset/engine/datasets_audio.py +165 -167
- mindspore/dataset/engine/datasets_standard_format.py +93 -67
- mindspore/dataset/engine/datasets_text.py +492 -342
- mindspore/dataset/engine/datasets_user_defined.py +85 -50
- mindspore/dataset/engine/datasets_vision.py +1224 -699
- mindspore/dataset/engine/graphdata.py +134 -69
- mindspore/dataset/engine/iterators.py +50 -9
- mindspore/dataset/engine/offload.py +52 -31
- mindspore/dataset/engine/samplers.py +27 -24
- mindspore/dataset/engine/serializer_deserializer.py +14 -15
- mindspore/dataset/engine/validators.py +213 -52
- mindspore/dataset/text/__init__.py +10 -8
- mindspore/dataset/text/transforms.py +152 -57
- mindspore/dataset/text/utils.py +98 -49
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +4 -2
- mindspore/dataset/transforms/c_transforms.py +11 -13
- mindspore/dataset/transforms/py_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms_util.py +10 -0
- mindspore/dataset/transforms/transforms.py +13 -15
- mindspore/dataset/transforms/validators.py +7 -7
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/browse_dataset.py +13 -13
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +8 -7
- mindspore/dataset/vision/c_transforms.py +125 -126
- mindspore/dataset/vision/py_transforms.py +37 -37
- mindspore/dataset/vision/py_transforms_util.py +23 -20
- mindspore/dataset/vision/transforms.py +316 -315
- mindspore/dataset/vision/utils.py +313 -17
- mindspore/dataset/vision/validators.py +6 -6
- mindspore/default_config.py +0 -1
- mindspore/{compression → experimental}/__init__.py +6 -5
- mindspore/experimental/map_parameter.py +275 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +70 -9
- mindspore/include/api/delegate.h +8 -1
- mindspore/include/api/dual_abi_helper.h +8 -24
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_group.h +68 -0
- mindspore/include/api/model_parallel_runner.h +17 -17
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +20 -4
- mindspore/include/api/status.h +7 -1
- mindspore/include/api/types.h +25 -21
- mindspore/include/api/visible.h +4 -0
- mindspore/include/c_api/model_c.h +5 -0
- mindspore/include/c_api/status_c.h +1 -1
- mindspore/include/dataset/config.h +1 -1
- mindspore/include/dataset/constants.h +14 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/include/dataset/vision.h +56 -117
- mindspore/include/dataset/vision_lite.h +102 -0
- mindspore/include/mindapi/base/type_id.h +42 -3
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libicudata.so.69 +0 -0
- mindspore/lib/libicui18n.so.69 +0 -0
- mindspore/lib/libicuuc.so.69 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/{libakg.so → plugin/cpu/libakg.so} +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +28 -28
- mindspore/mindrecord/common/exceptions.py +2 -4
- mindspore/mindrecord/filereader.py +19 -1
- mindspore/mindrecord/filewriter.py +250 -88
- mindspore/mindrecord/mindpage.py +13 -13
- mindspore/mindrecord/shardheader.py +15 -15
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +29 -29
- mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
- mindspore/mindrecord/tools/csv_to_mr.py +4 -4
- mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
- mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/nn/__init__.py +1 -5
- mindspore/nn/cell.py +297 -234
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +17 -42
- mindspore/nn/layer/__init__.py +7 -4
- mindspore/nn/layer/activation.py +131 -88
- mindspore/nn/layer/basic.py +313 -613
- mindspore/nn/layer/channel_shuffle.py +103 -0
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +52 -6
- mindspore/nn/layer/conv.py +112 -43
- mindspore/nn/layer/dense.py +10 -9
- mindspore/nn/layer/embedding.py +36 -34
- mindspore/nn/layer/image.py +123 -27
- mindspore/nn/layer/math.py +108 -107
- mindspore/nn/layer/normalization.py +212 -366
- mindspore/nn/layer/padding.py +370 -42
- mindspore/nn/layer/pooling.py +1443 -219
- mindspore/nn/layer/rnn_cells.py +11 -16
- mindspore/nn/layer/rnns.py +38 -39
- mindspore/nn/layer/thor_layer.py +24 -25
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +9 -6
- mindspore/nn/loss/loss.py +678 -142
- mindspore/nn/metrics.py +53 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
- mindspore/nn/optim/ada_grad.py +8 -8
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +18 -14
- mindspore/nn/optim/adam.py +429 -87
- mindspore/nn/optim/adamax.py +5 -6
- mindspore/nn/optim/adasum.py +10 -8
- mindspore/nn/optim/asgd.py +7 -7
- mindspore/nn/optim/ftrl.py +81 -11
- mindspore/nn/optim/lamb.py +7 -8
- mindspore/nn/optim/lars.py +4 -4
- mindspore/nn/optim/lazyadam.py +82 -7
- mindspore/nn/optim/momentum.py +8 -7
- mindspore/nn/optim/optimizer.py +19 -10
- mindspore/nn/optim/proximal_ada_grad.py +6 -5
- mindspore/nn/optim/rmsprop.py +3 -3
- mindspore/nn/optim/rprop.py +20 -16
- mindspore/nn/optim/sgd.py +21 -15
- mindspore/nn/optim/thor.py +23 -21
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -6
- mindspore/nn/probability/bijector/invert.py +4 -2
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/__init__.py +6 -0
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +11 -17
- mindspore/nn/probability/distribution/bernoulli.py +6 -6
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +9 -9
- mindspore/nn/probability/distribution/cauchy.py +8 -8
- mindspore/nn/probability/distribution/distribution.py +12 -6
- mindspore/nn/probability/distribution/exponential.py +5 -5
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +6 -5
- mindspore/nn/probability/distribution/gumbel.py +5 -5
- mindspore/nn/probability/distribution/half_normal.py +133 -0
- mindspore/nn/probability/distribution/laplace.py +128 -0
- mindspore/nn/probability/distribution/log_normal.py +0 -1
- mindspore/nn/probability/distribution/logistic.py +4 -5
- mindspore/nn/probability/distribution/normal.py +11 -15
- mindspore/nn/probability/distribution/poisson.py +6 -2
- mindspore/nn/probability/distribution/student_t.py +150 -0
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +5 -5
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +8 -1
- mindspore/nn/wrap/cell_wrapper.py +55 -27
- mindspore/nn/wrap/grad_reducer.py +20 -11
- mindspore/nn/wrap/loss_scale.py +47 -30
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +46 -42
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +26 -19
- mindspore/numpy/utils.py +1 -8
- mindspore/numpy/utils_const.py +112 -62
- mindspore/ops/__init__.py +6 -3
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +209 -152
- mindspore/ops/_grad/grad_base.py +55 -17
- mindspore/ops/_grad/grad_clip_ops.py +11 -3
- mindspore/ops/_grad/grad_comm_ops.py +58 -47
- mindspore/ops/_grad/grad_implementations.py +21 -61
- mindspore/ops/_grad/grad_inner_ops.py +48 -6
- mindspore/ops/_grad/grad_math_ops.py +306 -161
- mindspore/ops/_grad/grad_nn_ops.py +192 -181
- mindspore/ops/_grad/grad_other_ops.py +1 -1
- mindspore/ops/_grad/grad_quant_ops.py +5 -5
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +15 -9
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
- mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
- mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
- mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
- mindspore/ops/_op_impl/__init__.py +3 -3
- mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
- mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
- mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
- mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
- mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
- mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
- mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
- mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
- mindspore/ops/_op_impl/aicpu/diag.py +36 -0
- mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
- mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
- mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
- mindspore/ops/_op_impl/aicpu/eig.py +35 -0
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/glu.py +33 -0
- mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
- mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
- mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
- mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
- mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
- mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
- mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
- mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
- mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/qr.py +36 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/range.py +36 -0
- mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
- mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
- mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sort.py +39 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
- mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
- mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
- mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
- mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
- mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/__init__.py +1 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
- mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
- mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -608
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/greater.py +2 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
- mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
- mindspore/ops/_op_impl/tbe/slice.py +26 -15
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +3 -2
- mindspore/ops/_register_for_op.py +11 -0
- mindspore/ops/_utils/__init__.py +1 -1
- mindspore/ops/_utils/utils.py +20 -41
- mindspore/ops/_vmap/__init__.py +2 -2
- mindspore/ops/_vmap/vmap_array_ops.py +170 -78
- mindspore/ops/_vmap/vmap_base.py +24 -10
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
- mindspore/ops/_vmap/vmap_image_ops.py +52 -0
- mindspore/ops/_vmap/vmap_math_ops.py +77 -6
- mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
- mindspore/ops/_vmap/vmap_other_ops.py +3 -1
- mindspore/ops/_vmap/vmap_random_ops.py +55 -3
- mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
- mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/__init__.py +1 -4
- mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
- mindspore/ops/composite/__init__.py +12 -13
- mindspore/ops/composite/base.py +261 -254
- mindspore/ops/composite/env_ops.py +41 -0
- mindspore/ops/composite/math_ops.py +197 -156
- mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
- mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
- mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
- mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
- mindspore/ops/function/__init__.py +323 -8
- mindspore/ops/function/array_func.py +3511 -780
- mindspore/ops/function/clip_func.py +329 -0
- mindspore/ops/function/debug_func.py +6 -6
- mindspore/ops/function/grad/__init__.py +5 -1
- mindspore/ops/function/grad/grad_func.py +736 -65
- mindspore/ops/function/image_func.py +270 -0
- mindspore/ops/function/linalg_func.py +268 -8
- mindspore/ops/function/math_func.py +8032 -3164
- mindspore/ops/function/nn_func.py +5619 -1855
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +11 -10
- mindspore/ops/function/random_func.py +939 -77
- mindspore/ops/function/sparse_func.py +249 -84
- mindspore/ops/function/sparse_unary_func.py +2303 -0
- mindspore/ops/function/spectral_func.py +146 -0
- mindspore/ops/function/vmap_func.py +114 -0
- mindspore/ops/functional.py +182 -254
- mindspore/ops/op_info_register.py +79 -34
- mindspore/ops/operations/__init__.py +210 -118
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +25 -15
- mindspore/ops/operations/_grad_ops.py +447 -322
- mindspore/ops/operations/_inner_ops.py +547 -176
- mindspore/ops/operations/_map_tensor_ops.py +112 -0
- mindspore/ops/operations/_ms_kernel.py +29 -27
- mindspore/ops/operations/_ocr_ops.py +11 -11
- mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
- mindspore/ops/operations/_quant_ops.py +186 -101
- mindspore/ops/operations/_rl_inner_ops.py +122 -61
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1047 -0
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +4 -4
- mindspore/ops/operations/array_ops.py +1428 -1226
- mindspore/ops/operations/comm_ops.py +180 -117
- mindspore/ops/operations/control_ops.py +4 -2
- mindspore/ops/operations/custom_ops.py +185 -98
- mindspore/ops/operations/debug_ops.py +92 -54
- mindspore/ops/operations/image_ops.py +406 -211
- mindspore/ops/operations/inner_ops.py +42 -53
- mindspore/ops/operations/linalg_ops.py +32 -29
- mindspore/ops/operations/math_ops.py +2076 -897
- mindspore/ops/operations/nn_ops.py +1282 -1252
- mindspore/ops/operations/other_ops.py +124 -278
- mindspore/ops/operations/random_ops.py +345 -178
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +502 -157
- mindspore/ops/operations/spectral_ops.py +107 -0
- mindspore/ops/primitive.py +192 -15
- mindspore/ops/vm_impl_registry.py +23 -2
- mindspore/parallel/__init__.py +6 -1
- mindspore/parallel/_auto_parallel_context.py +199 -92
- mindspore/parallel/_cell_wrapper.py +4 -2
- mindspore/parallel/_cost_model_context.py +3 -0
- mindspore/parallel/_dp_allreduce_fusion.py +2 -1
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +167 -28
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +9 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
- mindspore/parallel/_utils.py +47 -7
- mindspore/parallel/algo_parameter_config.py +5 -1
- mindspore/parallel/checkpoint_transform.py +329 -0
- mindspore/parallel/shard.py +229 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/util.py +4 -3
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +249 -0
- mindspore/profiler/parser/aicpu_data_parser.py +38 -39
- mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
- mindspore/profiler/parser/base_timeline_generator.py +471 -0
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
- mindspore/profiler/parser/framework_parser.py +42 -16
- mindspore/profiler/parser/hccl_parser.py +158 -158
- mindspore/profiler/parser/hwts_log_parser.py +7 -6
- mindspore/profiler/parser/integrator.py +18 -1579
- mindspore/profiler/parser/minddata_analyzer.py +8 -8
- mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +108 -0
- mindspore/profiler/parser/step_trace_parser.py +1 -1
- mindspore/profiler/profiling.py +396 -194
- mindspore/rewrite/__init__.py +6 -2
- mindspore/rewrite/api/node.py +51 -110
- mindspore/rewrite/api/node_type.py +10 -6
- mindspore/rewrite/api/pattern_engine.py +51 -7
- mindspore/rewrite/api/scoped_value.py +64 -53
- mindspore/rewrite/api/symbol_tree.py +108 -61
- mindspore/rewrite/api/tree_node_helper.py +2 -3
- mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
- mindspore/rewrite/ast_helpers/__init__.py +6 -3
- mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
- mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
- mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
- mindspore/rewrite/ast_transformers/__init__.py +0 -1
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
- mindspore/rewrite/common/__init__.py +2 -0
- mindspore/rewrite/common/event.py +1 -1
- mindspore/rewrite/common/observable.py +1 -1
- mindspore/rewrite/common/observer.py +1 -1
- mindspore/rewrite/common/rewrite_elog.py +35 -0
- mindspore/rewrite/namer.py +2 -2
- mindspore/rewrite/namespace.py +14 -4
- mindspore/rewrite/node.py +161 -13
- mindspore/rewrite/parser.py +0 -1
- mindspore/rewrite/parser_register.py +0 -1
- mindspore/rewrite/parsers/arguments_parser.py +3 -2
- mindspore/rewrite/parsers/assign_parser.py +267 -67
- mindspore/rewrite/parsers/attribute_parser.py +56 -0
- mindspore/rewrite/parsers/class_def_parser.py +191 -108
- mindspore/rewrite/parsers/constant_parser.py +101 -0
- mindspore/rewrite/parsers/container_parser.py +88 -0
- mindspore/rewrite/parsers/for_parser.py +28 -15
- mindspore/rewrite/parsers/function_def_parser.py +21 -5
- mindspore/rewrite/parsers/if_parser.py +11 -28
- mindspore/rewrite/parsers/module_parser.py +9 -6
- mindspore/rewrite/parsers/return_parser.py +3 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +322 -109
- mindspore/rewrite/symbol_tree_builder.py +45 -8
- mindspore/rewrite/symbol_tree_dumper.py +0 -1
- mindspore/rewrite/topological_manager.py +1 -2
- mindspore/run_check/_check_version.py +209 -112
- mindspore/run_check/run_check.py +2 -1
- mindspore/scipy/linalg.py +13 -117
- mindspore/scipy/ops.py +5 -71
- mindspore/scipy/ops_grad.py +1 -25
- mindspore/scipy/ops_wrapper.py +1 -1
- mindspore/scipy/optimize/_bfgs.py +1 -1
- mindspore/scipy/optimize/_lagrange.py +200 -0
- mindspore/scipy/optimize/line_search.py +3 -2
- mindspore/scipy/optimize/minimize.py +43 -6
- mindspore/scipy/sparse/__init__.py +2 -2
- mindspore/scipy/sparse/linalg.py +5 -465
- mindspore/scipy/utils.py +2 -1
- mindspore/scipy/utils_const.py +7 -1
- mindspore/train/__init__.py +6 -4
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +321 -50
- mindspore/train/callback/__init__.py +3 -1
- mindspore/train/callback/_backup_and_restore.py +120 -0
- mindspore/train/callback/_callback.py +8 -8
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_early_stop.py +13 -7
- mindspore/train/callback/_history.py +8 -8
- mindspore/train/callback/_lambda_callback.py +6 -6
- mindspore/train/callback/_landscape.py +36 -38
- mindspore/train/callback/_loss_monitor.py +12 -6
- mindspore/train/callback/_lr_scheduler_callback.py +2 -4
- mindspore/train/callback/_on_request_exit.py +212 -0
- mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
- mindspore/train/callback/_summary_collector.py +27 -19
- mindspore/train/callback/_time_monitor.py +13 -7
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +122 -33
- mindspore/train/dataset_helper.py +28 -87
- mindspore/train/loss_scale_manager.py +4 -7
- mindspore/{nn → train}/metrics/__init__.py +20 -20
- mindspore/{nn → train}/metrics/accuracy.py +12 -10
- mindspore/{nn → train}/metrics/auc.py +4 -4
- mindspore/{nn → train}/metrics/bleu_score.py +4 -4
- mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
- mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
- mindspore/{nn → train}/metrics/dice.py +6 -5
- mindspore/{nn → train}/metrics/error.py +7 -5
- mindspore/{nn → train}/metrics/fbeta.py +9 -7
- mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
- mindspore/{nn → train}/metrics/loss.py +4 -3
- mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/metric.py +6 -5
- mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
- mindspore/{nn → train}/metrics/perplexity.py +5 -4
- mindspore/{nn → train}/metrics/precision.py +5 -4
- mindspore/{nn → train}/metrics/recall.py +5 -4
- mindspore/{nn → train}/metrics/roc.py +7 -6
- mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/topk.py +7 -5
- mindspore/train/mind_ir_pb2.py +339 -32
- mindspore/train/model.py +113 -84
- mindspore/train/serialization.py +547 -167
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -12
- mindspore/train/train_thor/convert_utils.py +7 -1
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/train/train_thor/model_thor.py +0 -4
- mindspore/version.py +1 -1
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +899 -675
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -514
- mindspore/compression/quant/qat.py +0 -636
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -138
- mindspore/nn/probability/dpn/vae/vae.py +0 -122
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
- mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
- mindspore/ops/composite/array_ops.py +0 -210
- mindspore/ops/composite/clip_ops.py +0 -238
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/ops/operations/sponge_ops.py +0 -3531
- mindspore/ops/operations/sponge_update_ops.py +0 -2546
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- mindspore/run_check/_check_deps_version.py +0 -84
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
mindspore/nn/optim/adamax.py
CHANGED
|
@@ -17,16 +17,15 @@ from __future__ import absolute_import
|
|
|
17
17
|
|
|
18
18
|
from mindspore.common import dtype as mstype
|
|
19
19
|
from mindspore.common.initializer import initializer
|
|
20
|
-
from mindspore.common.api import
|
|
20
|
+
from mindspore.common.api import jit
|
|
21
21
|
from mindspore.ops import operations as P
|
|
22
22
|
from mindspore.ops import composite as C
|
|
23
23
|
from mindspore.ops import functional as F
|
|
24
24
|
from mindspore.common.parameter import Parameter
|
|
25
25
|
from mindspore.common.tensor import Tensor
|
|
26
|
-
from mindspore
|
|
26
|
+
from mindspore import _checkparam as validator
|
|
27
27
|
from mindspore.nn.optim.optimizer import Optimizer
|
|
28
28
|
from mindspore.nn.optim.optimizer import opt_init_args_register
|
|
29
|
-
from mindspore._checkparam import Rel
|
|
30
29
|
|
|
31
30
|
_ada_max_opt = C.MultitypeFuncGraph("ada_max_opt")
|
|
32
31
|
|
|
@@ -44,8 +43,8 @@ def _check_param_value(beta1, beta2, eps, prim_name):
|
|
|
44
43
|
validator.check_value_type("beta1", beta1, [float], prim_name)
|
|
45
44
|
validator.check_value_type("beta2", beta2, [float], prim_name)
|
|
46
45
|
validator.check_value_type("eps", eps, [float], prim_name)
|
|
47
|
-
validator.check_float_range(beta1, 0.0, 1.0,
|
|
48
|
-
validator.check_float_range(beta2, 0.0, 1.0,
|
|
46
|
+
validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
|
|
47
|
+
validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
|
|
49
48
|
validator.check_positive_float(eps, "eps", prim_name)
|
|
50
49
|
|
|
51
50
|
|
|
@@ -196,7 +195,7 @@ class AdaMax(Optimizer):
|
|
|
196
195
|
|
|
197
196
|
self.opt = P.ApplyAdaMax()
|
|
198
197
|
|
|
199
|
-
@
|
|
198
|
+
@jit
|
|
200
199
|
def construct(self, gradients):
|
|
201
200
|
gradients = self.flatten_gradients(gradients)
|
|
202
201
|
gradients = self.decay_weight(gradients)
|
mindspore/nn/optim/adasum.py
CHANGED
|
@@ -22,7 +22,7 @@ import math
|
|
|
22
22
|
import mindspore.nn as nn
|
|
23
23
|
import mindspore.log as logger
|
|
24
24
|
from mindspore import context
|
|
25
|
-
from mindspore
|
|
25
|
+
from mindspore import _checkparam as validator
|
|
26
26
|
from mindspore.nn.cell import Cell
|
|
27
27
|
from mindspore.common.parameter import ParameterTuple, Parameter
|
|
28
28
|
from mindspore.parallel._utils import _get_global_rank, _get_stage_device_num
|
|
@@ -47,7 +47,8 @@ def _update_parameters_adasum(delta_weight, update_delta_weight, parameter, old_
|
|
|
47
47
|
shape = F.shape(delta_weight)
|
|
48
48
|
update_delta_weight = reshape(update_delta_weight, shape)
|
|
49
49
|
new_parameter = old_parameter - update_delta_weight
|
|
50
|
-
|
|
50
|
+
P.Assign()(parameter, new_parameter)
|
|
51
|
+
return parameter
|
|
51
52
|
|
|
52
53
|
|
|
53
54
|
@_reshape_grads.register("Tensor", "Tensor", "Function")
|
|
@@ -113,7 +114,7 @@ def _adasum_opt_forward_process(left_send, allreduce, parameter_divisibility, al
|
|
|
113
114
|
if parameter_divisibility:
|
|
114
115
|
delta_w = P.Squeeze()(delta_w)
|
|
115
116
|
ori_len = F.shape(delta_w)[0]
|
|
116
|
-
divide_len = ori_len
|
|
117
|
+
divide_len = ori_len // 2
|
|
117
118
|
left_part = delta_w[:divide_len]
|
|
118
119
|
right_part = delta_w[divide_len:]
|
|
119
120
|
else:
|
|
@@ -377,7 +378,8 @@ def _get_delta_weight_process(new_parameter, old_parameter):
|
|
|
377
378
|
|
|
378
379
|
@_save_weight.register("Tensor", "Tensor")
|
|
379
380
|
def _save_weight_process(new_parameter, old_parameter):
|
|
380
|
-
|
|
381
|
+
P.Assign()(new_parameter, old_parameter)
|
|
382
|
+
return new_parameter
|
|
381
383
|
|
|
382
384
|
|
|
383
385
|
@_clone_weight.register("Tensor", "Tensor")
|
|
@@ -410,8 +412,8 @@ class AdaSumByGradWrapCell(Cell):
|
|
|
410
412
|
.. math::
|
|
411
413
|
\begin{array}{ll}
|
|
412
414
|
w_{t+1}=w_{t} - \alpha \cdot Adasum(g_{1}, g_{2}) \\
|
|
413
|
-
w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 +
|
|
414
|
-
|
|
415
|
+
w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 + (1 -
|
|
416
|
+
\frac{g_1^{T}\cdot g_2}{2\cdot \left \| g_2 \right \|^2 })\cdot g_2] \\
|
|
415
417
|
\end{array}
|
|
416
418
|
|
|
417
419
|
In this implementation, :math:`g` represents the gradient of the weights,
|
|
@@ -477,8 +479,8 @@ class AdaSumByDeltaWeightWrapCell(Cell):
|
|
|
477
479
|
.. math::
|
|
478
480
|
\begin{array}{ll}
|
|
479
481
|
w_{t+1}=w_{t} - \alpha \cdot Adasum(g_{1}, g_{2}) \\
|
|
480
|
-
w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 +
|
|
481
|
-
|
|
482
|
+
w_{t+1}=w_{t} - \alpha \cdot [(1 - \frac{g_2^{T}\cdot g_1}{2\cdot \left \| g_1 \right \|^2 })\cdot g_1 + (1 -
|
|
483
|
+
\frac{g_1^{T}\cdot g_2}{2\cdot \left \| g_2 \right \|^2 })\cdot g_2] \\
|
|
482
484
|
\end{array}
|
|
483
485
|
|
|
484
486
|
In this implementation, :math:`g` represents the weight difference before and after the updating of optimizer,
|
mindspore/nn/optim/asgd.py
CHANGED
|
@@ -17,11 +17,11 @@ from __future__ import absolute_import
|
|
|
17
17
|
|
|
18
18
|
from mindspore.ops import operations as P
|
|
19
19
|
from mindspore.common.parameter import Parameter
|
|
20
|
-
from mindspore.common.api import
|
|
20
|
+
from mindspore.common.api import jit
|
|
21
21
|
from mindspore.common.tensor import Tensor
|
|
22
22
|
import mindspore.common.dtype as mstype
|
|
23
23
|
import mindspore
|
|
24
|
-
from mindspore
|
|
24
|
+
from mindspore import _checkparam as validator
|
|
25
25
|
from mindspore.nn.optim.optimizer import Optimizer
|
|
26
26
|
from mindspore.nn.optim.optimizer import opt_init_args_register
|
|
27
27
|
|
|
@@ -45,7 +45,7 @@ class ASGD(Optimizer):
|
|
|
45
45
|
|
|
46
46
|
:math:`\lambda` represents the decay term, :math:`\mu` and :math:`\eta` are tracked to
|
|
47
47
|
update :math:`ax` and :math:`w`, :math:`t0` represents the point of starting averaging,
|
|
48
|
-
:math:`\alpha` represents the power for eta update, :math:`ax` represents the averaged
|
|
48
|
+
:math:`\alpha` represents the power for :math:`\eta` update, :math:`ax` represents the averaged
|
|
49
49
|
parameter value, :math:`t` represents the current step, :math:`g` represents `gradients`,
|
|
50
50
|
:math:`w` represents `params`.
|
|
51
51
|
|
|
@@ -81,7 +81,7 @@ class ASGD(Optimizer):
|
|
|
81
81
|
If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
|
|
82
82
|
one group of `params`.
|
|
83
83
|
|
|
84
|
-
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]):
|
|
84
|
+
learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): learning_rate. Default: 0.1.
|
|
85
85
|
|
|
86
86
|
- float: The fixed learning rate value. Must be equal to or greater than 0.
|
|
87
87
|
|
|
@@ -96,7 +96,7 @@ class ASGD(Optimizer):
|
|
|
96
96
|
LearningRateSchedule with step as the input to get the learning rate of current step.
|
|
97
97
|
|
|
98
98
|
lambd (float): The decay term. Default: 1e-4.
|
|
99
|
-
alpha (float): The power for eta update. Default: 0.75.
|
|
99
|
+
alpha (float): The power for :math:`\eta` update. Default: 0.75.
|
|
100
100
|
t0 (float): The point of starting averaging. Default: 1e6.
|
|
101
101
|
weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
|
|
102
102
|
|
|
@@ -149,7 +149,7 @@ class ASGD(Optimizer):
|
|
|
149
149
|
"""
|
|
150
150
|
|
|
151
151
|
@opt_init_args_register
|
|
152
|
-
def __init__(self, params, learning_rate=0.1, lambd=1e-4, alpha=0.75, t0=1e6, weight_decay=0.):
|
|
152
|
+
def __init__(self, params, learning_rate=0.1, lambd=1e-4, alpha=0.75, t0=1e6, weight_decay=0.0):
|
|
153
153
|
|
|
154
154
|
super(ASGD, self).__init__(learning_rate, params, weight_decay)
|
|
155
155
|
|
|
@@ -176,7 +176,7 @@ class ASGD(Optimizer):
|
|
|
176
176
|
self.cast = P.Cast()
|
|
177
177
|
self.squeeze = P.Squeeze()
|
|
178
178
|
|
|
179
|
-
@
|
|
179
|
+
@jit
|
|
180
180
|
def construct(self, gradients):
|
|
181
181
|
gradients = self.flatten_gradients(gradients)
|
|
182
182
|
gradients = self.decay_weight(gradients)
|
mindspore/nn/optim/ftrl.py
CHANGED
|
@@ -16,9 +16,9 @@
|
|
|
16
16
|
from __future__ import absolute_import
|
|
17
17
|
|
|
18
18
|
from mindspore.ops import functional as F, composite as C, operations as P
|
|
19
|
-
from mindspore.
|
|
20
|
-
from mindspore.
|
|
21
|
-
from mindspore
|
|
19
|
+
from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
|
|
20
|
+
from mindspore.common.api import jit
|
|
21
|
+
from mindspore import _checkparam as validator
|
|
22
22
|
from mindspore.nn.optim.optimizer import Optimizer
|
|
23
23
|
from mindspore.nn.optim.optimizer import opt_init_args_register
|
|
24
24
|
from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
|
|
@@ -47,6 +47,62 @@ def _tensor_run_opt_with_sparse_dist(opt, spars_opt, push, pull, l1, l2, lr_powe
|
|
|
47
47
|
return success
|
|
48
48
|
|
|
49
49
|
|
|
50
|
+
def _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values):
|
|
51
|
+
"""Apllpy ftrl optimizer for map parameter"""
|
|
52
|
+
success = True
|
|
53
|
+
linear_slice = linear.get(indices)
|
|
54
|
+
moment_slice = moment.get(indices)
|
|
55
|
+
weight_slice = weight.get(indices)
|
|
56
|
+
|
|
57
|
+
op_pow = P.Pow()
|
|
58
|
+
op_sign = P.Sign()
|
|
59
|
+
op_greater = P.Greater()
|
|
60
|
+
op_select = P.Select()
|
|
61
|
+
op_abs = P.Abs()
|
|
62
|
+
|
|
63
|
+
lr_power_val = -lr_power
|
|
64
|
+
accu_pow = op_pow(moment_slice, lr_power_val)
|
|
65
|
+
moment_slice = F.depend(moment_slice, accu_pow)
|
|
66
|
+
cur_accu = moment_slice + values * values
|
|
67
|
+
cur_accu_pow = op_pow(cur_accu, lr_power_val)
|
|
68
|
+
sigma = (cur_accu_pow - accu_pow) / learning_rate
|
|
69
|
+
|
|
70
|
+
linear_slice = linear_slice + values - sigma * weight_slice
|
|
71
|
+
|
|
72
|
+
update_weight_cond = op_greater(op_abs(linear_slice), l1)
|
|
73
|
+
updated_weight = (l1 * op_sign(linear_slice) - linear_slice) / (cur_accu_pow / learning_rate + 2 * l2)
|
|
74
|
+
zeros = zeros_like(weight_slice)
|
|
75
|
+
|
|
76
|
+
weight_slice = op_select(update_weight_cond, updated_weight, zeros)
|
|
77
|
+
moment_slice = cur_accu
|
|
78
|
+
|
|
79
|
+
moment.put(indices, moment_slice)
|
|
80
|
+
linear.put(indices, linear_slice)
|
|
81
|
+
weight.put(indices, weight_slice)
|
|
82
|
+
|
|
83
|
+
return success
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@_ftrl_opt.register("Function", "Function", "Function", "Function", "Number", "Number", "Number", "Tensor", "MapTensor",
|
|
87
|
+
"MapTensor", "MapTensor", "MapTensor", "Bool", "Bool",
|
|
88
|
+
"Function", "Bool", "Function", "Bool")
|
|
89
|
+
def _run_map_tensor_opt_with_sparse_dist(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear,
|
|
90
|
+
gradient, weight, moment, ps_parameter, cache_enable,
|
|
91
|
+
distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
|
|
92
|
+
"""Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
|
|
93
|
+
success = True
|
|
94
|
+
indices, values = gradient.get_data()
|
|
95
|
+
if use_sparse_flag:
|
|
96
|
+
# PS Mode.
|
|
97
|
+
success = F.depend(success, distributed_sparse_opt(weight, moment, linear, values, indices))
|
|
98
|
+
elif cache_enable:
|
|
99
|
+
# PS Cache mode.
|
|
100
|
+
_apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values)
|
|
101
|
+
else:
|
|
102
|
+
raise Exception("Unexpected mode for distributed optimizer.")
|
|
103
|
+
return success
|
|
104
|
+
|
|
105
|
+
|
|
50
106
|
@_ftrl_opt.register("Function", "Function", "Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
|
|
51
107
|
"Tensor", "Tensor", "Tensor", "Bool", "Bool",
|
|
52
108
|
"Function", "Bool", "Function", "Bool")
|
|
@@ -83,6 +139,17 @@ def _tensor_run_opt_with_sparse(opt, spars_opt, push, pull, l1, l2, lr_power, le
|
|
|
83
139
|
return success
|
|
84
140
|
|
|
85
141
|
|
|
142
|
+
@_ftrl_opt.register("Function", "Function", "Function", "Function", "Number", "Number", "Number", "Tensor", "MapTensor",
|
|
143
|
+
"MapTensor", "MapTensor", "MapTensor", "Bool", "Bool")
|
|
144
|
+
def _run_map_tensor_opt_with_sparse(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear,
|
|
145
|
+
gradient, weight, moment, ps_parameter, cache_enable):
|
|
146
|
+
"""Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
|
|
147
|
+
success = True
|
|
148
|
+
indices, values = gradient.get_data()
|
|
149
|
+
_apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values)
|
|
150
|
+
return success
|
|
151
|
+
|
|
152
|
+
|
|
86
153
|
@_ftrl_opt.register("Function", "Function", "Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
|
|
87
154
|
"Tensor", "Tensor", "Tensor", "Bool", "Bool")
|
|
88
155
|
def _tensor_run_opt(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate, linear,
|
|
@@ -98,19 +165,22 @@ def _tensor_run_opt(opt, spars_opt, push, pull, l1, l2, lr_power, learning_rate,
|
|
|
98
165
|
return success
|
|
99
166
|
|
|
100
167
|
|
|
101
|
-
def _check_param(initial_accum, lr_power, l1, l2, use_locking, prim_name=None):
|
|
168
|
+
def _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, prim_name=None):
|
|
102
169
|
"""Check param."""
|
|
103
170
|
validator.check_value_type("initial_accum", initial_accum, [float], prim_name)
|
|
104
|
-
validator.check_number("initial_accum", initial_accum, 0.0,
|
|
171
|
+
validator.check_number("initial_accum", initial_accum, 0.0, validator.GE, prim_name)
|
|
172
|
+
|
|
173
|
+
validator.check_value_type("learning_rate", learning_rate, [float], prim_name)
|
|
174
|
+
validator.check_positive_float(learning_rate, "learning_rate", prim_name)
|
|
105
175
|
|
|
106
176
|
validator.check_value_type("lr_power", lr_power, [float], prim_name)
|
|
107
|
-
validator.check_number("lr_power", lr_power, 0.0,
|
|
177
|
+
validator.check_number("lr_power", lr_power, 0.0, validator.LE, prim_name)
|
|
108
178
|
|
|
109
179
|
validator.check_value_type("l1", l1, [float], prim_name)
|
|
110
|
-
validator.check_number("l1", l1, 0.0,
|
|
180
|
+
validator.check_number("l1", l1, 0.0, validator.GE, prim_name)
|
|
111
181
|
|
|
112
182
|
validator.check_value_type("l2", l2, [float], prim_name)
|
|
113
|
-
validator.check_number("l2", l2, 0.0,
|
|
183
|
+
validator.check_number("l2", l2, 0.0, validator.GE, prim_name)
|
|
114
184
|
|
|
115
185
|
validator.check_value_type("use_locking", use_locking, [bool], prim_name)
|
|
116
186
|
|
|
@@ -254,7 +324,7 @@ class FTRL(Optimizer):
|
|
|
254
324
|
raise ValueError(f"For 'FTRL', dynamic learning rate and group learning rate are currently not supported "
|
|
255
325
|
f"in FTRL, they should all be false, but got dynamic learning rate {self.dynamic_lr} and"
|
|
256
326
|
f" group learning rate {self.is_group_lr}.")
|
|
257
|
-
_check_param(initial_accum, lr_power, l1, l2, use_locking, self.cls_name)
|
|
327
|
+
_check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, self.cls_name)
|
|
258
328
|
self.moments = self._parameters.clone(prefix="moments", init=initial_accum)
|
|
259
329
|
self.linear = self._parameters.clone(prefix="linear", init='zeros')
|
|
260
330
|
self.l1 = l1
|
|
@@ -276,7 +346,7 @@ class FTRL(Optimizer):
|
|
|
276
346
|
|
|
277
347
|
self._init_distributed_opts(use_locking, learning_rate, l1, l2, lr_power)
|
|
278
348
|
|
|
279
|
-
@
|
|
349
|
+
@jit
|
|
280
350
|
def construct(self, grads):
|
|
281
351
|
params = self._parameters
|
|
282
352
|
moments = self.moments
|
|
@@ -316,7 +386,7 @@ class FTRL(Optimizer):
|
|
|
316
386
|
|
|
317
387
|
if value == 'CPU':
|
|
318
388
|
self.sparse_opt = P.FusedSparseFtrl(self.lr, self.l1, self.l2, self.lr_power, self.use_locking)
|
|
319
|
-
self.sparse_opt.
|
|
389
|
+
self.sparse_opt.set_device("CPU")
|
|
320
390
|
else:
|
|
321
391
|
self.sparse_opt = P.SparseApplyFtrl(self.lr, self.l1, self.l2, self.lr_power, self.use_locking)
|
|
322
392
|
|
mindspore/nn/optim/lamb.py
CHANGED
|
@@ -22,9 +22,8 @@ from mindspore.ops import composite as C
|
|
|
22
22
|
from mindspore.ops import functional as F
|
|
23
23
|
from mindspore.ops.operations import _inner_ops as inner
|
|
24
24
|
from mindspore.common.tensor import Tensor
|
|
25
|
-
from mindspore.common.api import
|
|
26
|
-
from mindspore
|
|
27
|
-
from mindspore._checkparam import Rel
|
|
25
|
+
from mindspore.common.api import jit
|
|
26
|
+
from mindspore import _checkparam as validator
|
|
28
27
|
from mindspore.nn.optim.optimizer import Optimizer
|
|
29
28
|
from mindspore.nn.optim.optimizer import opt_init_args_register
|
|
30
29
|
|
|
@@ -69,8 +68,8 @@ def _check_param_value(beta1, beta2, eps, prim_name):
|
|
|
69
68
|
validator.check_value_type("beta1", beta1, [float], prim_name)
|
|
70
69
|
validator.check_value_type("beta2", beta2, [float], prim_name)
|
|
71
70
|
validator.check_value_type("eps", eps, [float], prim_name)
|
|
72
|
-
validator.check_float_range(beta1, 0.0, 1.0,
|
|
73
|
-
validator.check_float_range(beta2, 0.0, 1.0,
|
|
71
|
+
validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
|
|
72
|
+
validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
|
|
74
73
|
validator.check_positive_float(eps, "eps", prim_name)
|
|
75
74
|
|
|
76
75
|
|
|
@@ -113,7 +112,7 @@ class Lamb(Optimizer):
|
|
|
113
112
|
/(\sqrt{\hat{\boldsymbol{v}}_{t}}+\epsilon) \\
|
|
114
113
|
&\hspace{5mm}\boldsymbol{w}_{t+1}^{(i)}=\boldsymbol{w}_{t}^{(i)}- \gamma_{t}
|
|
115
114
|
\frac{\boldsymbol{\phi}\left(\left\|\boldsymbol{w}_{t}^{(i)}\right\|\right)}
|
|
116
|
-
{\left\|\boldsymbol{
|
|
115
|
+
{\left\|\boldsymbol{r}_{t}^{(i)}+\lambda \boldsymbol{w}_{t}^{(i)}\right\|}\left(\boldsymbol{r}_{t}^{(i)}+
|
|
117
116
|
\lambda \boldsymbol{w}_{t}^{(i)}\right) \\
|
|
118
117
|
&\textbf{end for} \\[-1.ex]
|
|
119
118
|
&\newline
|
|
@@ -133,7 +132,7 @@ class Lamb(Optimizer):
|
|
|
133
132
|
There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
|
|
134
133
|
and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
|
|
135
134
|
As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means. Refer
|
|
136
|
-
document `LossScale <https://www.mindspore.cn/tutorials/
|
|
135
|
+
document `LossScale <https://www.mindspore.cn/tutorials/en/r2.0/advanced/mixed_precision.html>`_ to
|
|
137
136
|
process `loss_scale` correctly.
|
|
138
137
|
|
|
139
138
|
If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
|
|
@@ -259,7 +258,7 @@ class Lamb(Optimizer):
|
|
|
259
258
|
self.moments2 = self.params.clone(prefix="lamb_v", init='zeros')
|
|
260
259
|
self.device_ascend = context.get_context("device_target") == "Ascend"
|
|
261
260
|
|
|
262
|
-
@
|
|
261
|
+
@jit
|
|
263
262
|
def construct(self, gradients):
|
|
264
263
|
weight_decay = self.get_weight_decay()
|
|
265
264
|
lr = self.get_lr()
|
mindspore/nn/optim/lars.py
CHANGED
|
@@ -18,9 +18,9 @@ from __future__ import absolute_import
|
|
|
18
18
|
from mindspore.ops import operations as P
|
|
19
19
|
from mindspore.ops import composite as C
|
|
20
20
|
from mindspore.ops import functional as F
|
|
21
|
-
from mindspore
|
|
21
|
+
from mindspore import _checkparam as validator
|
|
22
22
|
from mindspore.common import Tensor, Parameter, dtype as mstype
|
|
23
|
-
from mindspore.common.api import
|
|
23
|
+
from mindspore.common.api import jit
|
|
24
24
|
from mindspore.nn.optim.optimizer import _grad_scale, Optimizer
|
|
25
25
|
from mindspore.nn.optim.optimizer import opt_init_args_register
|
|
26
26
|
|
|
@@ -83,7 +83,7 @@ class LARS(Optimizer):
|
|
|
83
83
|
\end{array}
|
|
84
84
|
|
|
85
85
|
:math:`w` represents the network parameters, :math:`g` represents `gradients`,
|
|
86
|
-
:math:`t` represents the current step, :math:`\
|
|
86
|
+
:math:`t` represents the current step, :math:`\lambda` represents `weight_decay` in `optimizer`,
|
|
87
87
|
:math:`\gamma` represents `learning_rate` in `optimizer`, :math:`\eta` represents `coefficient`.
|
|
88
88
|
|
|
89
89
|
Args:
|
|
@@ -171,7 +171,7 @@ class LARS(Optimizer):
|
|
|
171
171
|
|
|
172
172
|
return lr
|
|
173
173
|
|
|
174
|
-
@
|
|
174
|
+
@jit
|
|
175
175
|
def construct(self, gradients):
|
|
176
176
|
params = self.parameters
|
|
177
177
|
gradients = self.flatten_gradients(gradients)
|
mindspore/nn/optim/lazyadam.py
CHANGED
|
@@ -17,17 +17,17 @@ from __future__ import absolute_import
|
|
|
17
17
|
|
|
18
18
|
from mindspore.common import dtype as mstype
|
|
19
19
|
from mindspore.common.initializer import initializer
|
|
20
|
-
from mindspore.common.api import
|
|
20
|
+
from mindspore.common.api import jit
|
|
21
21
|
from mindspore.ops import operations as P
|
|
22
22
|
from mindspore.ops import composite as C
|
|
23
23
|
from mindspore.ops import functional as F
|
|
24
24
|
from mindspore.common.parameter import Parameter
|
|
25
25
|
from mindspore.common.tensor import Tensor
|
|
26
|
-
from mindspore
|
|
27
|
-
from mindspore._checkparam import Rel
|
|
26
|
+
from mindspore import _checkparam as validator
|
|
28
27
|
from mindspore.nn.optim.optimizer import Optimizer
|
|
29
28
|
from mindspore.nn.optim.optimizer import opt_init_args_register
|
|
30
29
|
from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
|
|
30
|
+
from mindspore.common._decorator import deprecated
|
|
31
31
|
|
|
32
32
|
_lazy_adam_opt = C.MultitypeFuncGraph("lazy_adam_opt")
|
|
33
33
|
|
|
@@ -85,6 +85,46 @@ def _run_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use_nest
|
|
|
85
85
|
return success
|
|
86
86
|
|
|
87
87
|
|
|
88
|
+
@_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
|
|
89
|
+
"Tensor", "Tensor", "Tensor", "Tensor", "MapTensor", "MapTensor", "MapTensor", "MapTensor",
|
|
90
|
+
"Bool", "Bool", "Function", "Bool", "Function", "Bool")
|
|
91
|
+
def _run_map_tensor_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,
|
|
92
|
+
beta2_power, beta1, beta2, eps, lr, gradient, params, m, v,
|
|
93
|
+
ps_parameter, cache_enable, distributed_opt, use_flag, distributed_sparse_opt,
|
|
94
|
+
use_sparse_flag):
|
|
95
|
+
"""Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
|
|
96
|
+
success = True
|
|
97
|
+
indices, values = gradient.get_data()
|
|
98
|
+
if use_sparse_flag:
|
|
99
|
+
# PS Mode.
|
|
100
|
+
success = F.depend(success, distributed_sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
|
|
101
|
+
eps, values, indices))
|
|
102
|
+
else:
|
|
103
|
+
# PS Cache mode.
|
|
104
|
+
op_sqrt = P.Sqrt()
|
|
105
|
+
|
|
106
|
+
m_slice = m.get(indices)
|
|
107
|
+
v_slice = v.get(indices)
|
|
108
|
+
|
|
109
|
+
next_m = m_slice * beta1 + values * (1 - beta1)
|
|
110
|
+
next_v = v_slice * beta2 + values * values * (1 - beta2)
|
|
111
|
+
|
|
112
|
+
lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
|
|
113
|
+
|
|
114
|
+
if use_nesterov:
|
|
115
|
+
m_temp = beta1 * next_m + values * (1 - beta1)
|
|
116
|
+
param_update = m_temp / (op_sqrt(next_v) + eps)
|
|
117
|
+
else:
|
|
118
|
+
param_update = next_m / (op_sqrt(next_v) + eps)
|
|
119
|
+
|
|
120
|
+
params_need_update = params.get(indices)
|
|
121
|
+
params.put(indices, params_need_update - lr_t * param_update)
|
|
122
|
+
m.put(indices, next_m)
|
|
123
|
+
v.put(indices, next_v)
|
|
124
|
+
|
|
125
|
+
return success
|
|
126
|
+
|
|
127
|
+
|
|
88
128
|
@_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
|
|
89
129
|
"Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool",
|
|
90
130
|
"Function", "Bool", "Function", "Bool")
|
|
@@ -155,6 +195,40 @@ def _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov,
|
|
|
155
195
|
return success
|
|
156
196
|
|
|
157
197
|
|
|
198
|
+
@_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
|
|
199
|
+
"Tensor", "Tensor", "Tensor", "Tensor", "MapTensor", "MapTensor", "MapTensor", "MapTensor",
|
|
200
|
+
"Bool", "Bool")
|
|
201
|
+
def _run_map_tensor_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,
|
|
202
|
+
beta2_power, beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter,
|
|
203
|
+
cache_enable):
|
|
204
|
+
"""Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse(MapTensor)."""
|
|
205
|
+
success = True
|
|
206
|
+
indices, values = gradient.get_data()
|
|
207
|
+
|
|
208
|
+
op_sqrt = P.Sqrt()
|
|
209
|
+
|
|
210
|
+
m_slice = m.get(indices)
|
|
211
|
+
v_slice = v.get(indices)
|
|
212
|
+
|
|
213
|
+
next_m = m_slice * beta1 + values * (1 - beta1)
|
|
214
|
+
next_v = v_slice * beta2 + values * values * (1 - beta2)
|
|
215
|
+
|
|
216
|
+
lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
|
|
217
|
+
|
|
218
|
+
if use_nesterov:
|
|
219
|
+
m_temp = beta1 * next_m + values * (1 - beta1)
|
|
220
|
+
param_update = m_temp / (op_sqrt(next_v) + eps)
|
|
221
|
+
else:
|
|
222
|
+
param_update = next_m / (op_sqrt(next_v) + eps)
|
|
223
|
+
|
|
224
|
+
params_need_update = params.get(indices)
|
|
225
|
+
params.put(indices, params_need_update - lr_t * param_update)
|
|
226
|
+
m.put(indices, next_m)
|
|
227
|
+
v.put(indices, next_v)
|
|
228
|
+
|
|
229
|
+
return success
|
|
230
|
+
|
|
231
|
+
|
|
158
232
|
@_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
|
|
159
233
|
"Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
|
|
160
234
|
def _run_opt_with_one_number(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power,
|
|
@@ -177,8 +251,8 @@ def _check_param_value(beta1, beta2, eps, weight_decay, prim_name):
|
|
|
177
251
|
validator.check_value_type("beta2", beta2, [float], prim_name)
|
|
178
252
|
validator.check_value_type("eps", eps, [float], prim_name)
|
|
179
253
|
validator.check_value_type("weight_dacay", weight_decay, [float], prim_name)
|
|
180
|
-
validator.check_float_range(beta1, 0.0, 1.0,
|
|
181
|
-
validator.check_float_range(beta2, 0.0, 1.0,
|
|
254
|
+
validator.check_float_range(beta1, 0.0, 1.0, validator.INC_NEITHER, "beta1", prim_name)
|
|
255
|
+
validator.check_float_range(beta2, 0.0, 1.0, validator.INC_NEITHER, "beta2", prim_name)
|
|
182
256
|
validator.check_positive_float(eps, "eps", prim_name)
|
|
183
257
|
validator.check_non_negative_float(weight_decay, "weight_decay", prim_name)
|
|
184
258
|
|
|
@@ -332,6 +406,7 @@ class LazyAdam(Optimizer):
|
|
|
332
406
|
>>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
|
|
333
407
|
"""
|
|
334
408
|
|
|
409
|
+
@deprecated("2.0", "Adam", False)
|
|
335
410
|
@opt_init_args_register
|
|
336
411
|
def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False,
|
|
337
412
|
use_nesterov=False, weight_decay=0.0, loss_scale=1.0):
|
|
@@ -352,14 +427,14 @@ class LazyAdam(Optimizer):
|
|
|
352
427
|
self.moment2 = self._parameters.clone(prefix="moment2", init='zeros')
|
|
353
428
|
self.opt = P.Adam(use_locking, use_nesterov)
|
|
354
429
|
self.sparse_opt = P.FusedSparseLazyAdam(use_locking, use_nesterov)
|
|
355
|
-
self.sparse_opt.
|
|
430
|
+
self.sparse_opt.set_device("CPU")
|
|
356
431
|
self._ps_pull = P.Pull()
|
|
357
432
|
self._ps_push = P.Push("Adam", [0, 1, 2])
|
|
358
433
|
self._ps_push.add_prim_attr("use_nesterov", use_nesterov)
|
|
359
434
|
|
|
360
435
|
self._init_distributed_opts(use_locking, use_nesterov)
|
|
361
436
|
|
|
362
|
-
@
|
|
437
|
+
@jit
|
|
363
438
|
def construct(self, gradients):
|
|
364
439
|
gradients = self.flatten_gradients(gradients)
|
|
365
440
|
gradients = self.decay_weight(gradients)
|
mindspore/nn/optim/momentum.py
CHANGED
|
@@ -18,9 +18,9 @@ from __future__ import absolute_import
|
|
|
18
18
|
from mindspore.ops import functional as F, composite as C, operations as P
|
|
19
19
|
from mindspore.common.parameter import Parameter
|
|
20
20
|
from mindspore.common.tensor import Tensor
|
|
21
|
-
from mindspore.common.api import
|
|
21
|
+
from mindspore.common.api import jit
|
|
22
22
|
import mindspore.common.dtype as mstype
|
|
23
|
-
from mindspore
|
|
23
|
+
from mindspore import _checkparam as Validator
|
|
24
24
|
from mindspore.nn.optim.optimizer import Optimizer
|
|
25
25
|
from mindspore.nn.optim.optimizer import opt_init_args_register
|
|
26
26
|
from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
|
|
@@ -69,19 +69,20 @@ class Momentum(Optimizer):
|
|
|
69
69
|
learning <https://dl.acm.org/doi/10.5555/3042817.3043064>`_ for more details.
|
|
70
70
|
|
|
71
71
|
.. math::
|
|
72
|
-
|
|
72
|
+
v_{t+1} = v_{t} \ast u + grad
|
|
73
73
|
|
|
74
74
|
If use_nesterov is True:
|
|
75
75
|
|
|
76
76
|
.. math::
|
|
77
|
-
|
|
77
|
+
p_{t+1} = p_{t} - (grad \ast lr + v_{t+1} \ast u \ast lr)
|
|
78
78
|
|
|
79
79
|
If use_nesterov is False:
|
|
80
80
|
|
|
81
81
|
.. math::
|
|
82
|
-
|
|
82
|
+
p_{t+1} = p_{t} - lr \ast v_{t+1}
|
|
83
83
|
|
|
84
|
-
Here: where grad
|
|
84
|
+
Here: where :math:`grad`, :math:`lr`, :math:`p`, :math:`v` and :math:`u` denote the gradients,
|
|
85
|
+
learning_rate, params, moments, and momentum respectively.
|
|
85
86
|
|
|
86
87
|
Note:
|
|
87
88
|
If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
|
|
@@ -208,7 +209,7 @@ class Momentum(Optimizer):
|
|
|
208
209
|
self._get_distributed_optimizer_list("momentum", use_nesterov=self.use_nesterov)
|
|
209
210
|
self.use_dist_optimizer = self._use_distibuted_optimizer()
|
|
210
211
|
|
|
211
|
-
@
|
|
212
|
+
@jit
|
|
212
213
|
def construct(self, gradients):
|
|
213
214
|
params = self.params
|
|
214
215
|
moments = self.moments
|