mindspore 2.0.0a0__cp37-none-any.whl → 2.0.0rc1__cp37-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +9064 -0
- mindspore/__init__.py +4 -2
- mindspore/_akg/akg/composite/build_module.py +11 -0
- mindspore/_akg/akg/config/repository_cuda.json +11 -0
- mindspore/_akg/akg/tvm/contrib/nvcc.py +4 -3
- mindspore/_c_dataengine.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +16 -1
- mindspore/_extends/parse/parser.py +107 -22
- mindspore/_extends/parse/resources.py +0 -7
- mindspore/_extends/parse/standard_method.py +885 -413
- mindspore/_mindspore_offline_debug.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +52 -57
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +38 -20
- mindspore/boost/dim_reduce.py +3 -3
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +4 -6
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +41 -7
- mindspore/common/api.py +215 -141
- mindspore/common/dtype.py +8 -1
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +4 -2
- mindspore/common/jit_config.py +17 -13
- mindspore/common/mutable.py +33 -13
- mindspore/common/parameter.py +23 -21
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +62 -41
- mindspore/common/tensor.py +852 -1154
- mindspore/communication/__init__.py +2 -2
- mindspore/communication/_comm_helper.py +11 -4
- mindspore/communication/management.py +22 -21
- mindspore/config/op_info.config +501 -1008
- mindspore/config/super_bar_config.json +512 -0
- mindspore/context.py +201 -23
- mindspore/dataset/__init__.py +6 -6
- mindspore/dataset/audio/__init__.py +7 -7
- mindspore/dataset/audio/transforms.py +670 -30
- mindspore/dataset/audio/utils.py +47 -4
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +210 -14
- mindspore/dataset/core/validator_helpers.py +2 -2
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +322 -66
- mindspore/dataset/engine/datasets_audio.py +80 -76
- mindspore/dataset/engine/datasets_standard_format.py +51 -38
- mindspore/dataset/engine/datasets_text.py +232 -118
- mindspore/dataset/engine/datasets_user_defined.py +41 -17
- mindspore/dataset/engine/datasets_vision.py +746 -225
- mindspore/dataset/engine/graphdata.py +75 -10
- mindspore/dataset/engine/iterators.py +45 -5
- mindspore/dataset/engine/offload.py +48 -28
- mindspore/dataset/engine/validators.py +117 -8
- mindspore/dataset/text/__init__.py +6 -5
- mindspore/dataset/text/transforms.py +86 -3
- mindspore/dataset/text/utils.py +6 -4
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +3 -2
- mindspore/dataset/transforms/c_transforms.py +1 -1
- mindspore/dataset/transforms/transforms.py +2 -2
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +2 -3
- mindspore/dataset/vision/c_transforms.py +9 -9
- mindspore/dataset/vision/py_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +2 -0
- mindspore/dataset/vision/transforms.py +160 -161
- mindspore/dataset/vision/utils.py +3 -3
- mindspore/experimental/map_parameter.py +38 -26
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +44 -9
- mindspore/include/api/delegate.h +1 -1
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_parallel_runner.h +2 -2
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +19 -3
- mindspore/include/api/types.h +3 -3
- mindspore/include/dataset/constants.h +7 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/include/mindapi/base/type_id.h +1 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libicudata.so.69 +0 -0
- mindspore/lib/libicui18n.so.69 +0 -0
- mindspore/lib/libicuuc.so.69 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/{libmindspore_ascend.so → libmindspore_ascend.so.2} +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filereader.py +18 -0
- mindspore/mindrecord/filewriter.py +197 -34
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
- mindspore/mindrecord/tools/csv_to_mr.py +3 -3
- mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/nn/__init__.py +0 -4
- mindspore/nn/cell.py +204 -132
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +7 -6
- mindspore/nn/layer/__init__.py +5 -4
- mindspore/nn/layer/activation.py +40 -89
- mindspore/nn/layer/basic.py +255 -624
- mindspore/nn/layer/channel_shuffle.py +7 -6
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +41 -4
- mindspore/nn/layer/conv.py +64 -28
- mindspore/nn/layer/dense.py +9 -8
- mindspore/nn/layer/embedding.py +27 -25
- mindspore/nn/layer/image.py +53 -46
- mindspore/nn/layer/math.py +97 -105
- mindspore/nn/layer/normalization.py +117 -86
- mindspore/nn/layer/padding.py +185 -95
- mindspore/nn/layer/pooling.py +817 -414
- mindspore/nn/layer/rnn_cells.py +10 -15
- mindspore/nn/layer/rnns.py +37 -38
- mindspore/nn/layer/thor_layer.py +11 -12
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +5 -4
- mindspore/nn/loss/loss.py +334 -199
- mindspore/nn/optim/ada_grad.py +6 -6
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +4 -5
- mindspore/nn/optim/adam.py +126 -62
- mindspore/nn/optim/adamax.py +3 -4
- mindspore/nn/optim/adasum.py +6 -6
- mindspore/nn/optim/asgd.py +2 -2
- mindspore/nn/optim/ftrl.py +67 -38
- mindspore/nn/optim/lamb.py +4 -5
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +43 -4
- mindspore/nn/optim/momentum.py +6 -5
- mindspore/nn/optim/optimizer.py +3 -1
- mindspore/nn/optim/proximal_ada_grad.py +2 -2
- mindspore/nn/optim/rmsprop.py +1 -1
- mindspore/nn/optim/rprop.py +8 -9
- mindspore/nn/optim/sgd.py +19 -13
- mindspore/nn/optim/thor.py +10 -15
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +4 -4
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +9 -15
- mindspore/nn/probability/distribution/bernoulli.py +3 -3
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +5 -7
- mindspore/nn/probability/distribution/cauchy.py +3 -3
- mindspore/nn/probability/distribution/distribution.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +3 -3
- mindspore/nn/probability/distribution/half_normal.py +15 -11
- mindspore/nn/probability/distribution/laplace.py +16 -13
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/normal.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/student_t.py +20 -15
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +27 -10
- mindspore/nn/wrap/grad_reducer.py +2 -2
- mindspore/nn/wrap/loss_scale.py +40 -24
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +35 -30
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +22 -19
- mindspore/numpy/utils.py +1 -1
- mindspore/numpy/utils_const.py +108 -58
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +86 -117
- mindspore/ops/_grad/grad_base.py +23 -1
- mindspore/ops/_grad/grad_clip_ops.py +2 -3
- mindspore/ops/_grad/grad_comm_ops.py +34 -24
- mindspore/ops/_grad/grad_implementations.py +9 -45
- mindspore/ops/_grad/grad_inner_ops.py +47 -4
- mindspore/ops/_grad/grad_math_ops.py +142 -117
- mindspore/ops/_grad/grad_nn_ops.py +71 -165
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +7 -6
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
- mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
- mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
- mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -611
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_register_for_op.py +1 -0
- mindspore/ops/_utils/__init__.py +1 -2
- mindspore/ops/_utils/utils.py +19 -40
- mindspore/ops/_vmap/vmap_array_ops.py +116 -38
- mindspore/ops/_vmap/vmap_base.py +16 -9
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
- mindspore/ops/_vmap/vmap_image_ops.py +12 -5
- mindspore/ops/_vmap/vmap_math_ops.py +46 -5
- mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
- mindspore/ops/_vmap/vmap_random_ops.py +1 -1
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
- mindspore/ops/composite/__init__.py +7 -8
- mindspore/ops/composite/base.py +101 -47
- mindspore/ops/composite/math_ops.py +188 -158
- mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
- mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
- mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
- mindspore/ops/function/__init__.py +152 -8
- mindspore/ops/function/array_func.py +2555 -674
- mindspore/ops/function/clip_func.py +209 -13
- mindspore/ops/function/debug_func.py +2 -2
- mindspore/ops/function/grad/__init__.py +2 -1
- mindspore/ops/function/grad/grad_func.py +147 -62
- mindspore/ops/function/image_func.py +54 -38
- mindspore/ops/function/linalg_func.py +167 -16
- mindspore/ops/function/math_func.py +4849 -1492
- mindspore/ops/function/nn_func.py +2573 -988
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +3 -3
- mindspore/ops/function/random_func.py +790 -73
- mindspore/ops/function/sparse_func.py +98 -78
- mindspore/ops/function/sparse_unary_func.py +54 -53
- mindspore/ops/function/spectral_func.py +27 -24
- mindspore/ops/function/vmap_func.py +22 -2
- mindspore/ops/functional.py +97 -37
- mindspore/ops/op_info_register.py +70 -28
- mindspore/ops/operations/__init__.py +47 -14
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +5 -5
- mindspore/ops/operations/_grad_ops.py +276 -187
- mindspore/ops/operations/_inner_ops.py +319 -113
- mindspore/ops/operations/_ms_kernel.py +10 -8
- mindspore/ops/operations/_ocr_ops.py +9 -9
- mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
- mindspore/ops/operations/_quant_ops.py +137 -102
- mindspore/ops/operations/_rl_inner_ops.py +121 -60
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1004 -2
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +801 -466
- mindspore/ops/operations/comm_ops.py +51 -49
- mindspore/ops/operations/control_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +123 -44
- mindspore/ops/operations/debug_ops.py +24 -24
- mindspore/ops/operations/image_ops.py +240 -153
- mindspore/ops/operations/inner_ops.py +34 -50
- mindspore/ops/operations/linalg_ops.py +31 -9
- mindspore/ops/operations/math_ops.py +988 -757
- mindspore/ops/operations/nn_ops.py +965 -819
- mindspore/ops/operations/other_ops.py +51 -40
- mindspore/ops/operations/random_ops.py +204 -122
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +254 -93
- mindspore/ops/operations/spectral_ops.py +35 -3
- mindspore/ops/primitive.py +111 -9
- mindspore/parallel/_auto_parallel_context.py +189 -83
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +99 -7
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +7 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
- mindspore/parallel/_utils.py +1 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +37 -34
- mindspore/parallel/shard.py +17 -18
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +69 -47
- mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
- mindspore/profiler/parser/base_timeline_generator.py +49 -56
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
- mindspore/profiler/parser/hwts_log_parser.py +1 -1
- mindspore/profiler/parser/integrator.py +15 -14
- mindspore/profiler/parser/minddata_analyzer.py +2 -2
- mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +2 -1
- mindspore/profiler/profiling.py +218 -186
- mindspore/rewrite/__init__.py +3 -1
- mindspore/rewrite/api/node.py +1 -114
- mindspore/rewrite/api/node_type.py +3 -0
- mindspore/rewrite/api/pattern_engine.py +31 -1
- mindspore/rewrite/api/scoped_value.py +4 -4
- mindspore/rewrite/api/symbol_tree.py +3 -78
- mindspore/rewrite/api/tree_node_helper.py +1 -1
- mindspore/rewrite/ast_creator_register.py +1 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -2
- mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
- mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
- mindspore/rewrite/namespace.py +0 -2
- mindspore/rewrite/node.py +157 -11
- mindspore/rewrite/parsers/assign_parser.py +231 -53
- mindspore/rewrite/parsers/class_def_parser.py +187 -109
- mindspore/rewrite/parsers/for_parser.py +24 -14
- mindspore/rewrite/parsers/function_def_parser.py +21 -4
- mindspore/rewrite/parsers/if_parser.py +6 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +256 -133
- mindspore/rewrite/symbol_tree_builder.py +38 -1
- mindspore/run_check/_check_version.py +69 -63
- mindspore/run_check/run_check.py +2 -1
- mindspore/scipy/linalg.py +10 -114
- mindspore/scipy/ops.py +2 -2
- mindspore/scipy/ops_wrapper.py +1 -1
- mindspore/scipy/optimize/_bfgs.py +1 -1
- mindspore/scipy/optimize/_lagrange.py +200 -0
- mindspore/scipy/optimize/line_search.py +3 -2
- mindspore/scipy/optimize/minimize.py +41 -2
- mindspore/scipy/sparse/__init__.py +2 -2
- mindspore/scipy/sparse/linalg.py +4 -464
- mindspore/scipy/utils.py +1 -1
- mindspore/scipy/utils_const.py +7 -1
- mindspore/train/__init__.py +1 -1
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +273 -102
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +2 -2
- mindspore/train/callback/_checkpoint.py +3 -3
- mindspore/train/callback/_early_stop.py +3 -3
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +29 -31
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +3 -3
- mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
- mindspore/train/callback/_summary_collector.py +23 -16
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +15 -3
- mindspore/train/dataset_helper.py +10 -15
- mindspore/train/loss_scale_manager.py +8 -11
- mindspore/train/metrics/__init__.py +1 -1
- mindspore/train/metrics/bleu_score.py +1 -1
- mindspore/train/metrics/confusion_matrix.py +1 -1
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/dice.py +2 -2
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +4 -3
- mindspore/train/metrics/mean_surface_distance.py +2 -2
- mindspore/train/metrics/occlusion_sensitivity.py +1 -1
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +1 -1
- mindspore/train/metrics/recall.py +1 -1
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +45 -28
- mindspore/train/serialization.py +295 -188
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -13
- mindspore/train/train_thor/convert_utils.py +2 -2
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/version.py +1 -1
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +648 -574
- mindspore/compression/__init__.py +0 -19
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -515
- mindspore/compression/quant/__init__.py +0 -28
- mindspore/compression/quant/qat.py +0 -634
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -140
- mindspore/nn/probability/dpn/vae/vae.py +0 -124
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
- mindspore/ops/composite/array_ops.py +0 -241
- mindspore/ops/composite/clip_ops.py +0 -134
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
import numpy as np
|
|
19
19
|
import mindspore as ms
|
|
20
20
|
from mindspore import Tensor
|
|
21
|
-
from mindspore.ops import composite as C
|
|
22
21
|
from mindspore.ops import operations as P
|
|
23
22
|
from mindspore.ops.operations import _grad_ops as G
|
|
24
23
|
from mindspore.ops.operations.array_ops import Fills, NonZero
|
|
@@ -27,11 +26,11 @@ from mindspore.ops.functional import broadcast_gradient_args
|
|
|
27
26
|
from mindspore.ops import functional as F
|
|
28
27
|
from mindspore.ops._grad.grad_base import bprop_getters, create_tensor_by_element
|
|
29
28
|
from mindspore.ops.primitive import constexpr
|
|
29
|
+
from mindspore.ops.primitive import _primexpr
|
|
30
30
|
from mindspore.common import dtype as mstype
|
|
31
31
|
from mindspore.common.sparse_tensor import RowTensorInner
|
|
32
|
-
from mindspore.ops._utils.utils import range_op, get_1d_shape, generate_shape_index
|
|
33
|
-
from mindspore.ops._grad.grad_base import dyn_rank, convert_to_tensor,
|
|
34
|
-
dyn_fill
|
|
32
|
+
from mindspore.ops._utils.utils import range_op, get_1d_shape, generate_shape_index
|
|
33
|
+
from mindspore.ops._grad.grad_base import dyn_rank, convert_to_tensor, dyn_ones, dyn_fill
|
|
35
34
|
from mindspore.ops._grad.grad_base import sum_grad_reduce_axis
|
|
36
35
|
from mindspore.ops.operations._inner_ops import DynamicBroadcastGradientArgs
|
|
37
36
|
from ..operations._inner_ops import DynamicBroadcastGradientArgs, IsSubClass
|
|
@@ -98,47 +97,6 @@ def get_bprop_dtype(self):
|
|
|
98
97
|
return bprop
|
|
99
98
|
|
|
100
99
|
|
|
101
|
-
dout_cast = C.MultitypeFuncGraph("dout_cast")
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
@dout_cast.register("Tensor", "Tensor")
|
|
105
|
-
def dout_cast_tensor(dout, x):
|
|
106
|
-
"""Casts dout to the dtype of x for Tensor."""
|
|
107
|
-
cast = P.Cast()
|
|
108
|
-
get_dtype = P.DType()
|
|
109
|
-
dx = cast(dout, get_dtype(x))
|
|
110
|
-
return dx
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
@dout_cast.register("Number", "Number")
|
|
114
|
-
def dout_cast_number(dout, x):
|
|
115
|
-
"""Casts dout to the dtype of x for Number."""
|
|
116
|
-
cast = P.Cast()
|
|
117
|
-
get_dtype = P.DType()
|
|
118
|
-
dx = cast(dout, get_dtype(x))
|
|
119
|
-
return dx
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
@dout_cast.register("RowTensor", "Tensor")
|
|
123
|
-
def dout_cast_row_tensor(dout, x):
|
|
124
|
-
"""Casts dout values to the dtype of x for RowTensor."""
|
|
125
|
-
cast = P.Cast()
|
|
126
|
-
get_dtype = P.DType()
|
|
127
|
-
values = cast(dout.values, get_dtype(x))
|
|
128
|
-
return RowTensorInner(dout.indices, values, dout.dense_shape)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
@bprop_getters.register(P.Cast)
|
|
132
|
-
def get_bprop_cast(self):
|
|
133
|
-
"""Generate bprop for Cast"""
|
|
134
|
-
|
|
135
|
-
def bprop(x, t, out, dout):
|
|
136
|
-
dx = dout_cast(dout, x)
|
|
137
|
-
return dx, zeros_like(t)
|
|
138
|
-
|
|
139
|
-
return bprop
|
|
140
|
-
|
|
141
|
-
|
|
142
100
|
@bprop_getters.register(P.Shape)
|
|
143
101
|
def get_bprop_shape(self):
|
|
144
102
|
"""Generate bprop for Shape"""
|
|
@@ -198,7 +156,7 @@ def get_bprop_reshape(self):
|
|
|
198
156
|
|
|
199
157
|
def bprop(x, shp, out, dout):
|
|
200
158
|
shapex = shape_op(x)
|
|
201
|
-
if
|
|
159
|
+
if F.is_sequence_value_unknown(shapex):
|
|
202
160
|
shapex = dyn_shape_op(x)
|
|
203
161
|
return reshape(dout, shapex), zeros_like(shp)
|
|
204
162
|
|
|
@@ -211,7 +169,7 @@ def get_bprop_expand_dims(self):
|
|
|
211
169
|
|
|
212
170
|
def bprop(x, axis, out, dout):
|
|
213
171
|
shapex = shape_op(x)
|
|
214
|
-
if
|
|
172
|
+
if F.is_sequence_value_unknown(shapex):
|
|
215
173
|
shapex = dyn_shape_op(x)
|
|
216
174
|
return reshape(dout, shapex), zeros_like(axis)
|
|
217
175
|
|
|
@@ -224,7 +182,7 @@ def get_bprop_squeeze(self):
|
|
|
224
182
|
|
|
225
183
|
def bprop(x, out, dout):
|
|
226
184
|
shapex = shape_op(x)
|
|
227
|
-
if
|
|
185
|
+
if F.is_sequence_value_unknown(shapex):
|
|
228
186
|
shapex = dyn_shape_op(x)
|
|
229
187
|
return (reshape(dout, shapex),)
|
|
230
188
|
|
|
@@ -238,7 +196,7 @@ def get_bprop_flatten(self):
|
|
|
238
196
|
|
|
239
197
|
def bprop(x, out, dout):
|
|
240
198
|
shape_x = shape_op(x)
|
|
241
|
-
if
|
|
199
|
+
if F.is_sequence_value_unknown(shape_x):
|
|
242
200
|
shape_x = dyn_shape_op(x)
|
|
243
201
|
dx = flatten_grad(dout, shape_x)
|
|
244
202
|
return (dx,)
|
|
@@ -246,7 +204,7 @@ def get_bprop_flatten(self):
|
|
|
246
204
|
return bprop
|
|
247
205
|
|
|
248
206
|
|
|
249
|
-
@
|
|
207
|
+
@_primexpr
|
|
250
208
|
def _tile_shape(multiples, shapex):
|
|
251
209
|
"""Calculate [1,2], [3, 4] -> [1,3,2,4]."""
|
|
252
210
|
len_muli = len(multiples)
|
|
@@ -297,7 +255,7 @@ def get_bprop_tile(self):
|
|
|
297
255
|
|
|
298
256
|
def bprop(x, multiples, out, dout):
|
|
299
257
|
shapex = shape_op(x)
|
|
300
|
-
if
|
|
258
|
+
if F.is_sequence_value_unknown(shapex):
|
|
301
259
|
shapex = dyn_shape_op(x)
|
|
302
260
|
if isinstance(multiples, tuple) and isinstance(shapex, tuple):
|
|
303
261
|
r_shape = _tile_shape(multiples, shapex)
|
|
@@ -340,7 +298,7 @@ def get_bprop_embedding_lookup(self):
|
|
|
340
298
|
|
|
341
299
|
def bprop_sparse(x, indices, offset, out, dout):
|
|
342
300
|
x_shp = shape_op(x)
|
|
343
|
-
if
|
|
301
|
+
if F.is_sequence_value_unknown(x_shp):
|
|
344
302
|
raise RuntimeError("Now, EmbeddingLookup op's grad don't support Dynamic Sense!")
|
|
345
303
|
new_indices = sub_op(indices, offset)
|
|
346
304
|
indices_size = size_op(new_indices)
|
|
@@ -359,7 +317,7 @@ def get_bprop_embedding_lookup(self):
|
|
|
359
317
|
return bprop_sparse
|
|
360
318
|
|
|
361
319
|
|
|
362
|
-
@
|
|
320
|
+
@_primexpr
|
|
363
321
|
def make_begin(shp):
|
|
364
322
|
"""Creates a tuple with zero according to the shape."""
|
|
365
323
|
begin = tuple([0 for _ in shp])
|
|
@@ -379,7 +337,7 @@ def get_bprop_padding(self):
|
|
|
379
337
|
def bprop(x, out, dout):
|
|
380
338
|
shp = shape_op(x)
|
|
381
339
|
begin = ()
|
|
382
|
-
if
|
|
340
|
+
if F.is_sequence_value_unknown(shp):
|
|
383
341
|
shp = dyn_shape_op(x)
|
|
384
342
|
begin = make_dynamic_begin(shp)
|
|
385
343
|
else:
|
|
@@ -390,36 +348,7 @@ def get_bprop_padding(self):
|
|
|
390
348
|
return bprop
|
|
391
349
|
|
|
392
350
|
|
|
393
|
-
@
|
|
394
|
-
def _transpose_perm_positive(perm):
|
|
395
|
-
res = []
|
|
396
|
-
for value in perm:
|
|
397
|
-
value = value if (value >= 0) else (value + len(perm))
|
|
398
|
-
res.append(value)
|
|
399
|
-
return tuple(res)
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
def _dyn_transpose_perm_positive(perm):
|
|
403
|
-
return (perm + dyn_size(perm)) % (dyn_size(perm))
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
@bprop_getters.register(P.Transpose)
|
|
407
|
-
def get_bprop_transpose(self):
|
|
408
|
-
"""Generate bprop for Transpose"""
|
|
409
|
-
|
|
410
|
-
def bprop(x, perm, out, dout):
|
|
411
|
-
is_mutable, perm = convert_to_tensor(perm)
|
|
412
|
-
if is_mutable:
|
|
413
|
-
perm = _dyn_transpose_perm_positive(perm)
|
|
414
|
-
return transpose(dout, dyn_invert_permutation(perm)), zeros_like(perm)
|
|
415
|
-
|
|
416
|
-
perm = _transpose_perm_positive(perm)
|
|
417
|
-
return transpose(dout, invert_permutation(perm)), zeros_like(perm)
|
|
418
|
-
|
|
419
|
-
return bprop
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
@constexpr
|
|
351
|
+
@_primexpr
|
|
423
352
|
def _concat_grad_uniform(input_shapes, input_nums):
|
|
424
353
|
"""Helper function for bprop of Concat"""
|
|
425
354
|
is_uniform = True
|
|
@@ -472,12 +401,6 @@ def get_bprop_concat(self):
|
|
|
472
401
|
return bprop
|
|
473
402
|
|
|
474
403
|
|
|
475
|
-
@constexpr
|
|
476
|
-
def _slice_grad_pad(begins, sizes, shapes):
|
|
477
|
-
pads = tuple((begin, shape - begin - size) for begin, size, shape in zip(begins, sizes, shapes))
|
|
478
|
-
return pads
|
|
479
|
-
|
|
480
|
-
|
|
481
404
|
@bprop_getters.register(P.Slice)
|
|
482
405
|
def get_bprop_slice(self):
|
|
483
406
|
"""Generate bprop for Slice"""
|
|
@@ -489,17 +412,17 @@ def get_bprop_slice(self):
|
|
|
489
412
|
return bprop
|
|
490
413
|
|
|
491
414
|
|
|
492
|
-
@
|
|
493
|
-
def _generate_inverse_index(x_shape, axis):
|
|
415
|
+
@_primexpr
|
|
416
|
+
def _generate_inverse_index(x_shape, axis, batch_dims=0):
|
|
494
417
|
x_rank = len(x_shape)
|
|
495
418
|
index = tuple(range(x_rank))
|
|
496
419
|
if axis < 0:
|
|
497
420
|
axis += x_rank
|
|
498
|
-
perm = index[1:1 + axis] + (
|
|
421
|
+
perm = index[:batch_dims] + index[batch_dims + 1:1 + axis] + (index[batch_dims],) + index[1 + axis:]
|
|
499
422
|
return perm
|
|
500
423
|
|
|
501
424
|
|
|
502
|
-
@
|
|
425
|
+
@_primexpr
|
|
503
426
|
def _regenerate_output_shape(x_shp, ind_shp, axis):
|
|
504
427
|
rank = len(x_shp)
|
|
505
428
|
if axis < 0:
|
|
@@ -517,29 +440,52 @@ def _dyn_regenerate_output_shape(x_shp, ind_shp, axis):
|
|
|
517
440
|
return out_shape
|
|
518
441
|
|
|
519
442
|
|
|
520
|
-
def _dyn_generate_shape_index(out_shape, indices_shape, axis):
|
|
443
|
+
def _dyn_generate_shape_index(out_shape, indices_shape, axis, batch_dims=0):
|
|
521
444
|
"""Get tranpose order"""
|
|
522
445
|
out_rank = F.reshape(dyn_shape_op(out_shape), ())
|
|
523
446
|
ind_rank = F.reshape(dyn_shape_op(indices_shape), ())
|
|
524
447
|
if axis < 0:
|
|
525
448
|
axis += out_rank - ind_rank + 1
|
|
526
449
|
perm_part1 = P.Range()(F.cast(0, mstype.int32), F.cast(20, mstype.int32), F.cast(1, mstype.int32))
|
|
527
|
-
|
|
450
|
+
ind_end = axis + ind_rank - batch_dims
|
|
451
|
+
perm_part1 = perm_part1[axis: ind_end]
|
|
528
452
|
index = P.Range()(F.cast(0, mstype.int32), F.cast(out_rank, mstype.int32), F.cast(1, mstype.int32))
|
|
529
|
-
perm =
|
|
453
|
+
perm = F.hstack((index[:batch_dims], perm_part1, index[batch_dims:axis], index[ind_end:]))
|
|
530
454
|
return perm
|
|
531
455
|
|
|
532
456
|
|
|
533
|
-
def _dyn_generate_inverse_index(x_shp, axis):
|
|
457
|
+
def _dyn_generate_inverse_index(x_shp, axis, batch_dims=0):
|
|
534
458
|
"""Get tranpose order"""
|
|
535
459
|
x_rank = F.reshape(dyn_shape_op(x_shp), ())
|
|
536
460
|
index = P.Range()(F.cast(0, mstype.int32), F.cast(x_rank, mstype.int32), F.cast(1, mstype.int32))
|
|
537
461
|
if axis < 0:
|
|
538
462
|
axis += x_rank
|
|
539
|
-
perm =
|
|
463
|
+
perm = F.hstack((index[:batch_dims], index[batch_dims + 1:1 + axis], index[batch_dims], index[1 + axis:]))
|
|
540
464
|
return perm
|
|
541
465
|
|
|
542
466
|
|
|
467
|
+
def calculate_batch_gather(values, indices, x_shape, axis, batch_dims):
|
|
468
|
+
"""Calculate gather grad with batch_dims"""
|
|
469
|
+
values_shape = dyn_shape_op(values)
|
|
470
|
+
batch_size = F.prod(x_shape[:batch_dims])
|
|
471
|
+
batch_size = F.cast(batch_size, mstype.int32)
|
|
472
|
+
axis_dim = F.cast(x_shape[axis], mstype.int32)
|
|
473
|
+
|
|
474
|
+
# Move batch dimension to first non-batch dimension
|
|
475
|
+
values = values.reshape((-1,) + values.shape[batch_dims:])
|
|
476
|
+
indices = indices.reshape((-1,) + indices.shape[batch_dims:])
|
|
477
|
+
offset = P.Range()(F.cast(0, mstype.int32), batch_size * axis_dim, axis_dim)
|
|
478
|
+
offset_shape = F.hstack([batch_size] + [Tensor(1, dtype=mstype.int32) for _ in range(len(indices.shape) - 1)])
|
|
479
|
+
offset = reshape(offset, offset_shape)
|
|
480
|
+
indices = indices + offset
|
|
481
|
+
num_segments = batch_size * axis_dim
|
|
482
|
+
params_grad = unsorted_segment_sum(values, indices, num_segments)
|
|
483
|
+
grad_shape = dyn_shape_op(params_grad)
|
|
484
|
+
ret_shape = F.hstack([values_shape[:batch_dims], F.cast(axis_dim, mstype.int64), grad_shape[1:]])
|
|
485
|
+
params_grad = reshape(params_grad, ret_shape)
|
|
486
|
+
return params_grad
|
|
487
|
+
|
|
488
|
+
|
|
543
489
|
@bprop_getters.register(P.Gather)
|
|
544
490
|
@bprop_getters.register(P.GatherV2)
|
|
545
491
|
def get_bprop_gather_v2(self):
|
|
@@ -551,6 +497,9 @@ def get_bprop_gather_v2(self):
|
|
|
551
497
|
x_shp = dyn_shape_op(x)
|
|
552
498
|
ind_shp = dyn_shape_op(indices)
|
|
553
499
|
out_shp = dyn_shape_op(dout)
|
|
500
|
+
batch_dims = self.batch_dims
|
|
501
|
+
if batch_dims < 0:
|
|
502
|
+
batch_dims += F.reshape(dyn_shape_op(ind_shp), ())
|
|
554
503
|
|
|
555
504
|
if F.rank(dout) == 0:
|
|
556
505
|
dout = P.ExpandDims()(dout, -1)
|
|
@@ -560,17 +509,20 @@ def get_bprop_gather_v2(self):
|
|
|
560
509
|
dout = reshape(dout, out_shp)
|
|
561
510
|
|
|
562
511
|
# Example: out_shape:(3,2,3) axis 1 -> (1,0,2)
|
|
563
|
-
perm_1 = _dyn_generate_shape_index(out_shp, ind_shp, axis)
|
|
512
|
+
perm_1 = _dyn_generate_shape_index(out_shp, ind_shp, axis, batch_dims)
|
|
564
513
|
values_transpose = transpose(dout, perm_1)
|
|
565
|
-
|
|
566
|
-
|
|
514
|
+
if batch_dims > 0:
|
|
515
|
+
params_grad = calculate_batch_gather(values_transpose, indices, x_shp, axis, batch_dims)
|
|
516
|
+
else:
|
|
517
|
+
params_grad = unsorted_segment_sum(values_transpose, indices, x_shp[axis])
|
|
518
|
+
perm_2 = _dyn_generate_inverse_index(x_shp, axis, batch_dims)
|
|
567
519
|
params_grad = transpose(params_grad, perm_2)
|
|
568
520
|
return params_grad, zeros_like(orig_indices), zeros_like(axis)
|
|
569
521
|
|
|
570
522
|
def bprop(x, indices, axis, out, dout):
|
|
571
523
|
is_mutable, axis = convert_to_tensor(axis)
|
|
572
|
-
if (
|
|
573
|
-
|
|
524
|
+
if (F.is_sequence_value_unknown(shape_op(x)) or F.is_sequence_value_unknown(shape_op(indices)) or \
|
|
525
|
+
F.is_sequence_value_unknown(shape_op(dout))) and is_mutable:
|
|
574
526
|
return _dyn_bprop_gather_v2(x, indices, axis, dout)
|
|
575
527
|
orig_indices = indices
|
|
576
528
|
if F.rank(dout) == 0:
|
|
@@ -585,15 +537,19 @@ def get_bprop_gather_v2(self):
|
|
|
585
537
|
x_shp = shape_op(x)
|
|
586
538
|
out_shp = shape_op(dout)
|
|
587
539
|
ind_shp = shape_op(indices)
|
|
540
|
+
batch_dims = self.batch_dims
|
|
541
|
+
if batch_dims < 0:
|
|
542
|
+
batch_dims += len(ind_shp)
|
|
588
543
|
# Example: out_shape:(3,2,3) axis 1 -> (1,0,2)
|
|
589
|
-
perm_1 = generate_shape_index(out_shp, ind_shp, axis)
|
|
544
|
+
perm_1 = generate_shape_index(out_shp, ind_shp, axis, batch_dims)
|
|
590
545
|
values_transpose = transpose(dout, perm_1)
|
|
591
|
-
|
|
592
|
-
|
|
546
|
+
dyn_x_sape = dyn_shape_op(x)
|
|
547
|
+
if batch_dims > 0:
|
|
548
|
+
params_grad = calculate_batch_gather(values_transpose, indices, dyn_x_sape, axis, batch_dims)
|
|
593
549
|
else:
|
|
594
|
-
params_grad = unsorted_segment_sum(values_transpose, indices,
|
|
550
|
+
params_grad = unsorted_segment_sum(values_transpose, indices, dyn_x_sape[axis])
|
|
595
551
|
# Example: out_shape:(3,2,3) axis 2 -> (1,2,0)
|
|
596
|
-
perm_2 = _generate_inverse_index(x_shp, axis)
|
|
552
|
+
perm_2 = _generate_inverse_index(x_shp, axis, batch_dims)
|
|
597
553
|
params_grad = transpose(params_grad, perm_2)
|
|
598
554
|
return params_grad, zeros_like(orig_indices), zeros_like(axis)
|
|
599
555
|
|
|
@@ -854,7 +810,7 @@ def get_bprop_strided_slice(self):
|
|
|
854
810
|
|
|
855
811
|
def bprop(x, begin, end, strides, out, dout):
|
|
856
812
|
x_shape = shape_op(x)
|
|
857
|
-
if
|
|
813
|
+
if F.is_sequence_value_unknown(x_shape):
|
|
858
814
|
x_shape = dyn_shape_op(x)
|
|
859
815
|
dx = input_grad(dout, x_shape, begin, end, strides)
|
|
860
816
|
return dx, zeros_like(begin), zeros_like(end), zeros_like(strides)
|
|
@@ -926,7 +882,7 @@ def get_bprop_resize_nearest_neighbor(self):
|
|
|
926
882
|
tensor_shape = P.TensorShape()
|
|
927
883
|
|
|
928
884
|
def bprop(inputs, out, dout):
|
|
929
|
-
if (
|
|
885
|
+
if F.is_sequence_value_unknown(shape_op(inputs)) or F.is_sequence_shape_unknown(shape_op(inputs)):
|
|
930
886
|
shp = tensor_shape(inputs)
|
|
931
887
|
else:
|
|
932
888
|
shp = shape_op(inputs)
|
|
@@ -944,7 +900,7 @@ def get_bprop_gather_nd(self):
|
|
|
944
900
|
|
|
945
901
|
def bprop(x, indices, out, dout):
|
|
946
902
|
shp = shape_op(x)
|
|
947
|
-
if
|
|
903
|
+
if F.is_sequence_value_unknown(shp):
|
|
948
904
|
shp = dyn_shape_op(x)
|
|
949
905
|
return op(indices, dout, shp), zeros_like(indices)
|
|
950
906
|
|
|
@@ -1127,7 +1083,7 @@ def _gather_drop_negatives(params,
|
|
|
1127
1083
|
is_positive = greater_equal(ids, 0)
|
|
1128
1084
|
is_positive_shape = shape_op(is_positive)
|
|
1129
1085
|
gathered_shape = shape_op(gathered)
|
|
1130
|
-
if
|
|
1086
|
+
if F.is_sequence_value_unknown(gathered_shape) or F.is_sequence_value_unknown(is_positive_shape):
|
|
1131
1087
|
gathered_shape = dyn_shape_op(gathered)
|
|
1132
1088
|
rank_gathered = dyn_rank(gathered)
|
|
1133
1089
|
fill_gathered = dyn_fill(mstype.int64, gathered_shape, 1)
|
|
@@ -1212,6 +1168,8 @@ def get_bprop_unsorted_segment_prod(self):
|
|
|
1212
1168
|
unsorted_segment_prod = P.UnsortedSegmentProd()
|
|
1213
1169
|
|
|
1214
1170
|
def bprop(x, segment_ids, num_segments, out, dout):
|
|
1171
|
+
if x.dtype == mstype.complex64 or x.dtype == mstype.complex128:
|
|
1172
|
+
raise TypeError("For 'UnsortedSegmentProd', complex number is not supported for gradient currently.")
|
|
1215
1173
|
if x.dtype == mstype.complex64 or x.dtype == mstype.complex128:
|
|
1216
1174
|
is_zero = equal(x, F.scalar_to_tensor(0).astype(x.dtype))
|
|
1217
1175
|
else:
|
|
@@ -1301,19 +1259,30 @@ def get_bprop_broadcast_to(self):
|
|
|
1301
1259
|
x_shape = shape_op(x)
|
|
1302
1260
|
dout_shape = shape_op(dout)
|
|
1303
1261
|
broadcast_shape = shape_op(out)
|
|
1304
|
-
dynamic =
|
|
1262
|
+
dynamic = F.is_sequence_value_unknown(x_shape) or F.is_sequence_value_unknown(dout_shape)
|
|
1305
1263
|
if not dynamic and x_shape == dout_shape:
|
|
1306
1264
|
return (dout,)
|
|
1307
|
-
dynamic = dynamic or
|
|
1265
|
+
dynamic = dynamic or F.is_sequence_value_unknown(broadcast_shape)
|
|
1266
|
+
out_type = dout.dtype
|
|
1308
1267
|
if not dynamic:
|
|
1309
1268
|
_, reduction_axes = broadcast_gradient_args(broadcast_shape, x_shape)
|
|
1310
|
-
|
|
1269
|
+
if out_type in (ms.int16, ms.int32, ms.int64):
|
|
1270
|
+
dout = P.Cast()(dout, ms.float32)
|
|
1271
|
+
reduced_grad = reduce_keep_dim(dout, reduction_axes)
|
|
1272
|
+
reduced_grad = P.Cast()(reduced_grad, out_type)
|
|
1273
|
+
else:
|
|
1274
|
+
reduced_grad = reduce_keep_dim(dout, reduction_axes)
|
|
1311
1275
|
dx = reshape(reduced_grad, x_shape)
|
|
1312
1276
|
else:
|
|
1313
1277
|
x_shape = dyn_shape_op(x)
|
|
1314
1278
|
broadcast_shape = dyn_shape_op(out)
|
|
1315
1279
|
_, reduction_axes = DynamicBroadcastGradientArgs()(broadcast_shape, x_shape)
|
|
1316
|
-
|
|
1280
|
+
if out_type in (ms.int16, ms.int32, ms.int64):
|
|
1281
|
+
dout = P.Cast()(dout, ms.float32)
|
|
1282
|
+
reduced_grad = sum_grad_reduce_axis(dout, reduction_axes, keep_dims=True)
|
|
1283
|
+
reduced_grad = P.Cast()(reduced_grad, out_type)
|
|
1284
|
+
else:
|
|
1285
|
+
reduced_grad = sum_grad_reduce_axis(dout, reduction_axes, keep_dims=True)
|
|
1317
1286
|
dx = reshape(reduced_grad, x_shape)
|
|
1318
1287
|
return (dx,)
|
|
1319
1288
|
|
mindspore/ops/_grad/grad_base.py
CHANGED
|
@@ -21,7 +21,9 @@ from mindspore.common import Tensor
|
|
|
21
21
|
from mindspore.ops import operations as P
|
|
22
22
|
from mindspore.common import dtype as mstype
|
|
23
23
|
from mindspore.ops.operations._inner_ops import DynamicBroadcastTo
|
|
24
|
-
|
|
24
|
+
from mindspore.ops import functional as F
|
|
25
|
+
from mindspore.ops.operations import _sequence_ops as seq_op
|
|
26
|
+
import mindspore as ms
|
|
25
27
|
dyn_shape = P.TensorShape()
|
|
26
28
|
cast = P.Cast()
|
|
27
29
|
|
|
@@ -91,6 +93,26 @@ def convert_to_tensor(data):
|
|
|
91
93
|
if isinstance(data, Tensor):
|
|
92
94
|
return True, data
|
|
93
95
|
|
|
96
|
+
if isinstance(data, list):
|
|
97
|
+
if F.is_sequence_value_unknown(data):
|
|
98
|
+
data_tensor = seq_op.ListToTensor()(data, ms.int64)
|
|
99
|
+
return True, data_tensor
|
|
100
|
+
return False, data
|
|
101
|
+
if isinstance(data, tuple):
|
|
102
|
+
if F.is_sequence_value_unknown(data):
|
|
103
|
+
data_tensor = seq_op.TupleToTensor()(data, ms.int64)
|
|
104
|
+
return True, data_tensor
|
|
105
|
+
return False, data
|
|
106
|
+
if isinstance(data, int):
|
|
107
|
+
if not F.isconstant(data):
|
|
108
|
+
data_tensor = F.scalar_to_tensor(data, ms.int64)
|
|
109
|
+
return True, data_tensor
|
|
110
|
+
return False, data
|
|
111
|
+
if isinstance(data, float):
|
|
112
|
+
if not F.isconstant(data):
|
|
113
|
+
data_tensor = F.scalar_to_tensor(data, ms.float32)
|
|
114
|
+
return True, data_tensor
|
|
115
|
+
return False, data
|
|
94
116
|
return False, data
|
|
95
117
|
|
|
96
118
|
|
|
@@ -16,15 +16,14 @@
|
|
|
16
16
|
|
|
17
17
|
"""Define the grad rules of clip operations."""
|
|
18
18
|
from __future__ import absolute_import
|
|
19
|
-
|
|
20
19
|
from mindspore.ops._grad.grad_base import bprop_getters, dyn_fill
|
|
21
20
|
from mindspore.ops import operations as P
|
|
21
|
+
from mindspore.ops import functional as F
|
|
22
22
|
from mindspore.ops.operations import _inner_ops as inner
|
|
23
23
|
from mindspore.ops.operations import _grad_ops as G
|
|
24
24
|
from mindspore.common import dtype as mstype
|
|
25
25
|
from mindspore.ops._grad.grad_math_ops import _sum_grad
|
|
26
26
|
from mindspore.ops._grad.grad_math_ops import binop_grad_common
|
|
27
|
-
from mindspore.ops._utils.utils import is_shape_unknown
|
|
28
27
|
|
|
29
28
|
|
|
30
29
|
@bprop_getters.register(inner.ClipByNorm)
|
|
@@ -72,7 +71,7 @@ def get_bprop_clip_by_norm(self):
|
|
|
72
71
|
temp_num = 2.0
|
|
73
72
|
temp_out = mul_op(reduce_sum_dout_x, cast_x)
|
|
74
73
|
shape_cast_x = shape_op(cast_x)
|
|
75
|
-
if
|
|
74
|
+
if F.is_sequence_value_unknown(shape_cast_x):
|
|
76
75
|
fill_x = dyn_fill(type_op(temp_out), dyn_shape_op(cast_x), temp_num)
|
|
77
76
|
else:
|
|
78
77
|
fill_x = fill_op(type_op(temp_out), shape_cast_x, temp_num)
|
|
@@ -207,16 +207,16 @@ def get_bprop_mirror_micro_step_operator(self):
|
|
|
207
207
|
z = F.depend(z, dout)
|
|
208
208
|
real_grad = all_reduce(z)
|
|
209
209
|
real_grad = F.tensor_mul(real_grad, scale)
|
|
210
|
-
|
|
211
|
-
|
|
210
|
+
if opt_shard:
|
|
211
|
+
return (real_grad, cast(out_tensor, dtype(z)))
|
|
212
|
+
return F.depend((cast(out_tensor, dtype(x)), cast(out_tensor, dtype(z))), assign(z, real_grad))
|
|
212
213
|
else:
|
|
213
214
|
if issubclass_(F.typeof(dout), mstype.tensor):
|
|
214
215
|
z = F.depend(z, dout)
|
|
215
216
|
real_grad = all_reduce(z)
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
return (real_grad, cast(out_tensor, dtype(z)))
|
|
217
|
+
if opt_shard:
|
|
218
|
+
return (real_grad, cast(out_tensor, dtype(z)))
|
|
219
|
+
return F.depend((cast(out_tensor, dtype(x)), cast(out_tensor, dtype(z))), assign(z, real_grad))
|
|
220
220
|
return F.depend((cast(out_tensor, dtype(x)), cast(out_tensor, dtype(z))), assign_out)
|
|
221
221
|
return bprop
|
|
222
222
|
|
|
@@ -289,6 +289,7 @@ def get_bprop_mini_step_all_gather(self):
|
|
|
289
289
|
dx = F.depend(dx, z)
|
|
290
290
|
else:
|
|
291
291
|
dx = dout
|
|
292
|
+
|
|
292
293
|
return (dx, zeros_like(z))
|
|
293
294
|
|
|
294
295
|
return bprop
|
|
@@ -299,21 +300,33 @@ def get_bprop_micro_step_all_gather(self):
|
|
|
299
300
|
"""Generate bprop for _MicroStepAllGather"""
|
|
300
301
|
fusion = self.get_attr_dict()["fusion"]
|
|
301
302
|
mean_flag = self.get_attr_dict()["mean_flag"]
|
|
302
|
-
do_mirror =
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
303
|
+
do_mirror = False
|
|
304
|
+
if self.group != "":
|
|
305
|
+
do_mirror = self.get_attr_dict()["do_mirror"]
|
|
306
|
+
if do_mirror:
|
|
307
|
+
scale = 1.0 / self.rank_size
|
|
308
|
+
all_reduce = AllReduce(ReduceOp.SUM, self.group).add_prim_attr("fusion", fusion)
|
|
309
|
+
rank = get_rank(self.group)
|
|
310
|
+
dev_num = get_group_size(self.group)
|
|
311
|
+
split = P.Split(output_num=dev_num)
|
|
312
|
+
if self.instance_name:
|
|
313
|
+
instance_name = "grad_" + self.instance_name
|
|
314
|
+
all_reduce.set_prim_instance_name(instance_name)
|
|
311
315
|
cast = P.Cast()
|
|
312
316
|
dtype = P.DType()
|
|
313
317
|
out_tensor = Tensor(1.0, mstype.float16)
|
|
318
|
+
with_mirror_operator = self.get_attr_dict()["with_mirror_operator"]
|
|
314
319
|
|
|
315
320
|
# z: accu_grad
|
|
316
321
|
def bprop(x, z, out, dout):
|
|
322
|
+
if with_mirror_operator:
|
|
323
|
+
if not do_mirror:
|
|
324
|
+
return (dout, cast(out_tensor, dtype(z)))
|
|
325
|
+
real_grad = all_reduce(dout)
|
|
326
|
+
real_grad = split(real_grad)[rank]
|
|
327
|
+
if mean_flag:
|
|
328
|
+
real_grad = F.tensor_mul(real_grad, scale)
|
|
329
|
+
return (real_grad, cast(out_tensor, dtype(z)))
|
|
317
330
|
z = F.depend(z, dout)
|
|
318
331
|
if not do_mirror:
|
|
319
332
|
return (z, cast(out_tensor, dtype(z)))
|
|
@@ -451,7 +464,9 @@ def get_bprop_mirror_operator(self):
|
|
|
451
464
|
group = self.get_attr_dict()['group']
|
|
452
465
|
dev_num = self.get_attr_dict()['dev_num']
|
|
453
466
|
mean_flag = self.get_attr_dict()['mean_flag']
|
|
467
|
+
dev_num_r = 1.0
|
|
454
468
|
if dev_num > 1:
|
|
469
|
+
dev_num_r = 1.0 / dev_num
|
|
455
470
|
all_reduce = AllReduce(group=group)
|
|
456
471
|
all_gather = AllGather(group=group)
|
|
457
472
|
mul = P.Mul()
|
|
@@ -473,15 +488,11 @@ def get_bprop_mirror_operator(self):
|
|
|
473
488
|
if mean_flag:
|
|
474
489
|
if issubclass_(F.typeof(dout), mstype.tensor):
|
|
475
490
|
dx = all_reduce(dout)
|
|
476
|
-
|
|
477
|
-
num = F.scalar_cast(dev_num, F.dtype(dx))
|
|
478
|
-
dx = mul(dx, cast(F.scalar_to_tensor(float_one/num), F.dtype(dx)))
|
|
491
|
+
dx = mul(dx, cast(F.scalar_to_tensor(dev_num_r), F.dtype(dx)))
|
|
479
492
|
else:
|
|
480
493
|
indices = all_gather(dout.indices)
|
|
481
494
|
grad = all_gather(dout.values)
|
|
482
|
-
|
|
483
|
-
num = F.scalar_cast(dev_num, F.dtype(grad))
|
|
484
|
-
grad = mul(grad, cast(F.scalar_to_tensor(float_one/num), F.dtype(grad)))
|
|
495
|
+
grad = mul(grad, cast(F.scalar_to_tensor(dev_num_r), F.dtype(grad)))
|
|
485
496
|
dx = RowTensorInner(indices, grad, dout.dense_shape)
|
|
486
497
|
else:
|
|
487
498
|
if issubclass_(F.typeof(dout), mstype.tensor):
|
|
@@ -504,6 +515,7 @@ def get_bprop_mirror_mini_step_operator(self):
|
|
|
504
515
|
group = self.group
|
|
505
516
|
dev_num = self.dev_num
|
|
506
517
|
mean_flag = self.mean_flag
|
|
518
|
+
dev_num_r = 1.0 / dev_num
|
|
507
519
|
|
|
508
520
|
all_reduce = AllReduce(group=group)
|
|
509
521
|
mul = P.Mul()
|
|
@@ -529,9 +541,7 @@ def get_bprop_mirror_mini_step_operator(self):
|
|
|
529
541
|
dx = real_grad
|
|
530
542
|
else:
|
|
531
543
|
dx = dout
|
|
532
|
-
|
|
533
|
-
num = F.scalar_cast(dev_num, F.dtype(dx))
|
|
534
|
-
dx = mul(dx, cast(F.scalar_to_tensor(float_one/num), F.dtype(dx)))
|
|
544
|
+
dx = mul(dx, cast(F.scalar_to_tensor(dev_num_r), F.dtype(dx)))
|
|
535
545
|
else:
|
|
536
546
|
dx = zeros_like(x) # The grad accumulation do not support row tensor now
|
|
537
547
|
else:
|