mindspore 2.0.0a0__cp38-cp38-win_amd64.whl → 2.0.0rc1__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -2
- mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +16 -1
- mindspore/_extends/parse/parser.py +107 -22
- mindspore/_extends/parse/resources.py +0 -7
- mindspore/_extends/parse/standard_method.py +885 -413
- mindspore/amp.py +52 -57
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +38 -20
- mindspore/boost/dim_reduce.py +3 -3
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +4 -6
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +41 -7
- mindspore/common/api.py +215 -141
- mindspore/common/dtype.py +8 -1
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +4 -2
- mindspore/common/jit_config.py +17 -13
- mindspore/common/mutable.py +33 -13
- mindspore/common/parameter.py +23 -21
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +62 -41
- mindspore/common/tensor.py +852 -1154
- mindspore/communication/__init__.py +2 -2
- mindspore/communication/_comm_helper.py +11 -4
- mindspore/communication/management.py +22 -21
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +201 -23
- mindspore/dataset/__init__.py +6 -6
- mindspore/dataset/audio/__init__.py +7 -7
- mindspore/dataset/audio/transforms.py +670 -30
- mindspore/dataset/audio/utils.py +47 -4
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +210 -14
- mindspore/dataset/core/validator_helpers.py +2 -2
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +322 -66
- mindspore/dataset/engine/datasets_audio.py +80 -76
- mindspore/dataset/engine/datasets_standard_format.py +51 -38
- mindspore/dataset/engine/datasets_text.py +232 -118
- mindspore/dataset/engine/datasets_user_defined.py +41 -17
- mindspore/dataset/engine/datasets_vision.py +746 -225
- mindspore/dataset/engine/graphdata.py +75 -10
- mindspore/dataset/engine/iterators.py +45 -5
- mindspore/dataset/engine/offload.py +48 -28
- mindspore/dataset/engine/validators.py +117 -8
- mindspore/dataset/text/__init__.py +6 -5
- mindspore/dataset/text/transforms.py +86 -3
- mindspore/dataset/text/utils.py +6 -4
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +3 -2
- mindspore/dataset/transforms/c_transforms.py +1 -1
- mindspore/dataset/transforms/transforms.py +2 -2
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +2 -3
- mindspore/dataset/vision/c_transforms.py +9 -9
- mindspore/dataset/vision/py_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +2 -0
- mindspore/dataset/vision/transforms.py +160 -161
- mindspore/dataset/vision/utils.py +3 -3
- mindspore/experimental/map_parameter.py +38 -26
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +44 -9
- mindspore/include/api/delegate.h +1 -1
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_parallel_runner.h +2 -2
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +19 -3
- mindspore/include/api/types.h +3 -3
- mindspore/include/dataset/constants.h +7 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filereader.py +18 -0
- mindspore/mindrecord/filewriter.py +197 -34
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
- mindspore/mindrecord/tools/csv_to_mr.py +3 -3
- mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/__init__.py +0 -4
- mindspore/nn/cell.py +204 -132
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +7 -6
- mindspore/nn/layer/__init__.py +5 -4
- mindspore/nn/layer/activation.py +40 -89
- mindspore/nn/layer/basic.py +255 -624
- mindspore/nn/layer/channel_shuffle.py +7 -6
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +41 -4
- mindspore/nn/layer/conv.py +64 -28
- mindspore/nn/layer/dense.py +9 -8
- mindspore/nn/layer/embedding.py +27 -25
- mindspore/nn/layer/image.py +53 -46
- mindspore/nn/layer/math.py +97 -105
- mindspore/nn/layer/normalization.py +117 -86
- mindspore/nn/layer/padding.py +185 -95
- mindspore/nn/layer/pooling.py +817 -414
- mindspore/nn/layer/rnn_cells.py +10 -15
- mindspore/nn/layer/rnns.py +37 -38
- mindspore/nn/layer/thor_layer.py +11 -12
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +5 -4
- mindspore/nn/loss/loss.py +334 -199
- mindspore/nn/optim/ada_grad.py +6 -6
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +4 -5
- mindspore/nn/optim/adam.py +126 -62
- mindspore/nn/optim/adamax.py +3 -4
- mindspore/nn/optim/adasum.py +6 -6
- mindspore/nn/optim/asgd.py +2 -2
- mindspore/nn/optim/ftrl.py +67 -38
- mindspore/nn/optim/lamb.py +4 -5
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +43 -4
- mindspore/nn/optim/momentum.py +6 -5
- mindspore/nn/optim/optimizer.py +3 -1
- mindspore/nn/optim/proximal_ada_grad.py +2 -2
- mindspore/nn/optim/rmsprop.py +1 -1
- mindspore/nn/optim/rprop.py +8 -9
- mindspore/nn/optim/sgd.py +19 -13
- mindspore/nn/optim/thor.py +10 -15
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +4 -4
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +9 -15
- mindspore/nn/probability/distribution/bernoulli.py +3 -3
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +5 -7
- mindspore/nn/probability/distribution/cauchy.py +3 -3
- mindspore/nn/probability/distribution/distribution.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +3 -3
- mindspore/nn/probability/distribution/half_normal.py +15 -11
- mindspore/nn/probability/distribution/laplace.py +16 -13
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/normal.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/student_t.py +20 -15
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +27 -10
- mindspore/nn/wrap/grad_reducer.py +2 -2
- mindspore/nn/wrap/loss_scale.py +40 -24
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +35 -30
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +22 -19
- mindspore/numpy/utils.py +1 -1
- mindspore/numpy/utils_const.py +108 -58
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +86 -117
- mindspore/ops/_grad/grad_base.py +23 -1
- mindspore/ops/_grad/grad_clip_ops.py +2 -3
- mindspore/ops/_grad/grad_comm_ops.py +34 -24
- mindspore/ops/_grad/grad_implementations.py +9 -45
- mindspore/ops/_grad/grad_inner_ops.py +47 -4
- mindspore/ops/_grad/grad_math_ops.py +142 -117
- mindspore/ops/_grad/grad_nn_ops.py +71 -165
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +7 -6
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
- mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
- mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
- mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -611
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_register_for_op.py +1 -0
- mindspore/ops/_utils/__init__.py +1 -2
- mindspore/ops/_utils/utils.py +19 -40
- mindspore/ops/_vmap/vmap_array_ops.py +116 -38
- mindspore/ops/_vmap/vmap_base.py +16 -9
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
- mindspore/ops/_vmap/vmap_image_ops.py +12 -5
- mindspore/ops/_vmap/vmap_math_ops.py +46 -5
- mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
- mindspore/ops/_vmap/vmap_random_ops.py +1 -1
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
- mindspore/ops/composite/__init__.py +7 -8
- mindspore/ops/composite/base.py +101 -47
- mindspore/ops/composite/math_ops.py +188 -158
- mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
- mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
- mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
- mindspore/ops/function/__init__.py +152 -8
- mindspore/ops/function/array_func.py +2555 -674
- mindspore/ops/function/clip_func.py +209 -13
- mindspore/ops/function/debug_func.py +2 -2
- mindspore/ops/function/grad/__init__.py +2 -1
- mindspore/ops/function/grad/grad_func.py +147 -62
- mindspore/ops/function/image_func.py +54 -38
- mindspore/ops/function/linalg_func.py +167 -16
- mindspore/ops/function/math_func.py +4849 -1492
- mindspore/ops/function/nn_func.py +2573 -988
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +3 -3
- mindspore/ops/function/random_func.py +790 -73
- mindspore/ops/function/sparse_func.py +98 -78
- mindspore/ops/function/sparse_unary_func.py +54 -53
- mindspore/ops/function/spectral_func.py +27 -24
- mindspore/ops/function/vmap_func.py +22 -2
- mindspore/ops/functional.py +97 -37
- mindspore/ops/op_info_register.py +70 -28
- mindspore/ops/operations/__init__.py +47 -14
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +5 -5
- mindspore/ops/operations/_grad_ops.py +276 -187
- mindspore/ops/operations/_inner_ops.py +319 -113
- mindspore/ops/operations/_ms_kernel.py +10 -8
- mindspore/ops/operations/_ocr_ops.py +9 -9
- mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
- mindspore/ops/operations/_quant_ops.py +137 -102
- mindspore/ops/operations/_rl_inner_ops.py +121 -60
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1004 -2
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +801 -466
- mindspore/ops/operations/comm_ops.py +51 -49
- mindspore/ops/operations/control_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +123 -44
- mindspore/ops/operations/debug_ops.py +24 -24
- mindspore/ops/operations/image_ops.py +240 -153
- mindspore/ops/operations/inner_ops.py +34 -50
- mindspore/ops/operations/linalg_ops.py +31 -9
- mindspore/ops/operations/math_ops.py +988 -757
- mindspore/ops/operations/nn_ops.py +965 -819
- mindspore/ops/operations/other_ops.py +51 -40
- mindspore/ops/operations/random_ops.py +204 -122
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +254 -93
- mindspore/ops/operations/spectral_ops.py +35 -3
- mindspore/ops/primitive.py +111 -9
- mindspore/parallel/_auto_parallel_context.py +189 -83
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +99 -7
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +7 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
- mindspore/parallel/_utils.py +1 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +37 -34
- mindspore/parallel/shard.py +17 -18
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +69 -47
- mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
- mindspore/profiler/parser/base_timeline_generator.py +49 -56
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
- mindspore/profiler/parser/hwts_log_parser.py +1 -1
- mindspore/profiler/parser/integrator.py +15 -14
- mindspore/profiler/parser/minddata_analyzer.py +2 -2
- mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +2 -1
- mindspore/profiler/profiling.py +218 -186
- mindspore/rewrite/__init__.py +3 -1
- mindspore/rewrite/api/node.py +1 -114
- mindspore/rewrite/api/node_type.py +3 -0
- mindspore/rewrite/api/pattern_engine.py +31 -1
- mindspore/rewrite/api/scoped_value.py +4 -4
- mindspore/rewrite/api/symbol_tree.py +3 -78
- mindspore/rewrite/api/tree_node_helper.py +1 -1
- mindspore/rewrite/ast_creator_register.py +1 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -2
- mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
- mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
- mindspore/rewrite/namespace.py +0 -2
- mindspore/rewrite/node.py +157 -11
- mindspore/rewrite/parsers/assign_parser.py +231 -53
- mindspore/rewrite/parsers/class_def_parser.py +187 -109
- mindspore/rewrite/parsers/for_parser.py +24 -14
- mindspore/rewrite/parsers/function_def_parser.py +21 -4
- mindspore/rewrite/parsers/if_parser.py +6 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +256 -133
- mindspore/rewrite/symbol_tree_builder.py +38 -1
- mindspore/run_check/_check_version.py +69 -63
- mindspore/run_check/run_check.py +2 -1
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +1 -1
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +273 -102
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +2 -2
- mindspore/train/callback/_checkpoint.py +3 -3
- mindspore/train/callback/_early_stop.py +3 -3
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +29 -31
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +3 -3
- mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
- mindspore/train/callback/_summary_collector.py +23 -16
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +15 -3
- mindspore/train/dataset_helper.py +10 -15
- mindspore/train/loss_scale_manager.py +8 -11
- mindspore/train/metrics/__init__.py +1 -1
- mindspore/train/metrics/bleu_score.py +1 -1
- mindspore/train/metrics/confusion_matrix.py +1 -1
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/dice.py +2 -2
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +4 -3
- mindspore/train/metrics/mean_surface_distance.py +2 -2
- mindspore/train/metrics/occlusion_sensitivity.py +1 -1
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +1 -1
- mindspore/train/metrics/recall.py +1 -1
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +45 -28
- mindspore/train/serialization.py +295 -188
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -13
- mindspore/train/train_thor/convert_utils.py +2 -2
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
- mindspore/compression/__init__.py +0 -19
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -515
- mindspore/compression/quant/__init__.py +0 -28
- mindspore/compression/quant/qat.py +0 -634
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -140
- mindspore/nn/probability/dpn/vae/vae.py +0 -124
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
- mindspore/ops/composite/array_ops.py +0 -241
- mindspore/ops/composite/clip_ops.py +0 -134
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
mindspore/{nn/transformer → parallel/_transformer}/transformer.py

@@ -30,18 +30,17 @@ import mindspore.common.dtype as mstype
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.nn.cell import Cell
-from mindspore
+from mindspore import _checkparam as Validator
 from mindspore import log as logger
-from mindspore.parallel._utils import _get_parallel_mode
+from mindspore.parallel._utils import _get_parallel_mode
 from mindspore.context import ParallelMode
 from mindspore.log import _LogActionOnce
-from mindspore.
+from mindspore.parallel._transformer.layers import _LayerNorm, _Linear, \
     _args_type_validator_check, _valid_type_checks, _valid_value_checks, \
-
-
-from mindspore.nn.transformer.op_parallel_config import default_dpmp_config, _PipeLineConfig, OpParallelConfig, \
+    _check_past_none_input_none, _check_input_dtype
+from mindspore.parallel._transformer.op_parallel_config import default_dpmp_config, _PipeLineConfig, OpParallelConfig, \
     _Config, _check_config, MoEParallelConfig
-from mindspore.
+from mindspore.parallel._transformer.moe import default_moe_config, MoE, _check_moe_config
 
 __all__ = [
     "AttentionMask",
@@ -399,13 +398,13 @@ class FeedForward(Cell):
         (2, 20, 15)
         >>> # Example 2 using custom hidden activation
         >>> class MyActivationNoShard(nn.Cell):
-
-
-
-
-
+        ...     def __init__(self):
+        ...         super(MyActivationNoShard, self).__init__()
+        ...         self.add = ops.Add()
+        ...     def construct(self, x):
+        ...         return self.add(x, 0.1)
         >>> model = FeedForward(hidden_size=15, ffn_hidden_size=30, dropout_rate=0.1,
-
+        ...                     hidden_act=MyActivationNoShard)
         >>> tensor = Tensor(np.ones((2, 20, 15)), mstype.float32)
         >>> output = model(tensor)
         >>> print(output.shape)

@@ -415,16 +414,16 @@ class FeedForward(Cell):
         >>> # a class function named activation_shard. It accepts the argument parallel_config (OpParallelConfig,
         >>> # MoEParallelConfig) and set the shard for the primitives used in the construct.
         >>> class MyActivationWithShard(nn.Cell):
-
-
-
-
-
-
-
+        ...     def __init__(self):
+        ...         super(MyActivationWithShard, self).__init__()
+        ...         self.add = ops.Add()
+        ...     def construct(self, x):
+        ...         return self.add(x, 0.1)
+        ...     def activation_shard(self, parallel_config):
+        ...         self.add.shard(((parallel_config.data_parallel, parallel_config.model_parallel), ()))
         >>>
         >>> model = FeedForward(hidden_size=15, ffn_hidden_size=30, dropout_rate=0.1,
-
+        ...                     hidden_act=MyActivationWithShard)
         >>> tensor = Tensor(np.ones((2, 20, 15)), mstype.float32)
         >>> output = model(tensor)
         >>> print(output.shape)

@@ -451,7 +450,7 @@ class FeedForward(Cell):
         if hidden_act is None or not (isinstance(hidden_act, str) or issubclass(hidden_act, nn.Cell)):
             raise TypeError(f"For FeedForward cell, the hidden_act should str type or nn.Cell type, "
                             f"but got {hidden_act}.")
-        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,)
+        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             _check_config(parallel_config)
             mp = parallel_config.model_parallel
             if expert_num > 1:

@@ -497,9 +496,9 @@ class FeedForward(Cell):
             else:
                 self.projection.shard(strategy_matmul=((dp, mp), (mp, 1)))
                 self.projection.bias.parallel_optimizer = False
-            self.dropout = nn.Dropout(
-            self.dropout_3d = nn.Dropout(
-            self.dropout_4d = nn.Dropout(
+            self.dropout = nn.Dropout(p=dropout_rate)
+            self.dropout_3d = nn.Dropout(p=dropout_rate)
+            self.dropout_4d = nn.Dropout(p=dropout_rate)
             self.cast = P.Cast()
         else:
             _check_config(parallel_config)

@@ -557,16 +556,18 @@ class FeedForward(Cell):
                 self.projection.shard(strategy_matmul=((dp, mp), (mp, 1)),
                                       strategy_bias=((dp, 1), (1,)))
                 self.projection.bias.parallel_optimizer = False
-            self.dropout = nn.Dropout(
+            self.dropout = nn.Dropout(p=dropout_rate)
             self.dropout.dropout.shard(((dp, 1),))
-            self.dropout_3d = nn.Dropout(
+            self.dropout_3d = nn.Dropout(p=dropout_rate)
             self.dropout_3d.dropout.shard(((dp, 1, 1),))
-            self.dropout_4d = nn.Dropout(
+            self.dropout_4d = nn.Dropout(p=dropout_rate)
             self.dropout_4d.dropout.shard(((dp, ep, 1, 1),))
             self.cast = P.Cast()
+            # for grouped pairwise exchange alltoall method in pass
+            self.mapping.matmul.add_prim_attr("gpea_label", True)
+            self.projection.matmul.add_prim_attr("gpea_label", True)
 
     def construct(self, x):
-        _check_input_shape(F.shape(x), "x", self.cls_name, [2, 3])
         _check_input_dtype(F.dtype(x), "x", [mstype.float32, mstype.float16], self.cls_name)
         x = self.cast(x, mstype.float16)
         # returned shape is [bs, seq_length, ffn_hidden_size] or [bs * seq_length, ffn_hidden_size]
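Several hunks in this file repeat one mechanical change: nn.Dropout is now constructed with the keyword p, the probability of zeroing an element, while the corresponding removed calls are truncated in this view. A minimal sketch of the new call, with the pre-2.0 keep_prob form shown only as a hedged assumption (the rate value is illustrative):

    import mindspore.nn as nn

    dropout_rate = 0.1  # illustrative value

    # 2.0.0rc1 style, as in the '+' lines above: p is the probability of dropping an element.
    dropout = nn.Dropout(p=dropout_rate)

    # Pre-2.0 style (assumption: the truncated '-' lines used keep_prob,
    # i.e. the probability of keeping an element).
    # dropout = nn.Dropout(keep_prob=1 - dropout_rate)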
@@ -639,9 +640,7 @@ class AttentionMask(Cell):
         self.multiply = P.Mul().shard(((parallel_config.data_parallel, 1, 1), (1, 1, 1)))
 
     def construct(self, input_mask):
-        _check_input_shape(F.shape(input_mask), "input_mask", self.cls_name, 2)
         _check_input_dtype(F.dtype(input_mask), "input_mask", [mstype.float32, mstype.float16], self.cls_name)
-        _check_input_shape_value(F.shape(input_mask), 1, "input_mask", self.cls_name, self.seq_length)
         input_mask = P.Cast()(self.not_equal(input_mask, 0), mstype.float16)
         input_shape = P.Shape()(input_mask)
         shape_right = (input_shape[0], 1, input_shape[1])

@@ -736,7 +735,6 @@ class VocabEmbedding(Cell):
                              f"model parallel for the embedding lookup.")
 
     def construct(self, input_ids):
-        _check_input_shape(F.shape(input_ids), "input_ids", self.cls_name, 2)
         _check_input_dtype(F.dtype(input_ids), "input_ids", [mstype.int32], self.cls_name)
         output = self.gather(self.embedding_table, input_ids, 0)
         return output, self.embedding_table.value()

@@ -904,7 +902,7 @@ class MultiHeadAttention(Cell):
                              ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
         if batch_size:
             Validator.check_positive_int(batch_size)
-        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,)
+        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             _check_config(parallel_config)
             self.src_seq_length = src_seq_length
             self.tgt_seq_length = tgt_seq_length

@@ -955,8 +953,8 @@ class MultiHeadAttention(Cell):
             # Normalize factor for attention, sqrt(dk) as widely used
             self.scale_factor = Tensor(math.sqrt(math.sqrt(self.size_per_head)))
             self.use_past = use_past
-            self.dropout = nn.Dropout(
-            self.prob_dropout = nn.Dropout(
+            self.dropout = nn.Dropout(p=hidden_dropout_rate)
+            self.prob_dropout = nn.Dropout(p=attention_dropout_rate)
             self.softmax = nn.Softmax().to_float(softmax_compute_type)
             self.softmax_3d = nn.Softmax().to_float(softmax_compute_type)
             self.expand_dims = P.ExpandDims()

@@ -1056,9 +1054,9 @@ class MultiHeadAttention(Cell):
             # Normalize factor for attention, sqrt(dk) as widely used
             self.scale_factor = Tensor(math.sqrt(math.sqrt(self.size_per_head)))
             self.use_past = use_past
-            self.dropout = nn.Dropout(
+            self.dropout = nn.Dropout(p=hidden_dropout_rate)
             self.dropout.dropout.shard(((parallel_config.data_parallel, 1),))
-            self.prob_dropout = nn.Dropout(
+            self.prob_dropout = nn.Dropout(p=attention_dropout_rate)
             self.prob_dropout.dropout.shard(
                 ((parallel_config.data_parallel, parallel_config.model_parallel, 1, 1),))
             self.softmax = nn.Softmax().to_float(softmax_compute_type)

@@ -1204,46 +1202,22 @@ class MultiHeadAttention(Cell):
 
     def _get_batch_size_from_query(self, query):
         r"""Get the batch size from query tensor"""
-        batch_size = None
         # For the incremental prediction, the seq length for the input is 1.
-        if len(F.shape(query)) == 2 and self.is_first_iteration:
-
-
-            batch_size = F.shape(query)[0]
-        return batch_size
+        if len(F.shape(query)) == 2 and ((self.use_past and self.is_first_iteration) or (not self.use_past)):
+            return F.shape(query)[0] // self.src_seq_length
+        return F.shape(query)[0]
 
     def _get_seq_length_under_incremental(self, length):
         r"""Return the length of the tensor.
         For the incremental prediction, the seq length for the input is 1.
         """
-        if self.is_first_iteration:
-            return
-        return
+        if self.use_past and not self.is_first_iteration:
+            return 1
+        return length
 
     def _check_inputs(self, query_tensor, key_tensor, value_tensor, attention_mask, key_past=None,
                       value_past=None, batch_valid_length=None):
         r"""Check inputs"""
-        if not self.use_past or (self.use_past and self.is_first_iteration):
-            _check_shape_equal_without_batch(F.shape(query_tensor), "query_tensor", self.cls_name,
-                                             [self.src_seq_length, self.hidden_size])
-            _check_shape_equal_without_batch(F.shape(key_tensor), "key_tensor", self.cls_name,
-                                             [self.tgt_seq_length, self.hidden_size])
-            _check_shape_equal_without_batch(F.shape(value_tensor), "value_tensor", self.cls_name,
-                                             [self.tgt_seq_length, self.hidden_size])
-            if attention_mask is not None:
-                _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
-                                   [F.shape(attention_mask)[0], self.src_seq_length, self.tgt_seq_length])
-        else:
-            _check_shape_equal(F.shape(query_tensor), "query_tensor", self.cls_name,
-                               [[self.batch_size, 1, self.hidden_size], [self.batch_size, self.hidden_size]])
-            _check_shape_equal(F.shape(key_tensor), "key_tensor", self.cls_name,
-                               [[self.batch_size, 1, self.hidden_size], [self.batch_size, self.hidden_size]])
-            _check_shape_equal(F.shape(value_tensor), "value_tensor", self.cls_name,
-                               [[self.batch_size, 1, self.hidden_size], [self.batch_size, self.hidden_size]])
-            if attention_mask is not None:
-                _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
-                                   [[self.batch_size, 1, self.tgt_seq_length], [self.batch_size, self.hidden_size]])
-
         _check_input_dtype(F.dtype(query_tensor), "query_tensor", [mstype.float32, mstype.float16], self.cls_name)
         _check_input_dtype(F.dtype(key_tensor), "key_tensor", [mstype.float32, mstype.float16], self.cls_name)
         _check_input_dtype(F.dtype(value_tensor), "value_tensor", [mstype.float32, mstype.float16], self.cls_name)
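The rewritten _get_batch_size_from_query above recovers the batch size from a flattened 2-D query: outside incremental prediction the first dimension is batch * src_seq_length, so it is divided by src_seq_length; during an incremental step the first dimension already equals the batch size. A small standalone sketch of that arithmetic on a plain shape tuple (shapes are made up):

    def batch_size_from_query_shape(query_shape, src_seq_length, use_past=False, is_first_iteration=True):
        """Illustrative mirror of the new logic, operating on a shape tuple."""
        if len(query_shape) == 2 and ((use_past and is_first_iteration) or not use_past):
            # query was flattened to (batch * seq_length, hidden_size)
            return query_shape[0] // src_seq_length
        # incremental step: the first dimension is already the batch size
        return query_shape[0]

    print(batch_size_from_query_shape((64, 512), src_seq_length=16))  # prints 4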
@@ -1264,13 +1238,8 @@ class MultiHeadAttention(Cell):
         _check_past_none_input_none(self.use_past, "batch_valid_length", self.cls_name, None,
                                     batch_valid_length_is_tensor, batch_is_default)
         if self.use_past:
-            _check_shape_equal(F.shape(key_past), "key_past", self.cls_name,
-                               [self.batch_size, self.n_head, self.size_per_head, self.tgt_seq_length])
             _check_input_dtype(F.dtype(key_past), "key_past", [mstype.float16], self.cls_name)
-            _check_shape_equal(F.shape(value_past), "value_past", self.cls_name,
-                               [self.batch_size, self.n_head, self.tgt_seq_length, self.size_per_head])
             _check_input_dtype(F.dtype(value_past), "value_past", [mstype.float16], self.cls_name)
-            _check_shape_equal(F.shape(batch_valid_length), "batch_valid_length", self.cls_name, [self.batch_size])
             _check_input_dtype(F.dtype(batch_valid_length), "batch_valid_length", [mstype.int32], self.cls_name)
         return True
 

@@ -1528,7 +1497,7 @@ class TransformerEncoderLayer(Cell):
         if batch_size or use_past:
             Validator.check_positive_int(batch_size)
         self.batch_size = batch_size
-        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,)
+        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             _check_config(parallel_config)
             if num_heads % parallel_config.model_parallel != 0:
                 raise ValueError(

@@ -1770,17 +1739,6 @@ class TransformerEncoderLayer(Cell):
 
     def _check_input(self, x, input_mask, init_reset, batch_valid_length):
         r"""Check inputs"""
-        if not self.use_past or (self.use_past and self.is_first_iteration):
-            _check_shape_equal_without_batch(F.shape(x), "x", self.cls_name,
-                                             [self.seq_length, self.hidden_size])
-            if input_mask is not None:
-                _check_shape_equal(F.shape(input_mask), "input_mask", self.cls_name,
-                                   [F.shape(input_mask)[0], self.seq_length, self.seq_length])
-        else:
-            _check_shape_equal(F.shape(x), "x", self.cls_name, [self.batch_size, 1, self.hidden_size])
-            if input_mask is not None:
-                _check_shape_equal(F.shape(input_mask), "input_mask", self.cls_name,
-                                   [F.shape(input_mask)[0], 1, self.seq_length])
         _check_input_dtype(F.dtype(x), "x", [mstype.float32, mstype.float16], self.cls_name)
         if input_mask is not None:
             _check_input_dtype(F.dtype(input_mask), "input_mask", [mstype.float32, mstype.float16], self.cls_name)

@@ -1795,9 +1753,7 @@ class TransformerEncoderLayer(Cell):
                                     batch_valid_length_is_tensor, batch_is_default)
 
         if self.use_past:
-            _check_shape_equal(F.shape(init_reset), "init_reset", self.cls_name, [1])
             _check_input_dtype(F.dtype(init_reset), "init_reset", [mstype.bool_], self.cls_name)
-            _check_shape_equal(F.shape(batch_valid_length), "batch_valid_length", self.cls_name, [self.batch_size])
             _check_input_dtype(F.dtype(batch_valid_length), "batch_valid_length", [mstype.int32], self.cls_name)
         return True
 

@@ -1933,7 +1889,7 @@ class TransformerDecoderLayer(Cell):
         config_to_attention = parallel_config.dpmp if self.use_moe else parallel_config
         if batch_size or use_past:
             Validator.check_positive_int(batch_size)
-        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,)
+        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             _check_config(parallel_config)
             if num_heads % parallel_config.model_parallel != 0:
                 raise ValueError("For 'TransformerDecoderLayer', the class variable 'num_heads' must be divisibled by "

@@ -2226,31 +2182,14 @@ class TransformerDecoderLayer(Cell):
 
     def _check_input(self, hidden_states, attention_mask, encoder_output, memory_mask, init_reset, batch_valid_length):
         r"""Check inputs"""
-        if not self.use_past or (self.use_past and self.is_first_iteration):
-            _check_shape_equal_without_batch(F.shape(hidden_states), "hidden_states", self.cls_name,
-                                             [self.tgt_seq_length, self.hidden_size])
-            if attention_mask is not None:
-                _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
-                                   [F.shape(attention_mask)[0], self.tgt_seq_length, self.tgt_seq_length])
-
-        else:
-            _check_shape_equal(F.shape(hidden_states), "hidden_states", self.cls_name,
-                               [self.batch_size, 1, self.hidden_size])
-            if attention_mask is not None:
-                _check_shape_equal(F.shape(attention_mask), "attention_mask", self.cls_name,
-                                   [self.batch_size, 1, self.tgt_seq_length])
         _check_input_dtype(F.dtype(hidden_states), "hidden_states", [mstype.float32, mstype.float16], self.cls_name)
         if attention_mask is not None:
             _check_input_dtype(F.dtype(attention_mask), "attention_mask", [mstype.float32, mstype.float16],
                                self.cls_name)
         if encoder_output is not None:
-            _check_shape_equal_without_batch(F.shape(encoder_output), "encoder_output", self.cls_name,
-                                             [self.src_seq_length, self.hidden_size])
             _check_input_dtype(F.dtype(encoder_output), "encoder_output",
                                [mstype.float32, mstype.float16], self.cls_name)
         if memory_mask is not None:
-            _check_shape_equal_without_batch(F.shape(memory_mask), "memory_mask", self.cls_name,
-                                             [self.tgt_seq_length, self.src_seq_length])
             _check_input_dtype(F.dtype(memory_mask), "memory_mask",
                                [mstype.float32, mstype.float16], self.cls_name)
 

@@ -2264,9 +2203,7 @@ class TransformerDecoderLayer(Cell):
                                     batch_valid_length_is_tensor, batch_is_default)
 
         if self.use_past:
-            _check_shape_equal(F.shape(init_reset), "init_reset", self.cls_name, [1])
             _check_input_dtype(F.dtype(init_reset), "init_reset", [mstype.bool_], self.cls_name)
-            _check_shape_equal(F.shape(batch_valid_length), "batch_valid_length", self.cls_name, [self.batch_size])
             _check_input_dtype(F.dtype(batch_valid_length), "batch_valid_length", [mstype.int32], self.cls_name)
         return True
 

@@ -2487,7 +2424,7 @@ class TransformerEncoder(Cell):
         _check_moe_config(moe_config, parallel_config)
         self.use_moe = (moe_config.expert_num > 1)
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
-        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,)
+        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
             self.aux_loss = Tensor(0.0, mstype.float32)
             self.num_layers = num_layers

@@ -2723,7 +2660,7 @@ class TransformerDecoder(Cell):
         _check_config(parallel_config)
         self.use_moe = (moe_config.expert_num > 1)
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
-        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,)
+        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
             self.aux_loss = Tensor(0.0, mstype.float32)
             self.num_layers = num_layers

@@ -2827,8 +2764,8 @@ class Transformer(Cell):
     the residual addition before the layer normalization. And the default hidden act is `gelu`.
     The details can be found in `Attention is all you need <https://arxiv.org/pdf/1706.03762v5.pdf>`_.
 
-
-    This is an experimental
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
 
     Args:
         hidden_size(int): The hidden size of the input.

@@ -2986,7 +2923,7 @@ class Transformer(Cell):
                  moe_config=default_moe_config,
                  parallel_config=default_transformer_config):
         super(Transformer, self).__init__()
-        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,)
+        if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             _check_config(parallel_config)
             self.batch_size = batch_size
             self.hidden_size = hidden_size
mindspore/parallel/_utils.py CHANGED

@@ -52,9 +52,8 @@ def _is_in_hybrid_parallel_mode():
 
 
 def _is_pynative_parallel():
-    run_mode = context.get_context('mode')
     parallel_mode = context.get_auto_parallel_context('parallel_mode')
-    return
+    return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
         context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)
 
 
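Pieced together from the hunk above, the whole helper after the change reads as a single boolean expression; the removed 2.0.0a0 lines are truncated in this view, so only the new form is reconstructed here:

    from mindspore import context

    def _is_pynative_parallel():
        # True only in PyNative mode with a (semi-)auto-parallel mode configured.
        parallel_mode = context.get_auto_parallel_context('parallel_mode')
        return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
            context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)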
@@ -229,7 +229,7 @@ def set_algo_parameters(**kwargs):
 """
 Set parameters in the algorithm for parallel strategy searching. See a typical use in
 `test_auto_parallel_resnet.py
-<https://gitee.com/mindspore/mindspore/blob/r2.0
+<https://gitee.com/mindspore/mindspore/blob/r2.0/tests/ut/python/parallel/test_auto_parallel_resnet.py>`_.

 Note:
 The attribute name is required. This interface works ONLY in AUTO_PARALLEL mode.
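The hunk above only completes the truncated documentation link; the call pattern it documents is unchanged. A hedged sketch of invoking `set_algo_parameters` by attribute name (the keyword `fully_use_devices` comes from the public API, not from this diff, and the call only takes effect in AUTO_PARALLEL mode as the Note states):

    from mindspore import set_algo_parameters

    # Strategy-search options are passed by attribute name; outside
    # AUTO_PARALLEL mode this setting has no effect.
    set_algo_parameters(fully_use_devices=False)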
@@ -22,8 +22,9 @@ from collections import defaultdict
 import numpy as np
 import mindspore as ms
 from mindspore.parallel._parallel_serialization import _rank_list_for_transform_parallel_checkpoint, \
-_transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir,
-_extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num
+_transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir, \
+_extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
+_merge_protobuf_strategy, _merge_json_strategy


 __all__ = ["merge_pipeline_strategys", "rank_list_for_transform", "transform_checkpoint_by_rank",
@@ -33,6 +34,9 @@ __all__ = ["merge_pipeline_strategys", "rank_list_for_transform", "transform_che
 def merge_pipeline_strategys(src_strategy_dirs, dst_strategy_file):
 """
 Merge parallel strategy between all pipeline stages in pipeline parallel mode.
+For more details about converting distributed Checkpoint, please refer to
+`Distributed Resilience Training and
+Inference <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/resilience_train_and_predict.html>`_.

 Note:
 Strategy file of each pipeline stage should be included in src_strategy_dirs.
@@ -55,32 +59,24 @@ def merge_pipeline_strategys(src_strategy_dirs, dst_strategy_file):
 _make_dir(dst_strategy_dir, "path")
 if not os.path.isdir(src_strategy_dirs):
 raise NotADirectoryError("src_strategy_dirs {} is not a directory.".format(src_strategy_dirs))
-
-
-
-
-
-
-
-
-
-
-if pipeline_stage in merged_stage:
-continue
-for layout_item in layout_items:
-layout_item.param_name = "-".join([str(pipeline_stage), layout_item.param_name])
-dst_parallel_strategy_map.parallel_strategy_item.extend(strategy_items)
-dst_parallel_strategy_map.parallel_layout_item.extend(layout_items)
-merged_stage.append(pipeline_stage)
-dst_parallel_strategy_map.current_stage = 1
-with open(dst_strategy_file, "wb") as f:
-f.write(dst_parallel_strategy_map.SerializeToString())
+src_strategy_files_protobuf = glob.glob(os.path.join(src_strategy_dirs, "*.ckpt"))
+src_strategy_files_json = glob.glob(os.path.join(src_strategy_dirs, "*.json"))
+if src_strategy_files_protobuf and src_strategy_files_json:
+raise ValueError("The strategys format should be all '.ckpt' or all '.json'")
+is_protobuf = len(src_strategy_files_protobuf) > 0
+if is_protobuf:
+_merge_protobuf_strategy(src_strategy_files_protobuf, dst_strategy_file)
+else:
+_merge_json_strategy(src_strategy_files_json, dst_strategy_file)
+


 def rank_list_for_transform(rank_id, src_strategy_file=None, dst_strategy_file=None):
 """
 List of original distributed checkpoint rank index for obtaining the target checkpoint of a rank_id
-during the distributed checkpoint conversion.
+during the distributed checkpoint conversion. For more details about converting distributed Checkpoint,
+please refer to `Distributed Resilience Training and
+Inference <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/resilience_train_and_predict.html>`_.

 Args:
 rank_id (int): The rank of which distributed checkpoint needs to be obtained after conversion.
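The rewritten body of `merge_pipeline_strategys` above now detects whether the per-stage strategy files are protobuf (".ckpt") or JSON (".json") and rejects a mix of the two. A minimal, self-contained sketch of that detection step (the directory name is a placeholder and the merge itself is stubbed out):

    import glob
    import os

    src_strategy_dirs = "./pipeline_strategies"  # placeholder directory of per-stage strategy files
    protobuf_files = glob.glob(os.path.join(src_strategy_dirs, "*.ckpt"))
    json_files = glob.glob(os.path.join(src_strategy_dirs, "*.json"))

    if protobuf_files and json_files:
        # Mixing formats is rejected, mirroring the ValueError added above.
        raise ValueError("The strategys format should be all '.ckpt' or all '.json'")
    print("would merge as", "protobuf" if protobuf_files else "json")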
@@ -106,7 +102,7 @@ def rank_list_for_transform(rank_id, src_strategy_file=None, dst_strategy_file=N
 >>> rank_list = rank_list_for_transform(rank_id, "./src_strategy.ckpt", "./dst_strategy.ckpt")
 >>> checkpoint_files_map = {}
 >>> for rank in rank_list:
-
+... checkpoint_files_map[rank] = "./pangu{}-100_2.ckpt".format(rank)

 """
 if not isinstance(rank_id, int):
@@ -133,14 +129,18 @@ def rank_list_for_transform(rank_id, src_strategy_file=None, dst_strategy_file=N
 src_rank_id_start = src_pipeline_stage_id * src_stage_device_num
 result_set.update([src_rank_id_start + rank for rank in needed_rank_list_in_local_stage])
 handled_pipeline_stage.append(src_pipeline_stage_id)
-
+result_list = list(result_set)
+result_list.sort(reverse=True)
+return result_list


 def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_file_name,
 src_strategy_file=None, dst_strategy_file=None):
 """
 Transform distributed checkpoint from source sharding strategy to destination sharding strategy by rank
-for a network.
+for a network. For more details about converting distributed Checkpoint, please refer to
+`Distributed Resilience Training and
+Inference <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/resilience_train_and_predict.html>`_.

 Args:
 rank_id (int): The rank of which distributed checkpoint needs to be obtained after conversion.
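The lines added to `rank_list_for_transform` above turn the collected set of source ranks into a list sorted in descending order before it is returned. A small stand-alone sketch of that ordering step on a stand-in rank set:

    # Stand-in for the result_set built from the needed source ranks.
    result_set = {3, 0, 5, 2}
    result_list = list(result_set)
    result_list.sort(reverse=True)  # deterministic order, highest rank first
    print(result_list)              # [5, 3, 2, 0]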
@@ -167,13 +167,13 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_

 Examples:
 >>> dst_device_num = 8
->>> for rank_id in range(dst_device_num)
-
-
-
-
-
-
+>>> for rank_id in range(dst_device_num):
+... rank_list = rank_list_for_transform(rank_id, "./src_strategy.ckpt", "./dst_strategy.ckpt")
+... checkpoint_files_map = {}
+... for rank in rank_list:
+... checkpoint_files_map[rank] = "./origin_checkpoint_rank{}/pangu{}-100_2.ckpt".format(rank)
+... save_checkpoint_file_name = "./new_checkpoint_rank{}/pangu{}-100_2.ckpt".format(rank_id)
+... transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_file_name,
 ... "./src_strategy.ckpt", "./dst_strategy.ckpt")

 """
@@ -222,9 +222,12 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
 dst_strategy_file=None):
 """
 Transform distributed checkpoint from source sharding strategy to destination sharding strategy for a rank.
+For more details about converting distributed Checkpoint, please refer to
+`Distributed Resilience Training and
+Inference <https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/resilience_train_and_predict.html>`_.

 Note:
-The src_checkpoints_dir directory structure should be organized like "src_checkpoints_dir/rank_0/a.ckpt", the
+The `src_checkpoints_dir` directory structure should be organized like "src_checkpoints_dir/rank_0/a.ckpt", the
 rank number should be set to a subdirectory and the checkpoint file is stored in this subdirectory. If multiple
 files exist in a rank directory, the last file in the lexicgraphic order would be selected.

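The Note above requires one rank subdirectory per source rank under `src_checkpoints_dir`. A hedged sketch of preparing such a layout and calling the conversion (paths and the ckpt prefix are placeholders, the `src_strategy_file` keyword is assumed from the surrounding API, and the call only succeeds once real per-rank checkpoints and strategy files exist):

    import os
    from mindspore import transform_checkpoints

    src_checkpoints_dir = "./src_checkpoints"
    dst_checkpoints_dir = "./dst_checkpoints"
    # One subdirectory per source rank, e.g. ./src_checkpoints/rank_0/net-100_2.ckpt
    for rank in range(8):
        os.makedirs(os.path.join(src_checkpoints_dir, "rank_{}".format(rank)), exist_ok=True)

    transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, "net",
                          src_strategy_file="./src_strategy.ckpt",
                          dst_strategy_file="./dst_strategy.ckpt")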
mindspore/parallel/shard.py
CHANGED
@@ -154,7 +154,7 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
 in_strategy and out_strategy define the input and output layout respectively.
 in_strategy/out_strategy should be a tuple, each element of which corresponds to the desired layout of
 this input/output, and None represents data_parallel,
-which can refer to the description of
+which can refer to the description of :func:`mindspore.ops.Primitive.shard`.
 The parallel strategies of remaining operators are derived from the strategy specified by the input and output.

 Note:
@@ -162,16 +162,18 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
 set the parallel mode in `set_auto_parallel_context` to "auto_parallel"
 and the search mode to "sharding_propagation".
 If the input contain Parameter, its strategy should be set in `in_strategy`.
+For more details about shard, please refer to `Functional Operator Sharding
+<https://www.mindspore.cn/tutorials/experts/en/r2.0/parallel/pynative_shard_function_parallel.html>`_.

 Args:
 fn (Union[Cell, Function]): Function to be executed in parallel.
 Its arguments and return value must be Tensor or Parameter.
-If fn is a Cell with parameters, fn needs to be an instantiated object,
+If `fn` is a Cell with parameters, `fn` needs to be an instantiated object,
 otherwise its arguments cannot be accessed.
 in_strategy (tuple): Define the layout of inputs, each element of the tuple should be a tuple or None.
 Tuple defines the layout of the corresponding input
 and None represents a data parallel strategy.
-out_strategy (Union[tuple, None]): Define the layout of outputs similar with in_strategy
+out_strategy (Union[tuple, None]): Define the layout of outputs similar with `in_strategy`.
 It is not in use right now. Default: None.
 parameter_plan (Union[dict, None]): Define the layout for the specified parameters. Each element in dict
 defines the layout of the parameter like "param_name: layout".
@@ -180,7 +182,7 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
 If the parameter name is incorrect or the corresponding parameter
 has been set, the parameter setting will be ignored.
 Default: None.
-device (string): Select a certain device target. It is not in use right now.
+device (string): Select a certain `device` target. It is not in use right now.
 Support ["CPU", "GPU", "Ascend"]. Default: "Ascend".
 level (int): Option for parallel strategy infer algorithm, namely the object function, maximize computation
 over communication ratio, maximize speed performance, minimize memory usage etc. It is not in
@@ -190,20 +192,17 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
 Function, return the function that will be executed under auto parallel process.

 Raises:
-AssertionError:
-
-
-
-
-
-TypeError:
-
-
-
-
-- If any value in `parameter_plan` is not a tuple.
-- If `device` is not a str.
-- If `level` is not a integer.
+AssertionError: If execute mode is not PYNATIVE_MODE.
+AssertionError: If parallel mode is not "auto_parallel".
+AssertionError: If search_mode it not "sharding_propagation".
+AssertionError: If device_target it not "Ascend" or "GPU".
+TypeError: If `in_strategy` is not a tuple.
+TypeError: If `out_strategy` is not a tuple or None.
+TypeError: If `parameter_plan` is not a dict or None.
+TypeError: If any key in `parameter_plan` is not a str.
+TypeError: If any value in `parameter_plan` is not a tuple.
+TypeError: If `device` is not a str.
+TypeError: If `level` is not an integer.

 Supported Platforms:
 ``Ascend`` ``GPU``
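Taken together, the docstring updates above spell out the preconditions for `mindspore.shard`: PyNative mode, "auto_parallel" with "sharding_propagation", and tuple-based strategies. A hedged sketch of the call shape (the function and the 8-device split are illustrative, and an initialized multi-device Ascend/GPU environment is assumed; on a single host the assertions listed under Raises will trigger):

    import mindspore as ms
    from mindspore import ops

    ms.set_context(mode=ms.PYNATIVE_MODE)
    ms.set_auto_parallel_context(parallel_mode="auto_parallel",
                                 search_mode="sharding_propagation")

    def dense_relu(x, w):
        return ops.relu(ops.matmul(x, w))

    # Split the first input 8 ways along its first axis; None for an element
    # would mean plain data parallelism. out_strategy is left unset (not in use).
    sharded_fn = ms.shard(dense_relu, in_strategy=((8, 1), (1, 1)))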
@@ -32,11 +32,11 @@ def check_valid_character_of_path(file_path):
 Returns:
 bool, whether valid.
 """
-re_path = r'^[/\\_a-zA-Z0-9-_
+re_path = r'^[/\\_a-zA-Z0-9-_.@]+$'
 path_valid = re.fullmatch(re_path, file_path)
 if not path_valid:
 msg = "The output path of profiler only supports alphabets(a-zA-Z), " \
-"digit(0-9) or {'-', '_', '.', '/'}, but got the absolute path= " + file_path
+"digit(0-9) or {'-', '_', '.', '/', '@'}, but got the absolute path= " + file_path
 raise RuntimeError(msg)

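The updated pattern adds '@' to the characters the profiler accepts in an output path. A quick, runnable check of the new regular expression against one path that uses the newly allowed character and one that is still rejected:

    import re

    re_path = r'^[/\\_a-zA-Z0-9-_.@]+$'
    for file_path in ("/tmp/profiler@job1/output", "/tmp/profiler data"):
        # The second path contains a space, which remains invalid.
        print(file_path, "->", "valid" if re.fullmatch(re_path, file_path) else "invalid")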