mindspore-2.0.0rc1-cp38-cp38-manylinux1_x86_64.whl → mindspore-2.2.0-cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (884)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +2 -2
  3. mindspore/__init__.py +5 -2
  4. mindspore/_akg/akg/build_module.py +5 -6
  5. mindspore/_akg/akg/composite/build_module.py +49 -16
  6. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  7. mindspore/_akg/akg/config/repository.json +195 -0
  8. mindspore/_akg/akg/global_configs.py +5 -1
  9. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  10. mindspore/_akg/akg/tvm/api.py +4 -3
  11. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  12. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  13. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  14. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  15. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  16. mindspore/_akg/akg/tvm/build_module.py +16 -1
  17. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  18. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  19. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  20. mindspore/_akg/akg/tvm/module.py +1 -2
  21. mindspore/_akg/akg/tvm/stmt.py +2 -2
  22. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  23. mindspore/_akg/akg/utils/kernel_exec.py +58 -260
  24. mindspore/_akg/akg/utils/op_dsl.py +17 -1
  25. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  26. mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
  27. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  28. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  29. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  30. mindspore/_check_jit_forbidden_api.py +5 -1
  31. mindspore/_checkparam.py +79 -62
  32. mindspore/_extends/graph_kernel/__init__.py +0 -1
  33. mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
  34. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  35. mindspore/_extends/graph_kernel/splitter.py +1 -9
  36. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
  37. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
  38. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  39. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
  40. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
  41. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  42. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  43. mindspore/_extends/parse/__init__.py +19 -17
  44. mindspore/_extends/parse/namespace.py +7 -36
  45. mindspore/_extends/parse/parser.py +375 -189
  46. mindspore/_extends/parse/resources.py +36 -41
  47. mindspore/_extends/parse/standard_method.py +350 -245
  48. mindspore/_extends/parse/trope.py +2 -12
  49. mindspore/_extends/remote/kernel_build_server.py +24 -7
  50. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  51. mindspore/_install_custom.py +43 -0
  52. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  53. mindspore/amp.py +85 -19
  54. mindspore/bin/cache_admin +0 -0
  55. mindspore/bin/cache_server +0 -0
  56. mindspore/boost/base.py +2 -2
  57. mindspore/boost/boost.py +27 -32
  58. mindspore/boost/boost_cell_wrapper.py +37 -13
  59. mindspore/boost/grad_accumulation.py +1 -1
  60. mindspore/boost/grad_freeze.py +34 -6
  61. mindspore/boost/group_loss_scale_manager.py +15 -14
  62. mindspore/boost/less_batch_normalization.py +28 -3
  63. mindspore/common/__init__.py +15 -11
  64. mindspore/common/_auto_dynamic.py +68 -0
  65. mindspore/common/_jit_fallback_utils.py +111 -0
  66. mindspore/common/_register_for_adapter.py +17 -5
  67. mindspore/common/_register_for_tensor.py +2 -2
  68. mindspore/common/_stub_tensor.py +18 -15
  69. mindspore/common/_utils.py +31 -7
  70. mindspore/common/api.py +269 -101
  71. mindspore/common/auto_dynamic_shape.py +498 -0
  72. mindspore/common/dtype.py +61 -21
  73. mindspore/common/dump.py +9 -7
  74. mindspore/common/initializer.py +106 -76
  75. mindspore/common/jit_config.py +35 -14
  76. mindspore/common/lazy_inline.py +187 -0
  77. mindspore/common/mindir_util.py +101 -0
  78. mindspore/common/mutable.py +10 -13
  79. mindspore/common/parameter.py +246 -55
  80. mindspore/common/seed.py +13 -7
  81. mindspore/common/sparse_tensor.py +29 -33
  82. mindspore/common/tensor.py +907 -251
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +84 -4
  85. mindspore/communication/management.py +160 -88
  86. mindspore/config/op_info.config +99 -75
  87. mindspore/config/super_bar_config.json +36 -4
  88. mindspore/context.py +526 -219
  89. mindspore/dataset/__init__.py +9 -46
  90. mindspore/dataset/audio/__init__.py +4 -19
  91. mindspore/dataset/audio/transforms.py +545 -233
  92. mindspore/dataset/audio/utils.py +21 -18
  93. mindspore/dataset/callback/ds_callback.py +42 -13
  94. mindspore/dataset/core/config.py +158 -100
  95. mindspore/dataset/core/validator_helpers.py +1 -63
  96. mindspore/dataset/debug/debug_hook.py +45 -13
  97. mindspore/dataset/debug/pre_defined_hook.py +5 -5
  98. mindspore/dataset/engine/__init__.py +0 -5
  99. mindspore/dataset/engine/cache_client.py +38 -15
  100. mindspore/dataset/engine/datasets.py +615 -278
  101. mindspore/dataset/engine/datasets_audio.py +154 -283
  102. mindspore/dataset/engine/datasets_standard_format.py +104 -116
  103. mindspore/dataset/engine/datasets_text.py +443 -326
  104. mindspore/dataset/engine/datasets_user_defined.py +251 -164
  105. mindspore/dataset/engine/datasets_vision.py +839 -1443
  106. mindspore/dataset/engine/iterators.py +11 -4
  107. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
  108. mindspore/dataset/engine/obs/util.py +3 -0
  109. mindspore/dataset/engine/offload.py +6 -6
  110. mindspore/dataset/engine/queue.py +15 -14
  111. mindspore/dataset/engine/samplers.py +39 -23
  112. mindspore/dataset/engine/serializer_deserializer.py +22 -6
  113. mindspore/dataset/engine/validators.py +21 -331
  114. mindspore/dataset/text/__init__.py +5 -33
  115. mindspore/dataset/text/transforms.py +334 -165
  116. mindspore/dataset/text/utils.py +215 -145
  117. mindspore/dataset/transforms/__init__.py +1 -1
  118. mindspore/dataset/transforms/c_transforms.py +3 -2
  119. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  120. mindspore/dataset/transforms/transforms.py +174 -71
  121. mindspore/dataset/utils/browse_dataset.py +25 -17
  122. mindspore/dataset/utils/line_reader.py +24 -21
  123. mindspore/dataset/vision/__init__.py +5 -26
  124. mindspore/dataset/vision/c_transforms.py +177 -165
  125. mindspore/dataset/vision/py_transforms.py +114 -119
  126. mindspore/dataset/vision/py_transforms_util.py +54 -51
  127. mindspore/dataset/vision/transforms.py +1127 -381
  128. mindspore/dataset/vision/utils.py +54 -38
  129. mindspore/dataset/vision/validators.py +12 -2
  130. mindspore/experimental/map_parameter.py +38 -4
  131. mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
  132. mindspore/experimental/optim/adam.py +192 -0
  133. mindspore/experimental/optim/adamw.py +181 -0
  134. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  135. mindspore/experimental/optim/optimizer.py +252 -0
  136. mindspore/experimental/optim/sgd.py +147 -0
  137. mindspore/gen_ops.py +273 -0
  138. mindspore/include/OWNERS +1 -2
  139. mindspore/include/api/context.h +21 -1
  140. mindspore/include/api/data_type.h +2 -1
  141. mindspore/include/api/graph.h +0 -15
  142. mindspore/include/api/kernel.h +2 -0
  143. mindspore/include/api/kernel_api.h +37 -12
  144. mindspore/include/api/model.h +29 -42
  145. mindspore/include/api/model_group.h +14 -3
  146. mindspore/include/api/model_parallel_runner.h +18 -2
  147. mindspore/include/api/serialization.h +26 -0
  148. mindspore/include/api/status.h +1 -0
  149. mindspore/include/api/types.h +38 -4
  150. mindspore/include/c_api/ms/abstract.h +67 -0
  151. mindspore/include/c_api/ms/attribute.h +197 -0
  152. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  153. mindspore/include/c_api/ms/base/macros.h +32 -0
  154. mindspore/include/c_api/ms/base/status.h +33 -0
  155. mindspore/include/c_api/ms/base/types.h +282 -0
  156. mindspore/include/c_api/ms/context.h +102 -0
  157. mindspore/include/c_api/ms/graph.h +160 -0
  158. mindspore/include/c_api/ms/node.h +606 -0
  159. mindspore/include/c_api/ms/tensor.h +161 -0
  160. mindspore/include/c_api/ms/value.h +84 -0
  161. mindspore/include/c_api/status_c.h +3 -0
  162. mindspore/include/dataset/constants.h +6 -12
  163. mindspore/include/dataset/execute.h +23 -13
  164. mindspore/include/dataset/text.h +26 -26
  165. mindspore/include/dataset/transforms.h +25 -31
  166. mindspore/include/dataset/vision.h +60 -60
  167. mindspore/include/dataset/vision_ascend.h +5 -6
  168. mindspore/include/dataset/vision_lite.h +17 -17
  169. mindspore/include/mindapi/base/format.h +0 -1
  170. mindspore/include/mindapi/base/type_id.h +2 -1
  171. mindspore/include/mindapi/base/types.h +5 -1
  172. mindspore/lib/libdnnl.so.2 +0 -0
  173. mindspore/lib/libjemalloc.so.2 +0 -0
  174. mindspore/lib/libmindspore.so +0 -0
  175. mindspore/lib/libmindspore_backend.so +0 -0
  176. mindspore/lib/libmindspore_common.so +0 -0
  177. mindspore/lib/libmindspore_core.so +0 -0
  178. mindspore/lib/libmindspore_glog.so.0 +0 -0
  179. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  180. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  181. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  182. mindspore/lib/libmindspore_shared_lib.so +0 -0
  183. mindspore/lib/libmpi_adapter.so +0 -0
  184. mindspore/lib/libnnacl.so +0 -0
  185. mindspore/lib/libopencv_core.so.4.5 +0 -0
  186. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  187. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  188. mindspore/lib/libps_cache.so +0 -0
  189. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
  192. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  193. mindspore/lib/plugin/ascend/libakg.so +0 -0
  194. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  195. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  196. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  197. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  198. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  199. mindspore/lib/plugin/cpu/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  201. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  202. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  203. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  204. mindspore/lib/plugin/gpu10.1/libnvidia_collective.so +0 -0
  205. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  206. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  207. mindspore/lib/plugin/gpu11.1/libnvidia_collective.so +0 -0
  208. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  209. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  210. mindspore/lib/plugin/gpu11.6/libnvidia_collective.so +0 -0
  211. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  212. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  213. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  214. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  215. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  216. mindspore/log.py +9 -6
  217. mindspore/mindrecord/filereader.py +33 -4
  218. mindspore/mindrecord/filewriter.py +70 -35
  219. mindspore/mindrecord/mindpage.py +40 -34
  220. mindspore/mindrecord/shardreader.py +1 -1
  221. mindspore/mindrecord/shardsegment.py +1 -1
  222. mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
  223. mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
  224. mindspore/mindrecord/tools/csv_to_mr.py +29 -13
  225. mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
  226. mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
  227. mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
  228. mindspore/nn/cell.py +463 -169
  229. mindspore/nn/dynamic_lr.py +47 -43
  230. mindspore/nn/layer/activation.py +225 -82
  231. mindspore/nn/layer/basic.py +121 -79
  232. mindspore/nn/layer/channel_shuffle.py +21 -21
  233. mindspore/nn/layer/combined.py +33 -26
  234. mindspore/nn/layer/container.py +277 -22
  235. mindspore/nn/layer/conv.py +441 -304
  236. mindspore/nn/layer/dense.py +19 -13
  237. mindspore/nn/layer/embedding.py +62 -49
  238. mindspore/nn/layer/flash_attention.py +264 -0
  239. mindspore/nn/layer/image.py +50 -39
  240. mindspore/nn/layer/math.py +62 -51
  241. mindspore/nn/layer/normalization.py +219 -167
  242. mindspore/nn/layer/padding.py +58 -70
  243. mindspore/nn/layer/pooling.py +334 -287
  244. mindspore/nn/layer/rnn_cells.py +53 -38
  245. mindspore/nn/layer/rnns.py +59 -56
  246. mindspore/nn/layer/thor_layer.py +52 -44
  247. mindspore/nn/layer/timedistributed.py +6 -4
  248. mindspore/nn/layer/transformer.py +284 -164
  249. mindspore/nn/learning_rate_schedule.py +34 -25
  250. mindspore/nn/loss/__init__.py +3 -2
  251. mindspore/nn/loss/loss.py +554 -311
  252. mindspore/nn/optim/ada_grad.py +12 -9
  253. mindspore/nn/optim/adadelta.py +14 -11
  254. mindspore/nn/optim/adafactor.py +19 -16
  255. mindspore/nn/optim/adam.py +62 -47
  256. mindspore/nn/optim/adamax.py +13 -10
  257. mindspore/nn/optim/adasum.py +12 -8
  258. mindspore/nn/optim/asgd.py +10 -9
  259. mindspore/nn/optim/ftrl.py +20 -17
  260. mindspore/nn/optim/lamb.py +16 -12
  261. mindspore/nn/optim/lars.py +8 -6
  262. mindspore/nn/optim/lazyadam.py +25 -20
  263. mindspore/nn/optim/momentum.py +10 -7
  264. mindspore/nn/optim/optimizer.py +61 -9
  265. mindspore/nn/optim/proximal_ada_grad.py +14 -13
  266. mindspore/nn/optim/rmsprop.py +17 -13
  267. mindspore/nn/optim/rprop.py +30 -17
  268. mindspore/nn/optim/sgd.py +40 -23
  269. mindspore/nn/optim/thor.py +24 -26
  270. mindspore/nn/probability/bijector/bijector.py +11 -11
  271. mindspore/nn/probability/bijector/exp.py +1 -1
  272. mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
  273. mindspore/nn/probability/bijector/invert.py +1 -1
  274. mindspore/nn/probability/bijector/power_transform.py +29 -29
  275. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  276. mindspore/nn/probability/bijector/softplus.py +5 -5
  277. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
  278. mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
  279. mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
  280. mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
  281. mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
  282. mindspore/nn/probability/distribution/_utils/utils.py +1 -1
  283. mindspore/nn/probability/distribution/bernoulli.py +9 -9
  284. mindspore/nn/probability/distribution/beta.py +8 -8
  285. mindspore/nn/probability/distribution/categorical.py +23 -15
  286. mindspore/nn/probability/distribution/cauchy.py +5 -6
  287. mindspore/nn/probability/distribution/distribution.py +3 -3
  288. mindspore/nn/probability/distribution/exponential.py +4 -4
  289. mindspore/nn/probability/distribution/gamma.py +10 -10
  290. mindspore/nn/probability/distribution/geometric.py +8 -8
  291. mindspore/nn/probability/distribution/gumbel.py +8 -9
  292. mindspore/nn/probability/distribution/half_normal.py +5 -5
  293. mindspore/nn/probability/distribution/laplace.py +5 -5
  294. mindspore/nn/probability/distribution/log_normal.py +12 -11
  295. mindspore/nn/probability/distribution/logistic.py +8 -8
  296. mindspore/nn/probability/distribution/normal.py +6 -5
  297. mindspore/nn/probability/distribution/poisson.py +10 -11
  298. mindspore/nn/probability/distribution/student_t.py +8 -9
  299. mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
  300. mindspore/nn/probability/distribution/uniform.py +11 -11
  301. mindspore/nn/reinforcement/tensor_array.py +2 -2
  302. mindspore/nn/sparse/sparse.py +9 -9
  303. mindspore/nn/wrap/cell_wrapper.py +188 -63
  304. mindspore/nn/wrap/grad_reducer.py +21 -12
  305. mindspore/nn/wrap/loss_scale.py +136 -49
  306. mindspore/numpy/__init__.py +4 -4
  307. mindspore/numpy/array_creations.py +55 -56
  308. mindspore/numpy/array_ops.py +134 -35
  309. mindspore/numpy/logic_ops.py +66 -20
  310. mindspore/numpy/math_ops.py +142 -139
  311. mindspore/numpy/utils_const.py +2 -2
  312. mindspore/offline_debug/convert_async.py +2 -2
  313. mindspore/ops/_grad_experimental/__init__.py +7 -5
  314. mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
  315. mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
  316. mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
  317. mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
  318. mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
  319. mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
  320. mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
  321. mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
  322. mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
  323. mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
  324. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
  325. mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
  326. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  327. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  328. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
  329. mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
  330. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
  331. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
  332. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
  333. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
  334. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  335. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
  336. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
  337. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
  338. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  339. mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
  340. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  341. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  342. mindspore/ops/_op_impl/aicpu/cast.py +52 -0
  343. mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
  344. mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
  345. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  346. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
  347. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  348. mindspore/ops/_op_impl/aicpu/eye.py +4 -4
  349. mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
  350. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
  351. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  352. mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
  353. mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
  354. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  355. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  356. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  357. mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
  358. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
  359. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  360. mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
  361. mindspore/ops/_op_impl/aicpu/median.py +1 -0
  362. mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
  363. mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
  364. mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
  365. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
  366. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  367. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  368. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  369. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  370. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  371. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
  372. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
  373. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
  374. mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
  375. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  376. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  377. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  378. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  379. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
  380. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
  381. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  382. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  383. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  384. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  385. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  386. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
  387. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
  388. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
  389. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
  390. mindspore/ops/_op_impl/tbe/__init__.py +6 -4
  391. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  392. mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
  393. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
  394. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
  395. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
  396. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
  397. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
  398. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  399. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
  400. mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
  401. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
  402. mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
  403. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
  404. mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
  405. mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
  406. mindspore/ops/_op_impl/tbe/im2col.py +4 -4
  407. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  408. mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
  409. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
  410. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
  411. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  412. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
  413. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  414. mindspore/ops/_primitive_cache.py +1 -1
  415. mindspore/ops/_tracefunc.py +241 -0
  416. mindspore/ops/_utils/utils.py +10 -2
  417. mindspore/ops/_vmap/vmap_array_ops.py +5 -3
  418. mindspore/ops/_vmap/vmap_base.py +5 -4
  419. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  420. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  421. mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
  422. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  423. mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
  424. mindspore/ops/arg_dtype_cast.py +54 -0
  425. mindspore/ops/composite/__init__.py +7 -5
  426. mindspore/ops/composite/base.py +78 -34
  427. mindspore/ops/composite/math_ops.py +5 -695
  428. mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
  429. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
  430. mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
  431. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  432. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  433. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
  434. mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
  435. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
  436. mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
  437. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
  438. mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
  439. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
  440. mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
  441. mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
  442. mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
  443. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
  444. mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
  445. mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
  446. mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
  447. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  448. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  449. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
  450. mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
  451. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
  452. mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
  453. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  454. mindspore/ops/deprecated.py +304 -0
  455. mindspore/ops/function/__init__.py +41 -4
  456. mindspore/ops/function/array_func.py +1108 -467
  457. mindspore/ops/function/clip_func.py +94 -27
  458. mindspore/ops/function/debug_func.py +3 -1
  459. mindspore/ops/function/grad/grad_func.py +82 -73
  460. mindspore/ops/function/image_func.py +28 -12
  461. mindspore/ops/function/linalg_func.py +135 -39
  462. mindspore/ops/function/math_func.py +3779 -894
  463. mindspore/ops/function/nn_func.py +1584 -657
  464. mindspore/ops/function/parameter_func.py +13 -3
  465. mindspore/ops/function/random_func.py +247 -153
  466. mindspore/ops/function/sparse_func.py +14 -11
  467. mindspore/ops/function/sparse_unary_func.py +173 -47
  468. mindspore/ops/function/spectral_func.py +8 -4
  469. mindspore/ops/function/vmap_func.py +8 -7
  470. mindspore/ops/functional.py +47 -16
  471. mindspore/ops/op_info_register.py +346 -86
  472. mindspore/ops/operations/__init__.py +38 -22
  473. mindspore/ops/operations/_grad_ops.py +145 -149
  474. mindspore/ops/operations/_inner_ops.py +298 -56
  475. mindspore/ops/operations/_ms_kernel.py +3 -3
  476. mindspore/ops/operations/_quant_ops.py +24 -28
  477. mindspore/ops/operations/_rl_inner_ops.py +9 -7
  478. mindspore/ops/operations/_scalar_ops.py +115 -0
  479. mindspore/ops/operations/_sequence_ops.py +148 -10
  480. mindspore/ops/operations/_tensor_array.py +1 -1
  481. mindspore/ops/operations/_thor_ops.py +2 -2
  482. mindspore/ops/operations/array_ops.py +1239 -561
  483. mindspore/ops/operations/comm_ops.py +166 -90
  484. mindspore/ops/operations/control_ops.py +3 -3
  485. mindspore/ops/operations/custom_ops.py +124 -102
  486. mindspore/ops/operations/debug_ops.py +24 -11
  487. mindspore/ops/operations/image_ops.py +86 -71
  488. mindspore/ops/operations/inner_ops.py +18 -13
  489. mindspore/ops/operations/linalg_ops.py +30 -11
  490. mindspore/ops/operations/math_ops.py +1730 -435
  491. mindspore/ops/operations/nn_ops.py +1953 -943
  492. mindspore/ops/operations/other_ops.py +65 -43
  493. mindspore/ops/operations/random_ops.py +258 -98
  494. mindspore/ops/operations/rl_ops.py +4 -36
  495. mindspore/ops/operations/sparse_ops.py +38 -33
  496. mindspore/ops/operations/spectral_ops.py +8 -4
  497. mindspore/ops/primitive.py +66 -44
  498. mindspore/ops/signature.py +5 -5
  499. mindspore/parallel/_auto_parallel_context.py +80 -19
  500. mindspore/parallel/_cost_model_context.py +42 -0
  501. mindspore/parallel/_offload_context.py +162 -72
  502. mindspore/parallel/_parallel_serialization.py +2 -2
  503. mindspore/parallel/_ps_context.py +16 -4
  504. mindspore/parallel/_recovery_context.py +2 -1
  505. mindspore/parallel/_tensor.py +15 -13
  506. mindspore/parallel/_transformer/layers.py +8 -6
  507. mindspore/parallel/_transformer/loss.py +1 -0
  508. mindspore/parallel/_transformer/moe.py +7 -7
  509. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  510. mindspore/parallel/_transformer/transformer.py +34 -14
  511. mindspore/parallel/_utils.py +36 -14
  512. mindspore/parallel/algo_parameter_config.py +114 -20
  513. mindspore/parallel/checkpoint_transform.py +16 -18
  514. mindspore/parallel/shard.py +16 -13
  515. mindspore/profiler/__init__.py +1 -1
  516. mindspore/profiler/common/struct_type.py +3 -3
  517. mindspore/profiler/common/util.py +3 -2
  518. mindspore/profiler/envprofiling.py +11 -4
  519. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  520. mindspore/profiler/parser/ascend_flops_generator.py +94 -0
  521. mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
  522. mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
  523. mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
  524. mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
  525. mindspore/profiler/parser/ascend_op_generator.py +276 -0
  526. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  527. mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
  528. mindspore/profiler/parser/base_timeline_generator.py +11 -7
  529. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
  530. mindspore/profiler/parser/flops_parser.py +15 -11
  531. mindspore/profiler/parser/framework_parser.py +92 -73
  532. mindspore/profiler/parser/hccl_parser.py +16 -12
  533. mindspore/profiler/parser/integrator.py +22 -11
  534. mindspore/profiler/parser/memory_usage_parser.py +36 -11
  535. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  536. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  537. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  538. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  539. mindspore/profiler/parser/optime_parser.py +1 -1
  540. mindspore/profiler/parser/profiler_info.py +4 -5
  541. mindspore/profiler/parser/step_trace_parser.py +11 -14
  542. mindspore/profiler/profiling.py +678 -377
  543. mindspore/rewrite/api/node.py +211 -54
  544. mindspore/rewrite/api/node_type.py +5 -0
  545. mindspore/rewrite/api/pattern_engine.py +22 -23
  546. mindspore/rewrite/api/scoped_value.py +20 -17
  547. mindspore/rewrite/api/symbol_tree.py +252 -106
  548. mindspore/rewrite/api/tree_node_helper.py +3 -0
  549. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  550. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  551. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  552. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
  553. mindspore/rewrite/common/rewrite_elog.py +5 -1
  554. mindspore/rewrite/namer.py +51 -51
  555. mindspore/rewrite/namespace.py +14 -5
  556. mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
  557. mindspore/rewrite/node/call_function.py +79 -0
  558. mindspore/rewrite/node/cell_container.py +135 -0
  559. mindspore/rewrite/node/control_flow.py +88 -0
  560. mindspore/rewrite/{node.py → node/node.py} +313 -247
  561. mindspore/rewrite/node/node_manager.py +254 -0
  562. mindspore/rewrite/node/node_topological_manager.py +243 -0
  563. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  564. mindspore/rewrite/parsers/assign_parser.py +225 -239
  565. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  566. mindspore/rewrite/parsers/class_def_parser.py +179 -218
  567. mindspore/rewrite/parsers/constant_parser.py +9 -6
  568. mindspore/rewrite/parsers/container_parser.py +9 -7
  569. mindspore/rewrite/parsers/for_parser.py +36 -15
  570. mindspore/rewrite/parsers/function_def_parser.py +23 -20
  571. mindspore/rewrite/parsers/if_parser.py +28 -24
  572. mindspore/rewrite/parsers/module_parser.py +202 -25
  573. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  574. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  575. mindspore/rewrite/parsers/return_parser.py +6 -6
  576. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  577. mindspore/rewrite/sparsify/sparsify.py +4 -1
  578. mindspore/rewrite/sparsify/utils.py +11 -5
  579. mindspore/rewrite/symbol_tree.py +577 -732
  580. mindspore/rewrite/symbol_tree_builder.py +9 -175
  581. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  582. mindspore/run_check/_check_version.py +46 -39
  583. mindspore/run_check/run_check.py +3 -2
  584. mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
  585. mindspore/safeguard/rewrite_obfuscation.py +517 -0
  586. mindspore/scipy/__init__.py +1 -1
  587. mindspore/scipy/linalg.py +67 -61
  588. mindspore/scipy/ops.py +5 -41
  589. mindspore/scipy/ops_grad.py +3 -2
  590. mindspore/scipy/ops_wrapper.py +5 -5
  591. mindspore/scipy/optimize/line_search.py +8 -8
  592. mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
  593. mindspore/scipy/optimize/minimize.py +16 -12
  594. mindspore/scipy/utils.py +1 -52
  595. mindspore/scipy/utils_const.py +4 -4
  596. mindspore/train/__init__.py +4 -4
  597. mindspore/train/_utils.py +13 -5
  598. mindspore/train/amp.py +410 -148
  599. mindspore/train/anf_ir_pb2.py +16 -4
  600. mindspore/train/callback/_backup_and_restore.py +8 -11
  601. mindspore/train/callback/_callback.py +80 -3
  602. mindspore/train/callback/_checkpoint.py +82 -51
  603. mindspore/train/callback/_early_stop.py +12 -15
  604. mindspore/train/callback/_history.py +1 -1
  605. mindspore/train/callback/_lambda_callback.py +13 -13
  606. mindspore/train/callback/_landscape.py +21 -17
  607. mindspore/train/callback/_loss_monitor.py +9 -10
  608. mindspore/train/callback/_on_request_exit.py +16 -33
  609. mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
  610. mindspore/train/callback/_summary_collector.py +44 -30
  611. mindspore/train/callback/_time_monitor.py +62 -12
  612. mindspore/train/data_sink.py +10 -16
  613. mindspore/train/dataset_helper.py +154 -86
  614. mindspore/train/loss_scale_manager.py +14 -9
  615. mindspore/train/metrics/__init__.py +10 -2
  616. mindspore/train/metrics/accuracy.py +1 -1
  617. mindspore/train/metrics/auc.py +1 -1
  618. mindspore/train/metrics/bleu_score.py +2 -2
  619. mindspore/train/metrics/confusion_matrix.py +14 -14
  620. mindspore/train/metrics/cosine_similarity.py +3 -3
  621. mindspore/train/metrics/dice.py +1 -1
  622. mindspore/train/metrics/fbeta.py +1 -1
  623. mindspore/train/metrics/hausdorff_distance.py +8 -6
  624. mindspore/train/metrics/mean_surface_distance.py +5 -4
  625. mindspore/train/metrics/metric.py +49 -17
  626. mindspore/train/metrics/occlusion_sensitivity.py +4 -4
  627. mindspore/train/metrics/perplexity.py +1 -1
  628. mindspore/train/metrics/precision.py +2 -2
  629. mindspore/train/metrics/recall.py +2 -3
  630. mindspore/train/metrics/roc.py +7 -7
  631. mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
  632. mindspore/train/metrics/topk.py +7 -4
  633. mindspore/train/mind_ir_pb2.py +193 -48
  634. mindspore/train/model.py +377 -133
  635. mindspore/train/serialization.py +697 -245
  636. mindspore/train/summary/_summary_adapter.py +5 -2
  637. mindspore/train/summary/_writer_pool.py +4 -3
  638. mindspore/train/summary/summary_record.py +25 -23
  639. mindspore/train/train_thor/convert_utils.py +39 -23
  640. mindspore/train/train_thor/dataset_helper.py +4 -3
  641. mindspore/train/train_thor/model_thor.py +8 -8
  642. mindspore/version.py +1 -1
  643. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
  644. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +647 -818
  645. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
  646. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  647. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  648. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  649. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  650. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  651. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  652. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  653. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  654. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  655. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  656. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  657. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  658. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  659. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  660. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  661. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  662. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  663. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  664. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  665. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  666. mindspore/_extends/graph_kernel/expander.py +0 -80
  667. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
  668. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  669. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  670. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  671. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  672. mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
  673. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  674. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  675. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  676. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  677. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  678. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  679. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  680. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  681. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  682. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  683. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  684. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  685. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  686. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  687. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  688. mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
  689. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  690. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  691. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  692. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  693. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  694. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  695. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  696. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  697. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  698. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  699. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  700. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  701. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  702. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  703. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  704. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  705. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  706. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  707. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  708. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  709. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  710. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  711. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  712. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  713. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  714. mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
  715. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  716. mindspore/_extends/parse/jit_fallback_modules.py +0 -51
  717. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  718. mindspore/dataset/engine/graphdata.py +0 -1586
  719. mindspore/include/api/net.h +0 -142
  720. mindspore/ops/_grad/grad_array_ops.py +0 -1347
  721. mindspore/ops/_grad/grad_clip_ops.py +0 -84
  722. mindspore/ops/_grad/grad_debug_ops.py +0 -68
  723. mindspore/ops/_grad/grad_inner_ops.py +0 -235
  724. mindspore/ops/_grad/grad_math_ops.py +0 -1684
  725. mindspore/ops/_grad/grad_nn_ops.py +0 -1529
  726. mindspore/ops/_grad/grad_other_ops.py +0 -89
  727. mindspore/ops/_grad/grad_sequence_ops.py +0 -296
  728. mindspore/ops/_grad/grad_sparse.py +0 -323
  729. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
  730. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
  731. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  732. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  733. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  734. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
  735. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
  736. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
  737. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
  738. mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
  739. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
  740. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
  741. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  742. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
  743. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  744. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
  745. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  746. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
  747. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
  748. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
  749. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  750. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  751. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
  752. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
  753. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
  754. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
  755. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
  756. mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
  757. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
  758. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
  759. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
  760. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  761. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
  762. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  763. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  764. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
  765. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
  766. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
  767. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  768. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  769. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  770. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
  771. mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
  772. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  773. mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
  774. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
  775. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
  776. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
  777. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
  778. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
  779. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
  780. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  781. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
  782. mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
  783. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
  784. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
  785. mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
  786. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  787. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
  788. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
  789. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
  790. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
  791. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
  792. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
  793. mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
  794. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  795. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
  796. mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
  797. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
  798. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
  799. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
  800. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
  801. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
  802. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
  803. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
  804. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
  805. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
  806. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
  807. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  808. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  809. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  810. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
  811. mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
  812. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  813. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  814. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
  815. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
  816. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
  817. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
  818. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  819. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  820. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  821. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
  822. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
  823. mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
  824. mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
  825. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
  826. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  827. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
  828. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
  829. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
  830. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
  831. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
  832. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
  833. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
  834. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
  835. mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
  836. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  837. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  838. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
  839. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
  840. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
  841. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  842. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
  843. mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
  844. mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
  845. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
  846. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  847. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
  848. mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
  849. mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
  850. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
  851. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  852. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
  853. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
  854. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  855. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
  856. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
  857. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  858. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  859. mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
  860. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
  861. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
  862. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
  863. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
  864. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  865. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
  866. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
  867. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
  868. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
  869. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  870. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  871. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
  872. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
  873. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
  874. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
  875. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
  876. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
  877. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
  878. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
  879. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  880. mindspore/rewrite/node_visitor.py +0 -44
  881. mindspore/rewrite/topological_manager.py +0 -203
  882. mindspore/scipy/sparse/linalg.py +0 -192
  883. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
  884. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
@@ -29,8 +29,10 @@ import atexit
  import glob
  import json
  import os
+ import queue
  import signal
  import stat
+ import subprocess
  import warnings
 
  import gc
@@ -62,7 +64,7 @@ from mindspore.dataset.text.utils import SentencePieceModel, DE_C_INTER_SENTENCE
  from mindspore.parallel._utils import _get_device_num
  from mindspore.dataset.debug import DebugHook
 
- from . import samplers
+ from mindspore.dataset.engine import samplers
  from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
  ITERATORS_LIST, _unset_iterator_cleanup
  from .queue import _SharedQueue, _Queue
@@ -127,8 +129,7 @@ def _reset_training_dataset(global_step, dataset_size):
  """
  dataset = _get_training_dataset()
  if dataset is not None:
- epoch = global_step // dataset_size
- dataset._reset(global_step, epoch) # pylint: disable=protected-access
+ dataset._reset(global_step, dataset_size) # pylint: disable=protected-access
  else:
  raise RuntimeError("Training dataset is not set.")
 
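The hunk above changes the call site so that `_reset` receives `dataset_size` directly rather than a precomputed epoch. A minimal sketch of what that presumably implies on the receiving side, assuming `Dataset._reset` now derives the epoch itself (hypothetical body, not taken from the diff):

```python
# Hypothetical sketch only: assumes Dataset._reset derives the epoch from
# global_step and dataset_size internally instead of receiving epoch directly.
class Dataset:
    def _reset(self, global_step, dataset_size):
        epoch = global_step // dataset_size          # formerly computed by the caller
        step_in_epoch = global_step % dataset_size   # position to resume from within the epoch
        # ... hand (epoch, step_in_epoch) to the underlying iterator/consumer ...
```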
@@ -136,9 +137,9 @@ def _reset_training_dataset(global_step, dataset_size):
  class Shuffle(str, Enum):
  """Specify the shuffle mode.
 
- - Shuffle.GLOBAL: Shuffle both the files and samples.
- - Shuffle.FILES: Shuffle files only.
- - Shuffle.INFILE: Shuffle data within each file.
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
+ - ``Shuffle.FILES`` : Shuffle files only.
+ - ``Shuffle.INFILE`` : Shuffle data within each file.
  """
  GLOBAL: str = "global"
  FILES: str = "files"
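For context, the `Shuffle` enum documented above is passed as the `shuffle` argument of file-based dataset loaders. A short illustrative snippet assuming the public `mindspore.dataset` API (file paths are placeholders):

```python
import mindspore.dataset as ds

# Placeholder file list; Shuffle controls how file order and sample order are randomized.
data_files = ["/path/to/data-0.tfrecord", "/path/to/data-1.tfrecord"]

global_shuffled = ds.TFRecordDataset(data_files, shuffle=ds.Shuffle.GLOBAL)  # shuffle files and samples
files_shuffled = ds.TFRecordDataset(data_files, shuffle=ds.Shuffle.FILES)    # shuffle file order only
```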
@@ -208,7 +209,7 @@ def zip(datasets):
  The number of datasets must be more than 1.
 
  Returns:
- Dataset, dataset zipped.
+ Dataset, a new dataset with the above operation applied.
 
  Raises:
  ValueError: If the number of datasets is 1.
@@ -216,6 +217,10 @@
 
  Examples:
  >>> # Create a dataset which is the combination of dataset_1 and dataset_2
+ >>> import mindspore.dataset as ds
+ >>>
+ >>> dataset_1 = ds.GeneratorDataset([1], "column1")
+ >>> dataset_2 = ds.GeneratorDataset([2], "column2")
  >>> dataset = ds.zip((dataset_1, dataset_2))
  """
  if len(datasets) <= 1:
@@ -316,7 +321,7 @@ class Dataset:
 
  Args:
  num_parallel_workers (int, optional): Number of workers to process the dataset in parallel.
- Default: None.
+ Default: ``None``.
  """
 
  def __init__(self, children=None, num_parallel_workers=None, cache=None):
@@ -346,6 +351,7 @@ class Dataset:
  self._repeat_count = None
  self._class_indexing = None
  self._sync = False
+ self._global_step = None
 
  @staticmethod
  def _get_operator_id(dataset):
@@ -382,36 +388,42 @@ class Dataset:
382
388
  _OP_PROCESS.update(generator_process)
383
389
  return op_name
384
390
 
385
- def create_ir_tree(self):
391
+ def create_ir_tree(self, getter_mode=False):
386
392
  """
387
393
  Internal method to build an IR tree.
388
394
 
395
+ Args:
396
+ getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``False``.
397
+
389
398
  Returns:
390
- DatasetNode, the root node of the IR tree.
391
- Dataset, the root dataset of the IR tree.
399
+ Union[DatasetNode, Dataset], the root node of the IR tree and the root dataset of the IR tree.
392
400
  """
393
401
  parent = self.parent
394
402
  self.parent = []
395
403
  dataset = copy.deepcopy(self)
396
404
  global _OP_NAME
397
405
  _OP_NAME = Dataset._get_operator_id(dataset)
398
- ir_tree = dataset.parse_tree()
406
+ ir_tree = dataset.parse_tree(getter_mode)
399
407
  self.parent = parent
400
408
  _init_device_info()
401
409
  return ir_tree, dataset
402
410
 
403
- def parse_tree(self):
411
+ def parse_tree(self, getter_mode=False):
404
412
  """
405
413
  Internal method to parse the API tree into an IR tree.
406
414
 
415
+ Args:
416
+ getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``False``.
417
+
407
418
  Returns:
408
419
  DatasetNode, the root node of the IR tree.
409
420
  """
410
421
  if len(self.parent) > 1:
411
422
  raise ValueError("The data pipeline is not a tree (i.e., one node has 2 consumers)")
412
- ir_children = [d.parse_tree() for d in self.children]
423
+ ir_children = [d.parse_tree(getter_mode) for d in self.children]
413
424
  # Bootstrap can only be performed on a copy of the original dataset node.
414
425
  # Bootstrap on original dataset node will make all iterators share the same process pool
426
+ self.pre_parse(getter_mode)
415
427
  self.iterator_bootstrap()
416
428
  ir_node = self.parse(ir_children)
417
429
  ir_node = self.post_parse(ir_node)
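A hedged sketch of the pull-mode path introduced by `getter_mode` (internal API, shown only for illustration): getters rebuild the IR tree with `getter_mode=True`, so `pre_parse` later disables multiprocessing and forces a single worker on every node.

    # Assuming `dataset` is any Dataset instance; this mirrors what
    # _init_tree_getters(getter_mode=True) does internally.
    ir_tree, api_tree = dataset.create_ir_tree(getter_mode=True)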
@@ -450,12 +462,15 @@ class Dataset:
450
462
  Serialize a pipeline into JSON string and dump into file if filename is provided.
451
463
 
452
464
  Args:
453
- filename (str): filename of JSON file to be saved as. Default: ''.
465
+ filename (str): filename of JSON file to be saved as. Default: ``""``.
454
466
 
455
467
  Returns:
456
468
  str, JSON string of the pipeline.
457
469
 
458
470
  Examples:
471
+ >>> import mindspore.dataset as ds
472
+ >>> mnist_dataset_dir = "/path/to/mnist_dataset_directory"
473
+ >>> dataset = ds.MnistDataset(dataset_dir=mnist_dataset_dir)
459
474
  >>> dataset_json = dataset.to_json("/path/to/mnist_dataset_pipeline.json")
460
475
  """
461
476
  ir_tree, _ = self.create_ir_tree()
@@ -489,7 +504,7 @@ class Dataset:
489
504
  element_length_function (Callable, optional): A function that takes in
490
505
  M arguments where M = len(column_names) and returns an integer. If no value is
491
506
  provided, then len(column_names) must be 1, and the size of the first
492
- dimension of that column will be taken as the length. Default: None.
507
+ dimension of that column will be taken as the length. Default: ``None``.
493
508
  pad_info (dict, optional): The information about how to batch each column. The key
494
509
  corresponds to the column name, and the value must be a tuple of 2 elements.
495
510
  The first element corresponds to the shape to pad to, and the second
@@ -497,21 +512,22 @@ class Dataset:
497
512
  specified, then that column will be padded to the longest in the current
498
513
  batch, and 0 will be used as the padding value. Any None dimensions will
499
514
  be padded to the longest in the current batch, unless
500
- `pad_to_bucket_boundary` is True. If no padding is wanted, set pad_info
501
- to None. Default: None.
502
- pad_to_bucket_boundary (bool, optional): If True, will pad each None
515
+ `pad_to_bucket_boundary` is ``True``. If no padding is wanted, set `pad_info`
516
+ to ``None``. Default: ``None``.
517
+ pad_to_bucket_boundary (bool, optional): If ``True``, will pad each None
503
518
  dimension in `pad_info` to the bucket_boundary minus 1. If there are any
504
519
  elements that fall into the last bucket, an error will occur.
505
- Default: False.
506
- drop_remainder (bool, optional): If True, will drop the last batch for each
507
- bucket if it is not a full batch. Default: False.
520
+ Default: ``False``.
521
+ drop_remainder (bool, optional): If ``True``, will drop the last batch for each
522
+ bucket if it is not a full batch. Default: ``False``.
508
523
 
509
524
  Returns:
510
- Dataset, dataset bucketized and batched by length.
525
+ Dataset, a new dataset with the above operation applied.
511
526
 
512
527
  Examples:
513
528
  >>> # Create a dataset where a certain number of rows are combined into a batch
514
529
  >>> # and drops the last incomplete batch if there is one.
530
+ >>> import mindspore.dataset as ds
515
531
  >>> import numpy as np
516
532
  >>> def generate_2_columns(n):
517
533
  ... for i in range(n):
@@ -553,15 +569,16 @@ class Dataset:
553
569
  batch_size (Union[int, Callable]): The number of rows each batch is created with. An
554
570
  int or callable object which takes exactly 1 parameter, BatchInfo.
555
571
  drop_remainder (bool, optional): Determines whether or not to drop the last block
556
- whose data row number is less than batch size. Default: False. If True, and if there are less
557
- than batch_size rows available to make the last batch, then those rows will
558
- be dropped and not propagated to the child node.
572
+ whose data row number is less than batch size. Default: ``False`` . If ``True`` ,
573
+ and if there are less than `batch_size` rows available to make the last batch,
574
+ then those rows will be dropped and not propagated to the child node.
559
575
  num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
560
- Default: None.
576
+ Default: ``None`` .
561
577
  **kwargs:
562
578
 
563
579
  - per_batch_map (Callable[[List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo], \
564
- (List[numpy.ndarray], ..., List[numpy.ndarray])], optional): Per batch map callable. Default: None.
580
+ (List[numpy.ndarray], ..., List[numpy.ndarray])], optional): Per batch map callable.
581
+ Default: ``None``.
565
582
  A callable which takes (List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo) as input parameters.
566
583
  Each list[numpy.ndarray] represents a batch of numpy.ndarray on a given column. The number of lists
567
584
  should match with the number of entries in input_columns. The last parameter of the callable should
@@ -570,30 +587,41 @@ class Dataset:
570
587
  as the input. output_columns is required if the number of output lists is different from input.
571
588
 
572
589
  - input_columns (Union[str, list[str]], optional): List of names of the input columns. The size of
573
- the list should match with signature of per_batch_map callable. Default: None.
590
+ the list should match with signature of `per_batch_map` callable. Default: ``None`` .
574
591
 
575
592
  - output_columns (Union[str, list[str]], optional): List of names assigned to the columns
576
593
  outputted by the last operation. This parameter is mandatory if len(input_columns) !=
577
594
  len(output_columns). The size of this list must match the number of output
578
- columns of the last operation. Default: None, output columns will have the same
595
+ columns of the last operation. Default: ``None`` , output columns will have the same
579
596
  name as the input columns, i.e., the columns will be replaced.
580
597
 
581
598
  - python_multiprocessing (bool, optional): Parallelize Python function `per_batch_map` with
582
- multi-processing or multi-threading mode, True means multi-processing, False means multi-threading
583
- If `per_batch_map` is a I/O bound task, use multi-threading mode.
584
- If `per_batch_map` is a CPU bound task, it is recommended to use multi-processing mode.
585
- Default: False, use python multi-threading mode.
586
-
587
- - max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to
588
- copy data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
599
+ multi-processing or multi-threading mode, ``True`` means multi-processing,
600
+ ``False`` means multi-threading. If `per_batch_map` is an I/O bound task, use
601
+ multi-threading mode. If `per_batch_map` is a CPU bound task, it is recommended to use
602
+ multi-processing mode. Default: ``False`` , use python multi-threading mode.
603
+
604
+ - max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
605
+ allocation to copy data between processes. The total occupied shared memory will increase as
606
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
607
+ used if python_multiprocessing is set to True. If it is an int value, both
608
+ ``input_columns`` and ``output_columns`` use this value as the per-row unit to create shared memory.
609
+ If it is a list, the first element is the per-row unit used to create shared memory for
610
+ ``input_columns``, and the second element is the per-row unit used to create shared memory for
611
+ ``output_columns``. Default: 16.
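A hedged sketch of the two forms described above (`per_batch_map_fn` and the column name are hypothetical): a scalar applies one per-row unit in both directions, while a 2-element list sizes the input and output queues separately.

    # Scalar form: 16 MB per row for both the input and output shared-memory queues.
    batched = dataset.batch(32, python_multiprocessing=True, max_rowsize=16)
    # List form: 16 MB per input row, 64 MB per (larger) mapped output row.
    batched = dataset.batch(32, per_batch_map=per_batch_map_fn, input_columns=["data"],
                            python_multiprocessing=True, max_rowsize=[16, 64])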
589
612
 
590
613
  Returns:
591
- BatchDataset, dataset batched.
614
+ Dataset, a new dataset with the above operation applied.
592
615
 
593
616
  Examples:
594
- >>> # 1) Create a dataset where every 100 rows are combined into a batch
617
+ >>> # 1) Create a dataset where every 5 rows are combined into a batch
595
618
  >>> # and drops the last incomplete batch if there is one.
596
- >>> dataset = dataset.batch(100, True)
619
+ >>> import mindspore.dataset as ds
620
+ >>> from PIL import Image
621
+ >>>
622
+ >>> cifar10_dataset_dir = "/path/to/cifar10_dataset_directory"
623
+ >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, num_samples=10)
624
+ >>> dataset = dataset.batch(5, True)
597
625
  >>>
598
626
  >>> # 2) resize image according to its batch number, if it's 5-th batch, resize to (5^2, 5^2) = (25, 25)
599
627
  >>> def np_resize(col, BatchInfo):
@@ -633,11 +661,11 @@ class Dataset:
633
661
  batch_size (Union[int, Callable]): The number of rows each batch is created with. An
634
662
  int or callable object which takes exactly 1 parameter, BatchInfo.
635
663
  drop_remainder (bool, optional): Determines whether or not to drop the last block
636
- whose data row number is less than batch size. Default: False. If True, and if there are less
637
- than batch_size rows available to make the last batch, then those rows will
664
+ whose data row number is less than batch size. Default: ``False``. If ``True``, and if there
665
+ are less than batch_size rows available to make the last batch, then those rows will
638
666
  be dropped and not propagated to the child node.
639
667
  num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
640
- Default: None.
668
+ Default: ``None``.
641
669
  pad_info (dict, optional): The information about how to batch each column. The key
642
670
  corresponds to the column name, and the value must be a tuple of 2 elements.
643
671
  The first element corresponds to the shape to pad to, and the second
@@ -645,19 +673,22 @@ class Dataset:
645
673
  specified, then that column will be padded to the longest in the current
646
674
  batch, and 0 will be used as the padding value. Any None dimensions will
647
675
  be padded to the longest in the current batch, unless
648
- pad_to_bucket_boundary is True. If no padding is wanted, set pad_info
649
- to None. Default: None.
676
+ pad_to_bucket_boundary is True. If no padding is wanted, set `pad_info`
677
+ to ``None``. Default: ``None``.
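A hedged sketch of an explicit `pad_info` (shapes and values are illustrative): each entry maps a column name to a tuple of the target shape and the pad value.

    import mindspore.dataset as ds

    dataset = ds.NumpySlicesDataset([[1], [1, 2], [1, 2, 3], [1, 2, 3, 4]], "column1")
    # Pad "column1" to shape [4] with 0, instead of padding to the longest row in each batch.
    dataset = dataset.padded_batch(2, drop_remainder=True, pad_info={"column1": ([4], 0)})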
650
678
 
651
679
  Returns:
652
- PaddedBatchDataset, dataset batched.
680
+ Dataset, a new dataset with the above operation applied.
653
681
 
654
682
  Examples:
655
683
  >>> # 1) Pad every sample to the largest sample's shape and batch the samples
656
- >>> dataset = dataset.padded_batch(100, True, pad_info={})
684
+ >>> import mindspore.dataset as ds
685
+ >>> dataset = ds.NumpySlicesDataset([[1], [1, 2], [1, 2, 3], [1, 2, 3, 4]], "column1")
686
+ >>> dataset = dataset.padded_batch(2, True, pad_info={})
657
687
  >>>
658
- >>> # 2) Create a dataset where every 100 rows are combined into a batch
688
+ >>> # 2) Create a dataset where every 3 rows are combined into a batch
659
689
  >>> # and drops the last incomplete batch if there is one.
660
- >>> dataset = dataset.padded_batch(100, True)
690
+ >>> dataset = ds.NumpySlicesDataset([i for i in range(10)], "column1")
691
+ >>> dataset = dataset.padded_batch(3, True)
661
692
  >>>
662
693
  >>> # 3) Create a dataset where its batch size is dynamic
663
694
  >>> # Define a callable batch size function and let batch size increase 1 each time.
@@ -674,16 +705,19 @@ class Dataset:
674
705
 
675
706
  Args:
676
707
  condition_name (str): The condition name that is used to toggle sending next row.
677
- num_batch (int): the number of batches without blocking at the start of each epoch. Default: 1.
678
- callback (function): The callback function that will be invoked when sync_update is called. Default: None.
708
+ num_batch (int): the number of batches without blocking at the start of each epoch.
709
+ Default: ``1``.
710
+ callback (function): The callback function that will be invoked when sync_update is called.
711
+ Default: ``None``.
679
712
 
680
713
  Returns:
681
- SyncWaitDataset, dataset added a blocking condition.
714
+ Dataset, a new dataset with the above operation applied.
682
715
 
683
716
  Raises:
684
717
  RuntimeError: If condition name already exists.
685
718
 
686
719
  Examples:
720
+ >>> import mindspore.dataset as ds
687
721
  >>> import numpy as np
688
722
  >>> def gen():
689
723
  ... for i in range(100):
@@ -735,15 +769,18 @@ class Dataset:
735
769
  dataset will result in a global shuffle.
736
770
 
737
771
  Returns:
738
- Dataset, dataset shuffled.
772
+ Dataset, a new dataset with the above operation applied.
739
773
 
740
774
  Raises:
741
775
  RuntimeError: If sync operations exist before shuffle.
742
776
 
743
777
  Examples:
744
- >>> # dataset is an instance object of Dataset
745
- >>> # Optionally set the seed for the first epoch
778
+ >>> import mindspore.dataset as ds
779
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
780
+ >>>
781
+ >>> # Optionally set the seed for fixed randomness
746
782
  >>> ds.config.set_seed(58)
783
+ >>>
747
784
  >>> # Create a shuffled dataset using a shuffle buffer of size 4
748
785
  >>> dataset = dataset.shuffle(4)
749
786
  """
@@ -758,9 +795,10 @@ class Dataset:
758
795
  return a `Dataset` .
759
796
 
760
797
  Returns:
761
- Dataset, dataset applied by the function.
798
+ Dataset, a new dataset with the above operation applied.
762
799
 
763
800
  Examples:
801
+ >>> import mindspore.dataset as ds
764
802
  >>> # 1) flat_map on one column dataset
765
803
  >>> dataset = ds.NumpySlicesDataset([[0, 1], [2, 3]], shuffle=False)
766
804
  >>>
@@ -820,11 +858,11 @@ class Dataset:
820
858
  `output_columns` , and if not specified, the column name of output column is same as that of `input_columns` .
821
859
 
822
860
  - If you use transformations (
823
- `vision transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
861
+ `vision transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
824
862
  dataset.transforms.html#module-mindspore.dataset.vision>`_ ,
825
- `nlp transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
863
+ `nlp transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
826
864
  dataset.transforms.html#module-mindspore.dataset.text>`_ ,
827
- `audio transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
865
+ `audio transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
828
866
  dataset.transforms.html#module-mindspore.dataset.audio>`_ )
829
867
  provided by mindspore dataset, please use the following parameters:
830
868
 
@@ -839,31 +877,37 @@ class Dataset:
839
877
  applied on the dataset. Operations are applied in the order they appear in this list.
840
878
  input_columns (Union[str, list[str]], optional): List of the names of the columns that will be passed to
841
879
  the first operation as input. The size of this list must match the number of
842
- input columns expected by the first operation. Default: None, the first
880
+ input columns expected by the first operation. Default: ``None``, the first
843
881
  operation will be passed however many columns that are required, starting from
844
882
  the first column.
845
883
  output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
846
884
  the last operation. This parameter is mandatory if len(input_columns) !=
847
885
  len(output_columns). The size of this list must match the number of output
848
- columns of the last operation. Default: None, output columns will have the same
886
+ columns of the last operation. Default: ``None``, output columns will have the same
849
887
  name as the input columns, i.e., the columns will be replaced.
850
888
  num_parallel_workers (int, optional): Number of threads used to process the dataset in
851
- parallel. Default: None, the value from the configuration will be used.
889
+ parallel. Default: ``None``, the value from the configuration will be used.
852
890
  **kwargs:
853
891
 
854
892
  - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes.
855
- This option could be beneficial if the Python operation is computational heavy. Default: False.
893
+ This option could be beneficial if the Python operation is computational heavy. Default: ``False``.
856
894
 
857
- - max_rowsize (int, optional): Maximum size of row in MB that is used for shared memory allocation to
858
- copy data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
895
+ - max_rowsize (Union[int, list[int]], optional): Maximum size of row in MB that is used for shared
896
+ memory allocation to copy data between processes. The total occupied shared memory will increase as
897
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
898
+ used if python_multiprocessing is set to True. If it is an int value, both
899
+ ``input_columns`` and ``output_columns`` use this value as the per-row unit to create shared memory.
900
+ If it is a list, the first element is the per-row unit used to create shared memory for
901
+ ``input_columns``, and the second element is the per-row unit used to create shared memory for
902
+ ``output_columns``. Default: 16.
859
903
 
860
904
  - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
861
- Default: None, which means no cache is used.
905
+ Default: ``None``, which means no cache is used.
862
906
 
863
907
  - callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called.
864
- Default: None.
908
+ Default: ``None``.
865
909
 
866
- - offload (bool, optional): Flag to indicate whether offload is used. Default: None.
910
+ - offload (bool, optional): Flag to indicate whether offload is used. Default: ``None``.
867
911
 
868
912
  Note:
869
913
  - Input `operations` accepts TensorOperations defined in mindspore.dataset part, plus user-defined
@@ -872,17 +916,21 @@ class Dataset:
872
916
  `operations` .
873
917
 
874
918
  Returns:
875
- Dataset, dataset after mapping operation.
919
+ Dataset, a new dataset with the above operation applied.
876
920
 
877
921
  Examples:
922
+ >>> import mindspore.dataset as ds
923
+ >>> import mindspore.dataset.vision as vision
878
924
  >>> # dataset is an instance of Dataset which has 2 columns, "image" and "label".
879
925
  >>> # image is of type bytes type which can be decoded to RGB
880
926
  >>> # label is of type int32
927
+ >>> cifar10_dataset_dir = "/path/to/cifar10_dataset_directory"
928
+ >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir)
881
929
  >>>
882
930
  >>> # Define two operations, where each operation accepts 1 input column and outputs 1 column.
883
- >>> decode_op = c_vision.Decode(rgb=True)
884
- >>> random_jitter_op = c_vision.RandomColorAdjust(brightness=(0.8, 0.8), contrast=(1, 1),
885
- ... saturation=(1, 1), hue=(0, 0))
931
+ >>> decode_op = vision.Decode(to_pil=False)
932
+ >>> random_jitter_op = vision.RandomColorAdjust(brightness=(0.8, 0.8), contrast=(1, 1),
933
+ ... saturation=(1, 1), hue=(0, 0))
886
934
  >>>
887
935
  >>> # 1) Simple map example.
888
936
  >>>
@@ -948,16 +996,19 @@ class Dataset:
948
996
  Args:
949
997
  predicate (callable): Python callable which returns a boolean value. If False then filter the element.
950
998
  input_columns (Union[str, list[str]], optional): List of names of the input columns. If not provided
951
- or provided with None, the predicate will be applied on all columns in the dataset. Default: None.
999
+ or provided with ``None``, the predicate will be applied on all columns in the dataset.
1000
+ Default: ``None``.
952
1001
  num_parallel_workers (int, optional): Number of workers to process the dataset
953
- in parallel. Default: None.
1002
+ in parallel. Default: ``None``.
954
1003
 
955
1004
  Returns:
956
- Dataset, dataset filtered.
1005
+ Dataset, a new dataset with the above operation applied.
957
1006
 
958
1007
  Examples:
959
- >>> # generator data(0 ~ 63)
1008
+ >>> # generator data(0 ~ 19)
960
1009
  >>> # filter out the data that is greater than or equal to 11
1010
+ >>> import mindspore.dataset as ds
1011
+ >>> dataset = ds.GeneratorDataset([i for i in range(20)], "data")
961
1012
  >>> dataset = dataset.filter(predicate=lambda data: data < 11, input_columns = ["data"])
962
1013
  """
963
1014
  return FilterDataset(self, predicate, input_columns, num_parallel_workers)
@@ -965,20 +1016,21 @@ class Dataset:
965
1016
  @check_repeat
966
1017
  def repeat(self, count=None):
967
1018
  """
968
- Repeat this dataset `count` times. Repeat infinitely if the count is None or -1.
1019
+ Repeat this dataset `count` times. Repeat infinitely if the `count` is ``None`` or ``-1``.
969
1020
 
970
1021
  Note:
971
1022
  The order of using repeat and batch reflects the number of batches. It is recommended that
972
1023
  the repeat operation is used after the batch operation.
973
1024
 
974
1025
  Args:
975
- count (int): Number of times the dataset is going to be repeated. Default: None.
1026
+ count (int): Number of times the dataset is going to be repeated. Default: ``None``.
976
1027
 
977
1028
  Returns:
978
- Dataset, dataset repeated.
1029
+ Dataset, a new dataset with the above operation applied.
979
1030
 
980
1031
  Examples:
981
- >>> # dataset is an instance object of Dataset
1032
+ >>> import mindspore.dataset as ds
1033
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
982
1034
  >>>
983
1035
  >>> # Create a dataset where the dataset is repeated for 50 epochs
984
1036
  >>> dataset = dataset.repeat(50)
@@ -1004,11 +1056,12 @@ class Dataset:
1004
1056
  count (int): Number of elements in the dataset to be skipped.
1005
1057
 
1006
1058
  Returns:
1007
- Dataset, dataset that containing rows like origin rows subtract skipped rows.
1059
+ Dataset, a new dataset with the above operation applied.
1008
1060
 
1009
1061
  Examples:
1010
- >>> # dataset is an instance object of Dataset
1011
- >>> # Create a dataset which skips first 3 elements from data
1062
+ >>> import mindspore.dataset as ds
1063
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1064
+ >>> # Skip first 3 elements of dataset and retain 7 elements.
1012
1065
  >>> dataset = dataset.skip(3)
1013
1066
  """
1014
1067
  return SkipDataset(self, count)
@@ -1016,23 +1069,28 @@ class Dataset:
1016
1069
  @check_take
1017
1070
  def take(self, count=-1):
1018
1071
  """
1019
- Takes at most given numbers of elements from the dataset.
1020
-
1021
- Note:
1022
- 1. If count is greater than the number of elements in the dataset or equal to -1,
1023
- all the elements in dataset will be taken.
1024
- 2. The order of using take and batch matters. If take is before batch operation,
1025
- then take the given number of rows; otherwise take the given number of batches.
1072
+ Take the first specified number of samples from the dataset.
1026
1073
 
1027
1074
  Args:
1028
- count (int, optional): Number of elements to be taken from the dataset. Default: -1.
1075
+ count (int, optional): The desired number of samples to take. If the value exceeds
1076
+ the total number of samples in the dataset, all data will be returned.
1077
+ Default: ``-1`` , will return all data.
1078
+
1079
+ Note:
1080
+ When there are operations that will change the number of samples of the dataset in
1081
+ the data pipeline, the location of the `take` operation can change its effect.
1082
+ For example, `batch` operation will combine the successive samples of the specified
1083
+ `batch_size` into 1 sample, so `.batch(batch_size).take(1)` will be equivalent to
1084
+ `.take(batch_size).batch(batch_size)`.
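A small check of the equivalence stated above (a sketch; shuffle is disabled so the row order is fixed):

    import mindspore.dataset as ds

    # Both pipelines yield a single batch holding the first 4 samples.
    d1 = ds.GeneratorDataset([i for i in range(10)], "column1", shuffle=False)
    d1 = d1.batch(4).take(1)      # batch first, then take 1 batch
    d2 = ds.GeneratorDataset([i for i in range(10)], "column1", shuffle=False)
    d2 = d2.take(4).batch(4)      # take 4 samples first, then batch them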
1029
1085
 
1030
1086
  Returns:
1031
- Dataset, dataset taken.
1087
+ Dataset, a new dataset with the above operation applied.
1032
1088
 
1033
1089
  Examples:
1034
- >>> # dataset is an instance object of Dataset
1035
- >>> # Create a dataset where the dataset includes 50 elements.
1090
+ >>> import mindspore.dataset as ds
1091
+ >>> mnist_dataset_dir = "/path/to/mnist_dataset_directory"
1092
+ >>> dataset = ds.MnistDataset(dataset_dir=mnist_dataset_dir)
1093
+ >>> # Take 50 samples from MNIST dataset.
1036
1094
  >>> dataset = dataset.take(50)
1037
1095
  """
1038
1096
  return TakeDataset(self, count)
@@ -1113,7 +1171,7 @@ class Dataset:
1113
1171
  - The sum of split sizes > K, the difference of sigma(round(fi * K)) - K will be removed from the first
1114
1172
  large enough split such that it will have at least 1 row after removing the difference.
1115
1173
 
1116
- randomize (bool, optional): Determines whether or not to split the data randomly. Default: True.
1174
+ randomize (bool, optional): Determines whether or not to split the data randomly. Default: ``True``.
1117
1175
  If True, the data will be randomly split. Otherwise, each split will be created with
1118
1176
  consecutive rows from the dataset.
1119
1177
 
@@ -1124,7 +1182,7 @@ class Dataset:
1124
1182
  will be different in each epoch.
1125
1183
 
1126
1184
  Returns:
1127
- tuple(Dataset), a tuple of datasets that have been split.
1185
+ Tuple[Dataset], a tuple of new datasets split from the original one.
1128
1186
 
1129
1187
  Raises:
1130
1188
  RuntimeError: If get_dataset_size returns None or is not supported for this dataset.
@@ -1136,9 +1194,9 @@ class Dataset:
1136
1194
  floats don't sum to 1.
1137
1195
 
1138
1196
  Examples:
1139
- >>> # TextFileDataset is not a mappable dataset, so this non-optimized split will be called.
1140
- >>> # Since many datasets have shuffle on by default, set shuffle to False if split will be called!
1141
- >>> dataset = ds.TextFileDataset(text_file_dataset_dir, shuffle=False)
1197
+ >>> # Split the data into train part and test part.
1198
+ >>> import mindspore.dataset as ds
1199
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1142
1200
  >>> train_dataset, test_dataset = dataset.split([0.9, 0.1])
1143
1201
  """
1144
1202
  if self.is_shuffled():
@@ -1179,14 +1237,17 @@ class Dataset:
1179
1237
  to be zipped together with this dataset.
1180
1238
 
1181
1239
  Returns:
1182
- Dataset, dataset zipped.
1240
+ Dataset, a new dataset with the above operation applied.
1183
1241
 
1184
1242
  Raises:
1185
1243
  TypeError: The parameter is not dataset object or tuple of dataset objects.
1186
1244
 
1187
1245
  Examples:
1188
- >>> # Create a dataset which is the combination of dataset and dataset_1
1189
- >>> dataset = dataset.zip(dataset_1)
1246
+ >>> # Create a dataset which is the combination of dataset_1 and dataset_2
1247
+ >>> import mindspore.dataset as ds
1248
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1")
1249
+ >>> dataset_2 = ds.GeneratorDataset([1, 2, 3], "column2")
1250
+ >>> dataset = dataset_1.zip(dataset_2)
1190
1251
  """
1191
1252
  if isinstance(datasets, tuple):
1192
1253
  datasets = (self, *datasets)
@@ -1202,6 +1263,12 @@ class Dataset:
1202
1263
  Concatenate the dataset objects in the input list.
1203
1264
  Performing "+" operation on dataset objects can achieve the same effect.
1204
1265
 
1266
+ For a dataset concatenated from multiple dataset objects, data is returned in the order of the
1267
+ datasets passed in. If you want to change the data order (such as random selection from each dataset
1268
+ instead of in sequence), apply the `use_sampler` method on the concatenated dataset object.
1269
+ Currently `use_sampler` supports `dataset.DistributedSampler` for sharding selection from each dataset
1270
+ or `dataset.RandomSampler` for random selection from each dataset, see examples below.
1271
+
1205
1272
  Note:
1206
1273
  The column name, and rank and type of the column data must be the same in the input datasets.
1207
1274
 
@@ -1210,13 +1277,45 @@ class Dataset:
1210
1277
  to be concatenated together with this dataset.
1211
1278
 
1212
1279
  Returns:
1213
- Dataset, dataset concatenated.
1280
+ Dataset, a new dataset with the above operation applied.
1214
1281
 
1215
1282
  Examples:
1283
+ >>> import mindspore.dataset as ds
1284
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
1285
+ >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
1286
+ >>>
1216
1287
  >>> # Create a dataset by concatenating dataset_1 and dataset_2 with "+" operator
1217
1288
  >>> dataset = dataset_1 + dataset_2
1218
1289
  >>> # Create a dataset by concatenating dataset_1 and dataset_2 with concat operation
1219
1290
  >>> dataset = dataset_1.concat(dataset_2)
1291
+ >>>
1292
+ >>> # Check the data order of dataset
1293
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
1294
+ >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
1295
+ >>> dataset = dataset_1 + dataset_2
1296
+ >>> result = list(dataset)
1297
+ >>> # [[Tensor(shape=[], dtype=Int64, value= 1)], [Tensor(shape=[], dtype=Int64, value= 2)],
1298
+ >>> # [Tensor(shape=[], dtype=Int64, value= 3)], [Tensor(shape=[], dtype=Int64, value= 4)],
1299
+ >>> # [Tensor(shape=[], dtype=Int64, value= 5)], [Tensor(shape=[], dtype=Int64, value= 6)]]
1300
+ >>>
1301
+ >>> # Change the data order of concatenated dataset with sharding selection
1302
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
1303
+ >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
1304
+ >>> dataset = dataset_1.concat(dataset_2)
1305
+ >>> dataset.use_sampler(ds.DistributedSampler(num_shards=2, shard_id=1, shuffle=False))
1306
+ >>> result = list(dataset)
1307
+ >>> # [[Tensor(shape=[], dtype=Int64, value= 2)], [Tensor(shape=[], dtype=Int64, value= 4)],
1308
+ >>> # [Tensor(shape=[], dtype=Int64, value= 6)]]
1309
+ >>>
1310
+ >>> # Change the data order of concatenated dataset with random selection
1311
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
1312
+ >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
1313
+ >>> dataset = dataset_1.concat(dataset_2)
1314
+ >>> dataset.use_sampler(ds.RandomSampler())
1315
+ >>> result = list(dataset)
1316
+ >>> # [[Tensor(shape=[], dtype=Int64, value= 1)], [Tensor(shape=[], dtype=Int64, value= 4)],
1317
+ >>> # [Tensor(shape=[], dtype=Int64, value= 2)], [Tensor(shape=[], dtype=Int64, value= 5)],
1318
+ >>> # [Tensor(shape=[], dtype=Int64, value= 6)], [Tensor(shape=[], dtype=Int64, value= 3)]]
1220
1319
  """
1221
1320
  if isinstance(datasets, Dataset):
1222
1321
  datasets = [self] + [datasets]
@@ -1236,16 +1335,17 @@ class Dataset:
1236
1335
  output_columns (Union[str, list[str]]): List of names of the output columns.
1237
1336
 
1238
1337
  Returns:
1239
- Dataset, dataset renamed.
1338
+ Dataset, a new dataset with the above operation applied.
1240
1339
 
1241
1340
  Examples:
1242
- >>> # dataset is an instance object of Dataset
1341
+ >>> import mindspore.dataset as ds
1243
1342
  >>> input_columns = ["input_col1", "input_col2", "input_col3"]
1244
1343
  >>> output_columns = ["output_col1", "output_col2", "output_col3"]
1245
1344
  >>>
1246
- >>> # Create a dataset where input_col1 is renamed to output_col1, and
1247
- >>> # input_col2 is renamed to output_col2, and input_col3 is renamed
1248
- >>> # to output_col3.
1345
+ >>> # Create a dataset with 3 columns
1346
+ >>> dataset = ds.GeneratorDataset([(1, 2, 3), (3, 4, 5), (5, 6, 7)], column_names=input_columns)
1347
+ >>>
1348
+ >>> # Rename "input_col1" to "output_col1", "input_col2" to "output_col2", "input_col3" to "output_col3"
1249
1349
  >>> dataset = dataset.rename(input_columns=input_columns, output_columns=output_columns)
1250
1350
  """
1251
1351
 
@@ -1261,13 +1361,15 @@ class Dataset:
1261
1361
  columns(Union[str, list[str]]): List of names of the columns to project.
1262
1362
 
1263
1363
  Returns:
1264
- Dataset, dataset projected.
1364
+ Dataset, a new dataset with the above operation applied.
1265
1365
 
1266
1366
  Examples:
1267
- >>> # dataset is an instance object of Dataset
1268
- >>> columns_to_project = ["column3", "column1", "column2"]
1367
+ >>> import mindspore.dataset as ds
1368
+ >>> # Create a dataset with 3 columns
1369
+ >>> input_columns = ["column1", "column2", "column3"]
1370
+ >>> dataset = ds.GeneratorDataset([(1, 2, 3), (3, 4, 5), (5, 6, 7)], column_names=input_columns)
1269
1371
  >>>
1270
- >>> # Create a dataset that consists of column3, column1, column2
1372
+ >>> columns_to_project = ["column3", "column1", "column2"]
1271
1373
  >>> # in that order, regardless of the original order of columns.
1272
1374
  >>> dataset = dataset.project(columns=columns_to_project)
1273
1375
  """
@@ -1283,10 +1385,11 @@ class Dataset:
1283
1385
  return a preprocessed `Dataset` .
1284
1386
 
1285
1387
  Returns:
1286
- Dataset, dataset applied by the function.
1388
+ Dataset, a new dataset with the above operation applied.
1287
1389
 
1288
1390
  Examples:
1289
- >>> # dataset is an instance object of Dataset
1391
+ >>> import mindspore.dataset as ds
1392
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1290
1393
  >>>
1291
1394
  >>> # Declare an apply_func function which returns a Dataset object
1292
1395
  >>> def apply_func(data):
@@ -1310,41 +1413,45 @@ class Dataset:
1310
1413
  return dataset
1311
1414
 
1312
1415
  @check_device_send
1313
- def device_que(self, send_epoch_end=True, create_data_info_queue=False):
1416
+ def device_que(self, send_epoch_end=True, create_data_info_queue=False, queue_name=""):
1314
1417
  """
1315
1418
  Return a transferred Dataset that transfers data through a device.
1316
1419
 
1317
1420
  Args:
1318
- send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True.
1421
+ send_epoch_end (bool, optional): Whether to send end of sequence to device or not.
1422
+ Default: ``True``.
1319
1423
  create_data_info_queue (bool, optional): Whether to create queue which stores
1320
- types and shapes of data or not. Default: False.
1424
+ types and shapes of data or not. Default: ``False``.
1425
+ queue_name (str, optional): Name of queue which connects dataset processing and model
1426
+ computing. Default: ``""``.
1321
1427
 
1322
1428
  Note:
1323
1429
  If device is Ascend, features of data will be transferred one by one. The limitation
1324
1430
  of data transmission per time is 256M.
1325
1431
 
1326
1432
  Returns:
1327
- Dataset, dataset for transferring.
1433
+ Dataset, a new dataset with the above operation applied.
1328
1434
 
1329
1435
  Examples:
1436
+ >>> import mindspore.dataset as ds
1330
1437
  >>> import time
1331
1438
  >>>
1332
1439
  >>> data = ds.TFRecordDataset('/path/to/TF_FILES', '/path/to/TF_SCHEMA_FILE', shuffle=ds.Shuffle.FILES)
1333
- >>>
1334
1440
  >>> data = data.device_que()
1335
1441
  >>> data.send()
1336
1442
  >>> time.sleep(0.1)
1337
1443
  >>> data.stop_send()
1338
1444
  """
1339
- return TransferDataset(self, send_epoch_end, create_data_info_queue)
1445
+ return TransferDataset(self, send_epoch_end, create_data_info_queue, queue_name)
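A minimal sketch of the new `queue_name` parameter (the queue name is an assumed value):

    import mindspore.dataset as ds

    data = ds.TFRecordDataset('/path/to/TF_FILES', '/path/to/TF_SCHEMA_FILE', shuffle=ds.Shuffle.FILES)
    # Name the transfer queue explicitly instead of relying on the default "".
    data = data.device_que(queue_name="dataset_queue")
    data.send()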
1340
1446
 
1341
1447
  @check_save
1342
1448
  def save(self, file_name, num_files=1, file_type='mindrecord'):
1343
1449
  """
1344
1450
  Save the dynamic data processed by the dataset pipeline in common dataset format.
1345
- Supported dataset formats: `mindrecord` only. And you can use `MindDataset` API to read the saved file(s).
1451
+ Supported dataset formats: ``'mindrecord'`` only. And you can use
1452
+ :class:`mindspore.dataset.MindDataset` API to read the saved file(s).
1346
1453
 
1347
- Implicit type casting exists when saving data as `mindrecord` . The transform table shows how to do
1454
+ Implicit type casting exists when saving data as ``'mindrecord'`` . The transform table shows how to do
1348
1455
  type casting.
1349
1456
 
1350
1457
  .. list-table:: Implicit Type Casting when Saving as `mindrecord`
@@ -1395,27 +1502,27 @@ class Dataset:
1395
1502
  - Multi-dimensional string not supported
1396
1503
 
1397
1504
  Note:
1398
- 1. To save the samples in order, set dataset's shuffle to False and num_files to 1.
1505
+ 1. To save the samples in order, set dataset's `shuffle` to ``False`` and `num_files` to ``1``.
1399
1506
  2. Before calling the function, do not use batch operation, repeat operation or data augmentation operations
1400
1507
  with random attribute in map operation.
1401
1508
  3. When array dimension is variable, one-dimensional arrays or
1402
1509
  multi-dimensional arrays with variable dimension 0 are supported.
1403
- 4. Mindrecord does not support uint64, multi-dimensional uint8(drop dimension) nor
1510
+ 4. MindRecord does not support uint64, multi-dimensional uint8(drop dimension) nor
1404
1511
  multi-dimensional string.
1405
1512
 
1406
1513
  Args:
1407
1514
  file_name (str): Path to dataset file.
1408
- num_files (int, optional): Number of dataset files. Default: 1.
1409
- file_type (str, optional): Dataset format. Default: 'mindrecord'.
1515
+ num_files (int, optional): Number of dataset files. Default: ``1`` .
1516
+ file_type (str, optional): Dataset format. Default: ``'mindrecord'`` .
1410
1517
 
1411
1518
  Examples:
1519
+ >>> import mindspore.dataset as ds
1412
1520
  >>> import numpy as np
1413
1521
  >>>
1414
1522
  >>> def generator_1d():
1415
1523
  ... for i in range(10):
1416
1524
  ... yield (np.array([i]),)
1417
1525
  >>>
1418
- >>>
1419
1526
  >>> # apply dataset operations
1420
1527
  >>> d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
1421
1528
  >>> d1.save('/path/to/save_file')
@@ -1442,19 +1549,21 @@ class Dataset:
1442
1549
 
1443
1550
  Args:
1444
1551
  columns (list[str], optional): List of columns to be used to specify the order of columns.
1445
- Default: None, means all columns.
1552
+ Default: ``None``, means all columns.
1446
1553
  num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
1447
- Default: -1, iterator can be iterated infinite number of epochs.
1554
+ Default: ``-1``, iterator can be iterated infinite number of epochs.
1448
1555
  output_numpy (bool, optional): Whether or not to output NumPy datatype.
1449
- If output_numpy=False, iterator will output MSTensor. Default: False.
1450
- do_copy (bool, optional): When output data type is mindspore.Tensor,
1451
- use this param to select the conversion method, only take False for better performance. Default: True.
1556
+ If `output_numpy` is ``False``, iterator will output MSTensor. Default: ``False``.
1557
+ do_copy (bool, optional): When output data type is :class:`mindspore.Tensor`,
1558
+ use this parameter to select the conversion method; setting it to False gives better performance.
1559
+ Default: ``True``.
1452
1560
 
1453
1561
  Returns:
1454
- Iterator, tuple iterator over the dataset.
1562
+ Iterator, a dataset iterator that returns data of type Tuple.
1455
1563
 
1456
1564
  Examples:
1457
- >>> # dataset is an instance object of Dataset
1565
+ >>> import mindspore.dataset as ds
1566
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1458
1567
  >>> iterator = dataset.create_tuple_iterator()
1459
1568
  >>> for item in iterator:
1460
1569
  ... # item is a list
@@ -1476,17 +1585,19 @@ class Dataset:
1476
1585
 
1477
1586
  Args:
1478
1587
  num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
1479
- Default: -1, iterator can be iterated infinite number of epochs.
1588
+ Default: ``-1`` , iterator can be iterated infinite number of epochs.
1480
1589
  output_numpy (bool, optional): Whether or not to output NumPy datatype,
1481
- if output_numpy=False, iterator will output MSTensor. Default: False.
1482
- do_copy (bool, optional): When output data type is mindspore.Tensor,
1483
- use this param to select the conversion method, only take False for better performance. Default: True.
1590
+ if `output_numpy` is ``False``, iterator will output MSTensor. Default: ``False`` .
1591
+ do_copy (bool, optional): When output data type is :class:`mindspore.Tensor`,
1592
+ use this parameter to select the conversion method; setting it to False gives better performance.
1593
+ Default: ``True`` .
1484
1594
 
1485
1595
  Returns:
1486
- Iterator, dictionary iterator over the dataset.
1596
+ Iterator, a dataset iterator that returns data of type Dict.
1487
1597
 
1488
1598
  Examples:
1489
- >>> # dataset is an instance object of Dataset
1599
+ >>> import mindspore.dataset as ds
1600
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1490
1601
  >>> iterator = dataset.create_dict_iterator()
1491
1602
  >>> for item in iterator:
1492
1603
  ... # item is a dict
@@ -1515,7 +1626,8 @@ class Dataset:
1515
1626
  int, tuple of the input index information.
1516
1627
 
1517
1628
  Examples:
1518
- >>> # dataset is an instance object of Dataset
1629
+ >>> import mindspore.dataset as ds
1630
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1519
1631
  >>> # set input_indexs
1520
1632
  >>> dataset.input_indexs = 10
1521
1633
  >>> print(dataset.input_indexs)
@@ -1542,11 +1654,14 @@ class Dataset:
1542
1654
  def copy_batch_size(self, value):
1543
1655
  self._batch_size = value
1544
1656
 
1545
- def _init_tree_getters(self):
1657
+ def _init_tree_getters(self, getter_mode=True):
1546
1658
  """
1547
1659
  Get pipeline information.
1660
+
1661
+ Args:
1662
+ getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``True``.
1548
1663
  """
1549
- ir_tree, api_tree = self.create_ir_tree()
1664
+ ir_tree, api_tree = self.create_ir_tree(getter_mode)
1550
1665
 
1551
1666
  runtime_context = cde.PythonRuntimeContext()
1552
1667
  runtime_context.Init()
@@ -1576,8 +1691,12 @@ class Dataset:
1576
1691
  list, list of column names in the dataset.
1577
1692
 
1578
1693
  Examples:
1579
- >>> # dataset is an instance object of Dataset
1694
+ >>> import mindspore.dataset as ds
1695
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1580
1696
  >>> col_names = dataset.get_col_names()
1697
+ >>> print(col_names)
1698
+ ['column1']
1699
+
1581
1700
  """
1582
1701
  if self._col_names is None:
1583
1702
  runtime_getter = self._init_tree_getters()
@@ -1591,22 +1710,26 @@ class Dataset:
1591
1710
  Get the shapes of output data.
1592
1711
 
1593
1712
  Args:
1594
- estimate (bool): If `estimate` is False, will return the shapes of first data row.
1713
+ estimate (bool): If `estimate` is ``False`` , will return the shapes of first data row.
1595
1714
  Otherwise, will iterate the whole dataset and return the estimated shapes of data row,
1596
- where dynamic shape is marked as None (used in dynamic data shapes scenario). Default: False.
1715
+ where dynamic shape is marked as None (used in dynamic data shapes scenario).
1716
+ Default: ``False`` .
1597
1717
 
1598
1718
  Returns:
1599
1719
  list, list of shapes of each column.
1600
1720
 
1601
1721
  Examples:
1722
+ >>> import mindspore.dataset as ds
1602
1723
  >>> import numpy as np
1603
1724
  >>>
1604
1725
  >>> def generator1():
1605
1726
  ... for i in range(1, 100):
1606
- ... yield np.ones((16, i, 83)), np.array(i)
1727
+ ... yield np.ones((16, 83, 83)), np.array([i])
1607
1728
  >>>
1608
1729
  >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
1609
1730
  >>> output_shapes = dataset.output_shapes()
1731
+ >>> print(output_shapes)
1732
+ [[16, 83, 83], [1]]
1610
1733
  """
1611
1734
  # cache single shape
1612
1735
  if not estimate and self.saved_output_shapes is not None:
@@ -1641,8 +1764,17 @@ class Dataset:
1641
1764
  list, list of data types.
1642
1765
 
1643
1766
  Examples:
1644
- >>> # dataset is an instance object of Dataset
1767
+ >>> import mindspore.dataset as ds
1768
+ >>> import numpy as np
1769
+ >>>
1770
+ >>> def generator1():
1771
+ ... for i in range(1, 100):
1772
+ ... yield np.ones((16, 83, 83)).astype(np.float32), np.array([i]).astype(np.int32)
1773
+ >>>
1774
+ >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
1645
1775
  >>> output_types = dataset.output_types()
1776
+ >>> print(output_types)
1777
+ [dtype('float32'), dtype('int32')]
1646
1778
  """
1647
1779
  if self.saved_output_types is None:
1648
1780
  runtime_getter = self._init_tree_getters()
@@ -1666,8 +1798,18 @@ class Dataset:
1666
1798
  int, number of batches.
1667
1799
 
1668
1800
  Examples:
1669
- >>> # dataset is an instance object of Dataset
1801
+ >>> import mindspore.dataset as ds
1802
+ >>> import numpy as np
1803
+ >>>
1804
+ >>> # A generator return 66 samples
1805
+ >>> def generator1():
1806
+ ... for i in range(66):
1807
+ ... yield np.ones((16, 83, 83)), np.array([i])
1808
+ >>>
1809
+ >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
1670
1810
  >>> dataset_size = dataset.get_dataset_size()
1811
+ >>> print(dataset_size)
1812
+ 66
1671
1813
  """
1672
1814
  if self.dataset_size is None:
1673
1815
  runtime_getter = self.__init_size_getter()
@@ -1685,7 +1827,11 @@ class Dataset:
1685
1827
  int, number of classes.
1686
1828
 
1687
1829
  Examples:
1688
- >>> # dataset is an instance object of Dataset
1830
+ >>> import mindspore.dataset as ds
1831
+ >>> # Read image files
1832
+ >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
1833
+ >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
1834
+ >>> # Check how many classes exist in image folder
1689
1835
  >>> num_classes = dataset.num_classes()
1690
1836
  """
1691
1837
  if self._num_classes is None:
@@ -1718,19 +1864,18 @@ class Dataset:
1718
1864
  Args:
1719
1865
  condition_name (str): The condition name that is used to toggle sending next row.
1720
1866
  num_batch (Union[int, None]): The number of batches (rows) that are released.
1721
- When num_batch is None, it will default to the number specified by the
1722
- sync_wait operation. Default: None.
1723
- data (Any): The data passed to the callback, user defined. Default: None.
1867
+ When `num_batch` is ``None``, it will default to the number specified by the
1868
+ `sync_wait` operation. Default: ``None``.
1869
+ data (Any): The data passed to the callback, user defined. Default: ``None``.
1724
1870
 
1725
1871
  Examples:
1726
1872
  >>> import numpy as np
1727
- >>>
1873
+ >>> import mindspore.dataset as ds
1728
1874
  >>>
1729
1875
  >>> def gen():
1730
1876
  ... for i in range(100):
1731
1877
  ... yield (np.array(i),)
1732
1878
  >>>
1733
- >>>
1734
1879
  >>> class Augment:
1735
1880
  ... def __init__(self, loss):
1736
1881
  ... self.loss = loss
@@ -1741,7 +1886,6 @@ class Dataset:
1741
1886
  ... def update(self, data):
1742
1887
  ... self.loss = data["loss"]
1743
1888
  >>>
1744
- >>>
1745
1889
  >>> batch_size = 10
1746
1890
  >>> dataset = ds.GeneratorDataset(gen, column_names=["input"])
1747
1891
  >>> aug = Augment(0)
@@ -1780,8 +1924,12 @@ class Dataset:
1780
1924
  int, the batch size of data.
1781
1925
 
1782
1926
  Examples:
1783
- >>> # dataset is an instance object of Dataset
1927
+ >>> import mindspore.dataset as ds
1928
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1929
+ >>> dataset = dataset.batch(2)
1784
1930
  >>> batch_size = dataset.get_batch_size()
1931
+ >>> print(batch_size)
1932
+ 2
1785
1933
  """
1786
1934
  if self._batch_size is None:
1787
1935
  runtime_getter = self._init_tree_getters()
@@ -1792,14 +1940,18 @@ class Dataset:
1792
1940
 
1793
1941
  def get_repeat_count(self):
1794
1942
  """
1795
- Get the replication times in RepeatDataset. Default: 1.
1943
+ Get the replication times in RepeatDataset. Default: ``1`` .
1796
1944
 
1797
1945
  Returns:
1798
1946
  int, the count of repeat.
1799
1947
 
1800
1948
  Examples:
1801
- >>> # dataset is an instance object of Dataset
1949
+ >>> import mindspore.dataset as ds
1950
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1951
+ >>> dataset = dataset.repeat(5)
1802
1952
  >>> repeat_count = dataset.get_repeat_count()
1953
+ >>> print(repeat_count)
1954
+ 5
1803
1955
  """
1804
1956
  if self._repeat_count is None:
1805
1957
  runtime_getter = self._init_tree_getters()
@@ -1810,15 +1962,19 @@ class Dataset:
1810
1962
 
1811
1963
  def get_class_indexing(self):
1812
1964
  """
1813
- Return the class index.
1965
+ Get the mapping dictionary from category names to category indexes.
1966
+
1967
+ This dictionary can be used to look up which category index corresponds to a particular category name.
1814
1968
 
1815
1969
  Returns:
1816
- dict, a str-to-int mapping from label name to index.
1817
- dict, a str-to-list<int> mapping from label name to index for Coco ONLY. The second number
1818
- in the list is used to indicate the super category.
1970
+ Dict[str, int], the mappings from category names to category indexes.
1819
1971
 
1820
1972
  Examples:
1821
- >>> # dataset is an instance object of Dataset
1973
+ >>> import mindspore.dataset as ds
1974
+ >>> # Read image files
1975
+ >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
1976
+ >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
1977
+ >>> # Get the mapping from category names to category indexes
1822
1978
  >>> class_indexing = dataset.get_class_indexing()
1823
1979
  """
1824
1980
  if self.children:
@@ -1830,6 +1986,7 @@ class Dataset:
1830
1986
  Reset the dataset for next epoch.
1831
1987
 
1832
1988
  Examples:
1989
+ >>> import mindspore.dataset as ds
1833
1990
  >>> mind_dataset_dir = ["/path/to/mind_dataset_file"]
1834
1991
  >>> dataset = ds.MindDataset(dataset_files=mind_dataset_dir)
1835
1992
  >>> for _ in range(5):
@@ -1882,6 +2039,13 @@ class Dataset:
1882
2039
  shard_id = 0
1883
2040
  return num_shards, shard_id
1884
2041
 
2042
+ def pre_parse(self, getter_mode):
2043
+ if getter_mode:
2044
+ if hasattr(self, "python_multiprocessing"):
2045
+ self.python_multiprocessing = False
2046
+ if hasattr(self, "num_parallel_workers"):
2047
+ self.num_parallel_workers = 1
2048
+
1885
2049
  def post_parse(self, ir_node):
1886
2050
  if self.cache:
1887
2051
  ir_node = ir_node.set_cache_client(self.cache.cache_client)
@@ -1890,6 +2054,18 @@ class Dataset:
1890
2054
 
1891
2055
  return ir_node
1892
2056
 
2057
+ def set_init_step(self, init_step):
2058
+ self._global_step = init_step
2059
+
2060
+ def get_init_step(self):
2061
+ if self._global_step is not None:
2062
+ return self._global_step
2063
+ if len(self.children) == 1:
2064
+ return self.children[0].get_init_step()
2065
+ # When there are multiple children, we cannot tell from which child to get the initial step,
2066
+ # so we initialize from the beginning
2067
+ return 0
2068
+
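A hedged sketch of how the initial step propagates (these are internal helpers; values are illustrative): a step recorded on a source node is found by walking single-child links, and a node with several children falls back to 0.

    import mindspore.dataset as ds

    dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
    dataset.set_init_step(5)          # record the step to resume from on the source node
    dataset = dataset.batch(2)        # BatchDataset has exactly one child
    print(dataset.get_init_step())    # 5, found through the single-child chain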
1893
2069
 
1894
2070
  class VisionBaseDataset(Dataset):
1895
2071
  """
@@ -1968,9 +2144,9 @@ class TextBaseDataset(Dataset):
1968
2144
  Returns:
1969
2145
  SentencePieceVocab, vocab built from the dataset.
1970
2146
  """
1971
- warnings.warn("mindspore.dataset.Dataset.build_vocab is deprecated from version 2.0 "
2147
+ warnings.warn("mindspore.dataset.Dataset.build_sentencepiece_vocab is deprecated from version 2.0 "
1972
2148
  "and will be removed in a future version. "
1973
- "Use mindspore.dataset.text.Vocab.from_dataset instead.", DeprecationWarning)
2149
+ "Use mindspore.dataset.text.SentencePieceVocab.from_dataset instead.", DeprecationWarning)
1974
2150
 
1975
2151
  def _build_vocab(self, columns, freq_range, top_k, special_tokens, special_first):
1976
2152
  """
@@ -2193,8 +2369,11 @@ class MappableDataset(SourceDataset):
2193
2369
  new_sampler (Sampler): The child sampler to be added.
2194
2370
 
2195
2371
  Examples:
2372
+ >>> import mindspore.dataset as ds
2373
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
2374
+ >>>
2196
2375
  >>> new_sampler = ds.DistributedSampler(10, 2)
2197
- >>> dataset.add_sampler(new_sampler) # dataset is an instance of Dataset
2376
+ >>> dataset.add_sampler(new_sampler)
2198
2377
  """
2199
2378
  # Note: By adding a sampler, the sampled IDs will flow to the new_sampler
2200
2379
  # after first passing through the current samplers attached to this dataset.
@@ -2210,7 +2389,9 @@ class MappableDataset(SourceDataset):
2210
2389
  new_sampler (Sampler): The new sampler to replace with.
2211
2390
 
2212
2391
  Examples:
2213
- >>> # dataset is an instance object of Dataset
2392
+ >>> import mindspore.dataset as ds
2393
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
2394
+ >>>
2214
2395
  >>> # use a DistributedSampler instead
2215
2396
  >>> new_sampler = ds.DistributedSampler(10, 2)
2216
2397
  >>> dataset.use_sampler(new_sampler)
@@ -2251,24 +2432,25 @@ class MappableDataset(SourceDataset):
2251
2432
  - The sum of split sizes > K, the difference will be removed from the first large
2252
2433
  enough split such that it will have at least 1 row after removing the difference.
2253
2434
 
2254
- randomize (bool, optional): Determines whether or not to split the data randomly. Default: True.
2255
- If True, the data will be randomly split. Otherwise, each split will be created with
2435
+ randomize (bool, optional): Determines whether or not to split the data randomly. Default: ``True``.
2436
+ If ``True``, the data will be randomly split. Otherwise, each split will be created with
2256
2437
  consecutive rows from the dataset.
2257
2438
 
2258
2439
  Note:
2259
2440
  1. There is an optimized split function, which will be called automatically when the dataset
2260
2441
  that calls this function is a MappableDataset.
2261
2442
  2. Dataset should not be sharded if split is going to be called. Instead, create a
2262
- DistributedSampler and specify a split to shard after splitting. If the dataset is
2263
- sharded after a split, it is strongly recommended setting the same seed in each instance
2264
- of execution, otherwise each shard may not be part of the same split (see Examples).
2265
- 3. It is strongly recommended to not shuffle the dataset, but use randomize=True instead.
2443
+ :class:`mindspore.dataset.DistributedSampler` and specify a split to shard after splitting.
2444
+ If the dataset is sharded after a split, it is strongly recommended setting the same
2445
+ seed in each instance of execution, otherwise each shard may not be part of the same
2446
+ split (see Examples).
2447
+ 3. It is strongly recommended to not shuffle the dataset, but set `randomize` to ``True`` instead.
2266
2448
  Shuffling the dataset may not be deterministic, which means the data in each split
2267
2449
  will be different in each epoch. Furthermore, if sharding occurs after split, each
2268
2450
  shard may not be part of the same split.
2269
2451
 
2270
2452
  Returns:
2271
- tuple(Dataset), a tuple of datasets that have been split.
2453
+ Tuple[Dataset], a tuple of new datasets split from the original one.
2272
2454
 
2273
2455
  Raises:
2274
2456
  RuntimeError: If get_dataset_size returns None or is not supported for this dataset.
@@ -2280,7 +2462,9 @@ class MappableDataset(SourceDataset):
2280
2462
  floats don't sum to 1.
2281
2463
 
2282
2464
  Examples:
2465
+ >>> import mindspore.dataset as ds
2283
2466
  >>> # Since many datasets have shuffle on by default, set shuffle to False if split will be called!
2467
+ >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
2284
2468
  >>> dataset = ds.ImageFolderDataset(image_folder_dataset_dir, shuffle=False)
2285
2469
  >>>
2286
2470
  >>> # Set the seed, and tell split to use this seed when randomizing.
@@ -2348,7 +2532,7 @@ class BucketBatchByLengthDataset(UnionBaseDataset):
2348
2532
  self.pad_to_bucket_boundary, self.drop_remainder)
2349
2533
 
2350
2534
 
2351
- def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
2535
+ def _check_shm_usage(num_worker, queue_size, in_rowsize, out_rowsize):
2352
2536
  """
2353
2537
  Check sufficient shared memory is available for shared memory queues
2354
2538
  when training in parallel mode.
@@ -2358,10 +2542,10 @@ def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
2358
2542
  device_num = _get_device_num()
2359
2543
  # In the cluster, _get_device_num indicates the number of the entire cluster. The maximum number of cards
2360
2544
  # on the ascend server is 8.
2361
- if device_num > 1 and context.get_context("device_target") == "Ascend":
2545
+ if device_num > 1:
2362
2546
  device_num = min(device_num, 8)
2363
- shm_estimate_usage = device_num * num_worker * num_queues * \
2364
- (queue_size + 2) * max_rowsize * 1024 * 1024
2547
+ shm_estimate_usage = device_num * num_worker * \
2548
+ (queue_size + 2) * (in_rowsize + out_rowsize) * 1024 * 1024
2365
2549
  try:
2366
2550
  shm_available = psutil.disk_usage('/dev/shm').free
2367
2551
  if shm_estimate_usage >= threshold_ratio * shm_available:
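To make the estimate above concrete, a small stand-alone sketch under assumed values (8 devices, 8 workers, queue size 1, 16 MB units for input and output rows; none of these numbers come from the diff):

import psutil

# Assumed, illustrative values: 8 devices, 8 workers, queue size 1, 16 MB in/out row units.
device_num, num_worker, queue_size = 8, 8, 1
in_rowsize, out_rowsize = 16, 16

# Mirrors the estimate in _check_shm_usage: shared memory the queues may need, in bytes.
shm_estimate_usage = device_num * num_worker * (queue_size + 2) * (in_rowsize + out_rowsize) * 1024 * 1024

# Compare against the free space of /dev/shm, as the function above does.
shm_available = psutil.disk_usage('/dev/shm').free
print(f"estimated {shm_estimate_usage / 2**30:.1f} GiB, available {shm_available / 2**30:.1f} GiB")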
@@ -2385,10 +2569,10 @@ class BatchDataset(UnionBaseDataset):
2385
2569
  batch_size (Union[int, function]): The number of rows each batch is created with. An
2386
2570
  int or callable which takes exactly 1 parameter, BatchInfo.
2387
2571
  drop_remainder (bool, optional): Determines whether or not to drop the last
2388
- possibly incomplete batch. Default: False. If True, and if there are less
2572
+ possibly incomplete batch. Default: ``False``. If ``True``, and if there are fewer
2389
2573
  than batch_size rows available to make the last batch, then those rows will
2390
2574
  be dropped and not propagated to the child node.
2391
- num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None.
2575
+ num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: ``None``.
2392
2576
  per_batch_map (callable, optional): Per batch map callable. A callable which takes
2393
2577
  (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch of
2394
2578
  Tensors on a given column. The number of lists should match with number of entries in input_columns. The
@@ -2398,10 +2582,16 @@ class BatchDataset(UnionBaseDataset):
2398
2582
  output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
2399
2583
  the last operation. This parameter is mandatory if len(input_columns) !=
2400
2584
  len(output_columns). The size of this list must match the number of output
2401
- columns of the last operation. Default: None, output columns will have the same
2585
+ columns of the last operation. Default: ``None``, output columns will have the same
2402
2586
  name as the input columns, i.e., the columns will be replaced.
2403
- max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
2404
- data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
2587
+ max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
2588
+ allocation to copy data between processes. The total occupied shared memory will increase as
2589
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
2590
+ used if python_multiprocessing is set to True. If it is an int, both ``input_columns`` and
2591
+ ``output_columns`` use this value as the unit to create shared memory.
2592
+ If it is a list, the first element is the unit used to create shared memory for ``input_columns``,
2593
+ and the second element is the unit used to create shared memory for ``output_columns``.
2594
+ Default: 16.
2405
2595
 
2406
2596
  """
2407
2597
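A minimal usage sketch of the list form of `max_rowsize` documented above, assuming the 2.2.0 `Dataset.batch` keyword arguments; the data, transform, and sizes are illustrative:

import numpy as np
import mindspore.dataset as ds

def copy_batch(col1, batch_info):
    # Illustrative per_batch_map executed in worker processes; returns the column unchanged.
    return (col1,)

data = np.random.rand(16, 32).astype(np.float32)
dataset = ds.NumpySlicesDataset(data, column_names=["col1"], shuffle=False)
# 16 MB shared-memory unit for input rows, 32 MB for output rows.
dataset = dataset.batch(batch_size=4, per_batch_map=copy_batch, input_columns=["col1"],
                        python_multiprocessing=True, num_parallel_workers=2, max_rowsize=[16, 32])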
 
@@ -2427,7 +2617,10 @@ class BatchDataset(UnionBaseDataset):
2427
2617
 
2428
2618
  self.python_multiprocessing = python_multiprocessing
2429
2619
  self.process_pool = None
2430
- self.max_rowsize = max_rowsize
2620
+ if isinstance(max_rowsize, int):
2621
+ self.max_rowsize = [max_rowsize * self.batch_size] * 2
2622
+ else:
2623
+ self.max_rowsize = [max_rowsize[0] * self.batch_size, max_rowsize[1] * self.batch_size]
2431
2624
 
2432
2625
  def __del__(self):
2433
2626
  if hasattr(self, "process_pool") and self.process_pool is not None:
@@ -2497,7 +2690,7 @@ class BatchDataset(UnionBaseDataset):
2497
2690
  self.num_parallel_workers = get_num_parallel_workers()
2498
2691
 
2499
2692
  self.process_pool = _PythonMultiprocessing(str(self), self.num_parallel_workers, [self.per_batch_map],
2500
- self.max_rowsize * self.batch_size)
2693
+ self.max_rowsize)
2501
2694
  # Wrap per_batch_map into _PythonCallable
2502
2695
  self.per_batch_map = _PythonCallable(self.per_batch_map, 0, self.process_pool)
2503
2696
  else:
@@ -2507,19 +2700,53 @@ class BatchDataset(UnionBaseDataset):
2507
2700
 
2508
2701
  class BatchInfo(cde.CBatchInfo):
2509
2702
  """
2510
- Only the batch size function and per_batch_map of the batch operation can dynamically adjust parameters
2511
- based on the number of batches and epochs during training.
2703
+ This class provides dataset information dynamically when the `batch_size` or `per_batch_map`
2704
+ argument of the `batch` operation is a callable object.
2512
2705
  """
2513
2706
 
2514
2707
  def get_batch_num(self):
2515
2708
  """
2516
- Return the batch number of the current batch.
2709
+ Return the number of the batch being processed in the current epoch, starting from 0.
2710
+
2711
+ Examples:
2712
+ >>> # Create a dataset whose batch size is dynamic
2713
+ >>> # Define a callable batch size function that increases the batch size by 1 for each batch.
2714
+ >>> import mindspore.dataset as ds
2715
+ >>> from mindspore.dataset import BatchInfo
2716
+ >>>
2717
+ >>> dataset = ds.GeneratorDataset([i for i in range(3)], "column1", shuffle=False)
2718
+ >>> def add_one(BatchInfo):
2719
+ ... return BatchInfo.get_batch_num() + 1
2720
+ >>> dataset = dataset.batch(batch_size=add_one)
2721
+ >>> print(list(dataset))
2722
+ [[Tensor(shape=[1], dtype=Int64, value= [0])], [Tensor(shape=[2], dtype=Int64, value= [1, 2])]]
2517
2723
  """
2518
2724
  return
2519
2725
 
2520
2726
  def get_epoch_num(self):
2521
2727
  """
2522
- Return the epoch number of the current batch.
2728
+ Return the current epoch number, starting from 0.
2729
+
2730
+ Examples:
2731
+ >>> # Create a dataset whose batch size is dynamic
2732
+ >>> # Define a callable batch size function that increases the batch size by 1 each epoch.
2733
+ >>> import mindspore.dataset as ds
2734
+ >>> from mindspore.dataset import BatchInfo
2735
+ >>>
2736
+ >>> dataset = ds.GeneratorDataset([i for i in range(4)], "column1", shuffle=False)
2737
+ >>> def add_one_by_epoch(BatchInfo):
2738
+ ... return BatchInfo.get_epoch_num() + 1
2739
+ >>> dataset = dataset.batch(batch_size=add_one_by_epoch)
2740
+ >>>
2741
+ >>> result = []
2742
+ >>> epoch = 2
2743
+ >>> iterator = dataset.create_tuple_iterator(num_epochs=epoch)
2744
+ >>> for i in range(epoch):
2745
+ ... result.extend(list(iterator))
2746
+ >>> # result:
2747
+ >>> # [[Tensor(shape=[1], dtype=Int64, value= [0])], [Tensor(shape=[1], dtype=Int64, value= [1])],
2748
+ >>> # [Tensor(shape=[1], dtype=Int64, value= [2])], [Tensor(shape=[1], dtype=Int64, value= [3])],
2749
+ >>> # [Tensor(shape=[2], dtype=Int64, value= [0, 1])], [Tensor(shape=[2], dtype=Int64, value= [2, 3])]]
2523
2750
  """
2524
2751
  return
2525
2752
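`per_batch_map` receives the same `BatchInfo` object as its last argument, so the two hooks above can also drive per-batch transforms; a brief sketch with illustrative data:

import numpy as np
import mindspore.dataset as ds

def scale_by_epoch(col1, batch_info):
    # Multiply every row in the batch by (epoch number + 1); purely illustrative.
    factor = batch_info.get_epoch_num() + 1
    return ([np.array(x) * factor for x in col1],)

dataset = ds.GeneratorDataset([np.array(i) for i in range(4)], "column1", shuffle=False)
dataset = dataset.batch(batch_size=2, per_batch_map=scale_by_epoch, input_columns=["column1"])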
 
@@ -2530,7 +2757,7 @@ class BlockReleasePair:
2530
2757
 
2531
2758
  Args:
2532
2759
  init_release_rows (int): Number of lines to allow through the pipeline.
2533
- callback (function): The callback function that will be called when release is called. Default: None.
2760
+ callback (function): The callback function that will be called when release is called. Default: ``None``.
2534
2761
  """
2535
2762
 
2536
2763
  def __init__(self, init_release_rows, callback=None):
@@ -2602,10 +2829,10 @@ class PaddedBatchDataset(UnionBaseDataset):
2602
2829
  batch_size (Union[int, function]): The number of rows each batch is created with. An
2603
2830
  int or callable which takes exactly 1 parameter, BatchInfo.
2604
2831
  drop_remainder (bool, optional): Determines whether or not to drop the last
2605
- possibly incomplete batch. Default: False. If True, and if there are less
2832
+ possibly incomplete batch. Default: ``False``. If True, and if there are less
2606
2833
  than batch_size rows available to make the last batch, then those rows will
2607
2834
  be dropped and not propagated to the child node.
2608
- num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None.
2835
+ num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: ``None``.
2609
2836
  pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
2610
2837
  will pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
2611
2838
  """
@@ -2675,7 +2902,7 @@ class SyncWaitDataset(UnionBaseDataset):
2675
2902
  input_dataset (Dataset): Input dataset to apply flow control.
2676
2903
  num_batch (int): Number of batches without blocking at the start of each epoch.
2677
2904
  condition_name (str): Condition name that is used to toggle sending next row.
2678
- callback (function): Callback function that will be invoked when sync_update is called. Default: None.
2905
+ callback (function): Callback function that will be invoked when sync_update is called. Default: ``None``.
2679
2906
 
2680
2907
  Raises:
2681
2908
  RuntimeError: If condition name already exists.
@@ -2782,24 +3009,11 @@ class _PythonCallable:
2782
3009
  self.pool = pool
2783
3010
  # Python callable index
2784
3011
  self.idx = idx
2785
- self.check_interval = get_multiprocessing_timeout_interval()
2786
3012
 
2787
3013
  def __call__(self, *args):
2788
3014
  result = None
2789
- start_time = time.time()
2790
- count = 1
2791
3015
  get_data_from_worker_process = False
2792
3016
  while get_data_from_worker_process is False:
2793
- cost_time = time.time() - start_time
2794
- if cost_time > (self.check_interval * count):
2795
- logger.warning("It has been waiting for " + str(cost_time) + "s because the multi "
2796
- "workers of map operation cost long time to process next data. "
2797
- "Worker process list are: " + str(self.pool.get_pids()) + ", you can use "
2798
- "\"py-spy dump -p {PID} -l -s \""
2799
- "to dump the worker process stack. You can also set the timeout interval by "
2800
- "ds.config.set_multiprocessing_interval to adjust the output frequency of this "
2801
- "log.")
2802
- count += 1
2803
3017
  if self.pool.is_running() and check_iterator_cleanup() is False:
2804
3018
  try:
2805
3019
  result = self.pool.execute(self.idx, *args)
@@ -2820,6 +3034,7 @@ class _PythonCallable:
2820
3034
  return self.py_callable.to_json()
2821
3035
 
2822
3036
 
3037
+ # used when python_multiprocessing=True in map
2823
3038
  class Pipe:
2824
3039
  """
2825
3040
  Class to handle communication between the master process and the worker processes.
@@ -2829,29 +3044,34 @@ class Pipe:
2829
3044
  self.shared_memory = shared_memory
2830
3045
  self.eof = multiprocessing.Event()
2831
3046
  if self.shared_memory:
2832
- self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize)
2833
- self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize)
3047
+ self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[0])
3048
+ self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[1])
2834
3049
  else:
2835
3050
  self.in_queue = _Queue(1)
2836
3051
  self.res_queue = _Queue(1)
2837
- self.in_queue._joincancelled = True # pylint: disable=W0212
2838
- self.res_queue._joincancelled = True # pylint: disable=W0212
3052
+ self.in_queue.cancel_join_thread() # Ensure that the process does not hang when exiting
2839
3053
 
2840
3054
  def master_send(self, func_index, data):
2841
3055
  self.in_queue.put_nowait((func_index, *data))
2842
3056
 
2843
3057
  def master_receive(self):
2844
- return self.res_queue.get_until(timeout=1, exit_signal=self.eof)
3058
+ if self.eof is None:
3059
+ raise RuntimeError("EOF is none when get data from worker.")
3060
+ if self.eof.is_set():
3061
+ return None
3062
+ return self.res_queue.get(timeout=1)
2845
3063
 
2846
3064
  def master_close(self):
2847
3065
  self.eof.set()
3066
+ self.send_finish_signal_to_worker()
2848
3067
  self.send_finish_signal()
2849
- self.res_queue.cancel_join_thread()
2850
- self.in_queue.cancel_join_thread()
2851
3068
 
2852
3069
  def send_finish_signal(self):
2853
3070
  self.worker_send(None)
2854
3071
 
3072
+ def send_finish_signal_to_worker(self):
3073
+ self.master_send(0, "QUIT")
3074
+
2855
3075
  def worker_send(self, data):
2856
3076
  self.res_queue.put_until(data, timeout=1, exit_signal=self.eof)
2857
3077
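The master/worker hand-off implemented above can be pictured with a stripped-down stand-in that uses plain multiprocessing queues and the same "QUIT" sentinel; everything here is illustrative rather than the actual Pipe class:

import multiprocessing as mp

def toy_worker(in_queue, res_queue):
    # Worker loop: pull (func_index, data), stop on the "QUIT" sentinel, send results back.
    while True:
        func_index, data = in_queue.get()
        if data == "QUIT":
            break
        res_queue.put(data * 2)

if __name__ == "__main__":
    in_queue, res_queue = mp.Queue(1), mp.Queue(1)
    worker = mp.Process(target=toy_worker, args=(in_queue, res_queue), daemon=True)
    worker.start()
    in_queue.put((0, 21))               # master_send: (func_index, *data)
    print(res_queue.get(timeout=5))     # master_receive -> 42
    in_queue.put((0, "QUIT"))           # send_finish_signal_to_worker
    worker.join()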
 
@@ -2864,10 +3084,6 @@ class Pipe:
2864
3084
  func_index, *data = result
2865
3085
  return func_index, tuple(data)
2866
3086
 
2867
- def worker_close(self):
2868
- self.res_queue.cancel_join_thread()
2869
- self.in_queue.cancel_join_thread()
2870
-
2871
3087
 
2872
3088
  def _main_process_already_exit():
2873
3089
  """
@@ -2885,6 +3101,8 @@ def _worker_loop(operations, pipe, seed=get_seed()):
2885
3101
  """
2886
3102
  Multiprocess worker process loop.
2887
3103
  """
3104
+ # Ensure that the process does not hang when exiting
3105
+ pipe.res_queue.cancel_join_thread()
2888
3106
 
2889
3107
  def _ignore_sigint():
2890
3108
  """
@@ -2900,9 +3118,10 @@ def _worker_loop(operations, pipe, seed=get_seed()):
2900
3118
 
2901
3119
  result = pipe.worker_receive()
2902
3120
  if result is None:
2903
- pipe.worker_close()
2904
3121
  return
2905
3122
  (idx, input_tensors) = result
3123
+ if input_tensors == "QUIT":
3124
+ break
2906
3125
  try:
2907
3126
  output_tensors = operations[idx](*input_tensors)
2908
3127
 
@@ -2911,6 +3130,10 @@ def _worker_loop(operations, pipe, seed=get_seed()):
2911
3130
  pipe.worker_send(ExceptionHandler(where="in map(or batch) worker and execute Python function"))
2912
3131
  # Do not return
2913
3132
 
3133
+ # release the queues when the worker is stopped by the master
3134
+ del pipe.in_queue
3135
+ del pipe.res_queue
3136
+
2914
3137
 
2915
3138
  def worker_target(operations, seed=get_seed()):
2916
3139
  return lambda pipe: _worker_loop(operations, pipe, seed)
@@ -2924,20 +3147,54 @@ class _MPWorker(multiprocessing.Process):
2924
3147
  def __init__(self, operations, warning_ctl, max_rowsize=16, seed=get_seed()):
2925
3148
  shared_memory = get_enable_shared_mem()
2926
3149
  self.pipe = Pipe(warning_ctl, shared_memory=shared_memory, max_rowsize=max_rowsize)
3150
+ self.check_interval = get_multiprocessing_timeout_interval()
2927
3151
  super().__init__(target=worker_target(operations, seed), args=(self.pipe,), daemon=True)
2928
3152
 
2929
3153
  def execute(self, idx, *args):
3154
+ """Acquiring data from a worker in an infinite loop"""
2930
3155
  self.pipe.master_send(idx, args)
2931
- res = self.pipe.master_receive()
2932
- if isinstance(res, ExceptionHandler):
2933
- res.reraise()
2934
- return res
3156
+ time_s = time.time()
3157
+ wait_count = 1
3158
+ while True:
3159
+ cost_time = time.time() - time_s
3160
+ if cost_time / self.check_interval >= wait_count:
3161
+ wait_count += 1
3162
+ logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
3163
+ "worker of the map operation is hanging. "
3164
+ "Check whether the user defined data transform is too slow or the "
3165
+ "output data is too large. You can also set the timeout interval by "
3166
+ "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
3167
+ "of this log.")
3168
+ pid = self.pid
3169
+ logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
3170
+ install_status, _ = subprocess.getstatusoutput("py-spy --version")
3171
+ if install_status == 0:
3172
+ stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
3173
+ logger.warning("Map worker subprocess stack:\n{}".format(stack))
3174
+ else:
3175
+ logger.warning("Please `pip install py-spy` to get the stacks of the stuck process.")
3176
+ try:
3177
+ res = self.pipe.master_receive()
3178
+ except queue.Empty:
3179
+ continue
3180
+ if res is None:
3181
+ # receive finish signal
3182
+ return None
3183
+ if isinstance(res, ExceptionHandler):
3184
+ res.reraise()
3185
+ return res
2935
3186
 
2936
3187
  def close(self):
2937
3188
  try:
2938
3189
  if self.is_alive():
3190
+ # release the eager executor used by the current process
3191
+ transforms.transforms.clean_unused_executors()
3192
+
2939
3193
  logger.info(f"Closing worker with PID: {self.pid}")
2940
3194
  self.pipe.master_close()
3195
+ # delete the queue handles held by the master
3196
+ del self.pipe.in_queue
3197
+ del self.pipe.res_queue
2941
3198
  super().terminate()
2942
3199
  super().join()
2943
3200
  super().close()
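When a map worker does hang, the two knobs used above can also be exercised by hand; a sketch with a placeholder PID and an assumed 60-second interval:

import subprocess
import mindspore.dataset as ds

# Emit the "worker is hanging" warning every 60 seconds instead of the default interval.
ds.config.set_multiprocessing_timeout_interval(60)

# Manually dump the stack of a stuck worker, as the code above does when py-spy is installed.
pid = 12345  # placeholder: PID of the stuck map worker subprocess
status, _ = subprocess.getstatusoutput("py-spy --version")
if status == 0:
    print(subprocess.getoutput("py-spy dump -p {} -l".format(pid)))
else:
    print("Please `pip install py-spy` to dump the worker stack.")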
@@ -2965,6 +3222,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
2965
3222
  """
2966
3223
 
2967
3224
  def __init__(self):
3225
+ self.origin_hook = sys.excepthook
2968
3226
  sys.excepthook = self.__handler_exception
2969
3227
 
2970
3228
  @staticmethod
@@ -2976,15 +3234,15 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
2976
3234
  time.sleep(3)
2977
3235
 
2978
3236
  def __handler_exception(self, ex_type, value, tb):
2979
- logger.critical("Uncaught exception: ", exc_info=(ex_type, value, tb))
3237
+ self.origin_hook(ex_type, value, tb)
2980
3238
  self.mp_pool_exit_preprocess()
2981
3239
 
2982
- def __init__(self, op_name, num_parallel_workers, operations, max_row_size=16):
3240
+ def __init__(self, op_name, num_parallel_workers, operations, max_rowsize=16):
2983
3241
  super(_PythonMultiprocessing, self).__init__()
2984
3242
  self.op_name = op_name
2985
3243
  self.num_parallel_workers = num_parallel_workers
2986
3244
  self.operations = operations
2987
- self.max_row_size = max_row_size
3245
+ self.max_rowsize = max_rowsize
2988
3246
 
2989
3247
  self.workers = None
2990
3248
  self.pids = None
@@ -3056,6 +3314,9 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3056
3314
  "ds.config.set_enable_watchdog(False) to block this error.")
3057
3315
  os.kill(os.getpid(), signal.SIGTERM)
3058
3316
 
3317
+ # release the workers
3318
+ del workers
3319
+
3059
3320
  @staticmethod
3060
3321
  def _terminate_processes(processes):
3061
3322
  """Terminate subprocesses"""
@@ -3141,6 +3402,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3141
3402
  time.sleep(0.1)
3142
3403
 
3143
3404
  _PythonMultiprocessing._terminate_processes(workers)
3405
+ del workers
3144
3406
  os.kill(os.getpid(), signal.SIGTERM)
3145
3407
 
3146
3408
  def launch(self, op_id=-1):
@@ -3171,7 +3433,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3171
3433
 
3172
3434
  """
3173
3435
  if get_enable_shared_mem():
3174
- self.check_shared_memory()
3436
+ _check_shm_usage(self.num_parallel_workers, 1, self.max_rowsize[0], self.max_rowsize[1])
3175
3437
 
3176
3438
  if self.workers is not None:
3177
3439
  raise Exception("Pool was already created, close it first.")
@@ -3183,7 +3445,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3183
3445
  self.workers = []
3184
3446
  self.warning_ctl = multiprocessing.Value('i', 0)
3185
3447
  for i in range(self.num_parallel_workers):
3186
- worker = _MPWorker(self.operations, self.warning_ctl, self.max_row_size, i + get_seed())
3448
+ worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, i + get_seed())
3187
3449
  worker.start()
3188
3450
  self.workers.append(worker)
3189
3451
 
@@ -3197,8 +3459,11 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3197
3459
  atexit.register(self.terminate)
3198
3460
 
3199
3461
  def terminate(self):
3200
- self.close_all_workers()
3462
+ # close the watchdog first and then close all the workers
3201
3463
  self.abort_watchdog()
3464
+ self.close_all_workers()
3465
+ if hasattr(self, "warning_ctl"):
3466
+ del self.warning_ctl
3202
3467
 
3203
3468
  def get_pids(self):
3204
3469
  """
@@ -3242,12 +3507,6 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3242
3507
  def is_mp_enabled(self):
3243
3508
  return self.workers is not None
3244
3509
 
3245
- def check_shared_memory(self):
3246
- """
3247
- Check if there is enough shared memory in the system.
3248
- """
3249
- _check_shm_usage(self.num_parallel_workers, 1, self.max_row_size, 2)
3250
-
3251
3510
  def execute(self, idx, *args):
3252
3511
  """
3253
3512
  Execute
@@ -3294,6 +3553,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3294
3553
  self._abort_watchdog()
3295
3554
  if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
3296
3555
  _PythonMultiprocessing._terminate_processes([self.cleaning_process])
3556
+ del self.cleaning_process
3297
3557
 
3298
3558
  def is_running(self):
3299
3559
  if hasattr(self, 'workers') and self.workers is not None:
@@ -3301,9 +3561,34 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3301
3561
  return False
3302
3562
 
3303
3563
  def close_all_workers(self):
3564
+ """Close all the subprocess workers"""
3304
3565
  if hasattr(self, 'workers') and self.workers is not None:
3305
3566
  for w in self.workers:
3306
3567
  w.close()
3568
+ check_interval = get_multiprocessing_timeout_interval()
3569
+ for w in self.workers:
3570
+ try:
3571
+ subprocess_file_descriptor = w.sentinel
3572
+ st = time.time()
3573
+ while _PythonMultiprocessing.is_process_alive(w.pid):
3574
+ time.sleep(0.01) # sleep 10ms, waiting for the subprocess to exit
3575
+ if time.time() - st > check_interval:
3576
+ logger.warning("Waiting for the subprocess worker [{}] to exit.".format(w.pid))
3577
+ st += check_interval
3578
+ except ValueError as e:
3579
+ if "process object is closed" in str(e):
3580
+ continue
3581
+ raise e
3582
+ try:
3583
+ if w.is_alive():
3584
+ os.close(subprocess_file_descriptor)
3585
+ except OSError as e:
3586
+ # The file descriptor may have already been released, so ignore 'Bad file descriptor'
3587
+ if "Bad file descriptor" not in str(e):
3588
+ raise e
3589
+
3590
+ # use clear() to release the handles, which is better than assigning self.workers = None
3591
+ self.workers.clear()
3307
3592
  self.workers = None
3308
3593
  self.pids = None
3309
3594
 
@@ -3315,24 +3600,29 @@ class MapDataset(UnionBaseDataset):
3315
3600
  Args:
3316
3601
  input_dataset (Dataset): Input Dataset to be mapped.
3317
3602
  operations (Union[list[TensorOperation], list[functions]]): A function mapping a nested structure of tensors
3318
- to another nested structure of tensor. Default: None.
3603
+ to another nested structure of tensor. Default: ``None``.
3319
3604
  input_columns (Union[str, list[str]]): List of names of the input columns.
3320
- Default: None, the operations will be applied on the first columns in the dataset.
3605
+ Default: ``None``, the operations will be applied on the first columns in the dataset.
3321
3606
  The size of the list should match the number of inputs of the first operation.
3322
3607
  output_columns (Union[str, list[str]], optional): List of names of the output columns.
3323
3608
  The size of the list should match the number of outputs of the last operation.
3324
- Default: None, output columns will be the input columns, i.e., the columns will
3609
+ Default: ``None``, output columns will be the input columns, i.e., the columns will
3325
3610
  be replaced.
3326
3611
  num_parallel_workers (int, optional): Number of workers to process the dataset
3327
- in parallel. Default: None.
3612
+ in parallel. Default: ``None``.
3328
3613
  python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
3329
- option could be beneficial if the Python operation is computational heavy. Default: False.
3614
+ option could be beneficial if the Python operation is computational heavy. Default: ``False``.
3330
3615
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
3331
- Default: None, which means no cache is used.
3332
- callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: None.
3333
- max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
3334
- data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
3335
- offload (bool, optional): Flag to indicate whether offload is used. Default: None.
3616
+ Default: ``None``, which means no cache is used.
3617
+ callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: ``None``.
3618
+ max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
3619
+ allocation to copy data between processes. The total occupied shared memory will increase as
3620
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
3621
+ used if python_multiprocessing is set to True. If it is an int, both ``input_columns`` and
3622
+ ``output_columns`` use this value as the unit to create shared memory. If it is a list, the first element
3623
+ is the unit used to create shared memory for ``input_columns``, and the second element
3624
+ is the unit used to create shared memory for ``output_columns``. Default: 16.
3625
+ offload (bool, optional): Flag to indicate whether offload is used. Default: ``None``.
3336
3626
  """
3337
3627
 
3338
3628
  def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None,
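The same list form of `max_rowsize` applies to `map`; a short sketch mirroring the batch example, with an illustrative Python transform:

import numpy as np
import mindspore.dataset as ds

def heavy_transform(x):
    # Illustrative Python transform that benefits from worker processes.
    return x.astype(np.float32) * 2

data = np.random.rand(8, 64)
dataset = ds.NumpySlicesDataset(data, column_names=["col1"], shuffle=False)
# 16 MB shared-memory unit for inputs, 32 MB for the (possibly larger) outputs.
dataset = dataset.map(operations=heavy_transform, input_columns=["col1"],
                      python_multiprocessing=True, num_parallel_workers=2, max_rowsize=[16, 32])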
@@ -3362,7 +3652,10 @@ class MapDataset(UnionBaseDataset):
3362
3652
  self.process_pool = None
3363
3653
 
3364
3654
  self.callbacks = to_list(callbacks)
3365
- self.max_rowsize = max_rowsize
3655
+ if isinstance(max_rowsize, int):
3656
+ self.max_rowsize = [max_rowsize] * 2
3657
+ else:
3658
+ self.max_rowsize = max_rowsize
3366
3659
  self.offload = offload
3367
3660
 
3368
3661
  def parse(self, children=None):
@@ -3400,7 +3693,7 @@ class MapDataset(UnionBaseDataset):
3400
3693
 
3401
3694
  callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
3402
3695
  return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
3403
- callbacks, self.max_rowsize, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
3696
+ callbacks, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
3404
3697
 
3405
3698
  def __deepcopy__(self, memodict):
3406
3699
  return self.__safe_deepcopy__(memodict, exclude=("operations", "callbacks", "__transfer_dataset__"))
@@ -3426,7 +3719,7 @@ class MapDataset(UnionBaseDataset):
3426
3719
  return op_name
3427
3720
 
3428
3721
  @staticmethod
3429
- def __construct_debug_hook(previous_op_name=None):
3722
+ def __construct_debug_hook(previous_op_name=None, is_first_op=False):
3430
3723
  """
3431
3724
  Wrap debug hook into FuncWrapper.
3432
3725
  """
@@ -3437,6 +3730,7 @@ class MapDataset(UnionBaseDataset):
3437
3730
  # making deep copy to allow each debug hook instance hold unique variables
3438
3731
  new_fn = copy.deepcopy(fn)
3439
3732
  new_fn.set_previous_op_name(previous_op_name)
3733
+ new_fn.set_is_first(is_first_op)
3440
3734
  inserted_func = transforms.py_transforms_util.FuncWrapper(new_fn)
3441
3735
  inserted_func.implementation = Implementation.PY
3442
3736
  inserted_functions.append(inserted_func)
@@ -3558,7 +3852,8 @@ class MapDataset(UnionBaseDataset):
3558
3852
  """
3559
3853
  if not get_debug_mode():
3560
3854
  return operations
3561
- inserted_operations = self.__construct_debug_hook()
3855
+ first_op_name = self.__parse_op_name(operations[0])
3856
+ inserted_operations = self.__construct_debug_hook(first_op_name, is_first_op=True)
3562
3857
  for op in operations:
3563
3858
  inserted_operations.append(op)
3564
3859
  op_name = self.__parse_op_name(op)
@@ -3588,9 +3883,9 @@ class FilterDataset(UnionBaseDataset):
3588
3883
  input_dataset (Dataset): Input Dataset to be mapped.
3589
3884
  predicate (callable): Python callable which returns a boolean value. If False then filter the element.
3590
3885
  input_columns (Union[str, list[str]], optional): List of names of the input columns.
3591
- Default: None, the predicate will be applied to all columns in the dataset.
3886
+ Default: ``None``, the predicate will be applied to all columns in the dataset.
3592
3887
  num_parallel_workers (int, optional): Number of workers to process the dataset
3593
- in parallel. Default: None.
3888
+ in parallel. Default: ``None``.
3594
3889
  """
3595
3890
 
3596
3891
  def __init__(self, input_dataset, predicate, input_columns=None, num_parallel_workers=None):
@@ -3702,6 +3997,8 @@ class ConcatDataset(UnionBaseDataset):
3702
3997
  "valid samples in the dataset." % child_index)
3703
3998
  child_index += 1
3704
3999
 
4000
+ self._children_sizes = self.children_sizes_.copy()
4001
+
3705
4002
  # _children_flag_and_nums: A list of pair<int ,int>.The first element of pair is flag that characterizes
3706
4003
  # whether the dataset is mappable. The second element of pair is length of the dataset
3707
4004
  self._children_flag_and_nums = []
@@ -3725,7 +4022,8 @@ class ConcatDataset(UnionBaseDataset):
3725
4022
  self._children_flag_and_nums.append((1, dataset_len))
3726
4023
 
3727
4024
  def parse(self, children=None):
3728
- return cde.ConcatNode(children, self._sampler, self._children_flag_and_nums, self._children_start_end_index_)
4025
+ return cde.ConcatNode(children, self._sampler, self._children_flag_and_nums, self._children_start_end_index_,
4026
+ self._children_sizes)
3729
4027
 
3730
4028
  def use_sampler(self, sampler):
3731
4029
  """
@@ -3741,8 +4039,19 @@ class ConcatDataset(UnionBaseDataset):
3741
4039
  ValueError: If the parameter NumSamples of sampler is not None.
3742
4040
  ValueError: If num_shards <=0.
3743
4041
  """
3744
- if not isinstance(sampler, samplers.DistributedSampler):
3745
- raise TypeError("The parameter %s of concat must be DistributedSampler!" % sampler)
4042
+ if not isinstance(sampler, (samplers.DistributedSampler, samplers.RandomSampler)):
4043
+ raise TypeError("The parameter %s of concat must be DistributedSampler or RandomSampler!" % sampler)
4044
+
4045
+ if isinstance(sampler, samplers.RandomSampler):
4046
+ if sampler.replacement:
4047
+ raise ValueError("The parameter replacement of RandomSampler must be False!")
4048
+
4049
+ if sampler.get_num_samples() is not None:
4050
+ raise ValueError("The parameter num_samples of RandomSampler is not support to be set!")
4051
+
4052
+ self._sampler = sampler
4053
+ self._children_sizes = [c.get_dataset_size() for c in self.children]
4054
+ return
3746
4055
 
3747
4056
  if sampler.is_shuffled():
3748
4057
  raise ValueError("The parameter shuffle of DistributedSampler must be False!")
@@ -3845,7 +4154,12 @@ class _ToDevice:
3845
4154
  self._runtime_context = cde.PythonRuntimeContext()
3846
4155
  self._runtime_context.Init()
3847
4156
  self._to_device = cde.ToDevice(num_epochs)
3848
- self._to_device.Init(ir_tree)
4157
+ if dataset.get_init_step() != 0:
4158
+ init_step = dataset.get_init_step()
4159
+ dataset_size = dataset.get_dataset_size()
4160
+ self._to_device.Init(ir_tree, init_step, dataset_size)
4161
+ else:
4162
+ self._to_device.Init(ir_tree, 0, -1)
3849
4163
  self._runtime_context.AssignConsumer(self._to_device)
3850
4164
 
3851
4165
  ITERATORS_LIST.append(weakref.ref(self))
@@ -3872,6 +4186,14 @@ class _ToDevice:
3872
4186
  """
3873
4187
  return self._to_device.GetDataInfo()
3874
4188
 
4189
+ def get_send_info(self):
4190
+ """
4191
+ In sink mode, it returns the send information of dataset at this moment.
4192
+ Send information includes number of send batches, time summary of fetching data on host
4193
+ and time summary of sending data.
4194
+ """
4195
+ return self._to_device.GetSendInfo()
4196
+
3875
4197
  def release(self):
3876
4198
  """
3877
4199
  Manually terminate Device Queue instead of relying on out of scope destruction.
@@ -3892,8 +4214,8 @@ class _ToDevice:
3892
4214
  offload_model = GetOffloadModel(self._to_device, col_names)
3893
4215
  return offload_model
3894
4216
 
3895
- def _reset(self, step, epoch):
3896
- self._to_device.Reset(step, epoch)
4217
+ def _reset(self, step, dataset_size):
4218
+ self._to_device.Reset(step, dataset_size)
3897
4219
 
3898
4220
 
3899
4221
  class TransferDataset(Dataset):
@@ -3902,9 +4224,9 @@ class TransferDataset(Dataset):
3902
4224
 
3903
4225
  Args:
3904
4226
  input_dataset (Dataset): Input Dataset to be transferred.
3905
- send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True.
4227
+ send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: ``True``.
3906
4228
  create_data_info_queue (bool, optional): Whether to create queue which stores
3907
- types and shapes of data or not. Default: False.
4229
+ types and shapes of data or not. Default: ``False``.
3908
4230
 
3909
4231
  Raises:
3910
4232
  TypeError: If device_type is empty.
@@ -3912,9 +4234,14 @@ class TransferDataset(Dataset):
3912
4234
  RuntimeError: If dataset is unknown.
3913
4235
  """
3914
4236
 
3915
- def __init__(self, input_dataset, send_epoch_end=True, create_data_info_queue=False):
4237
+ def __init__(self, input_dataset, send_epoch_end=True, create_data_info_queue=False, queue_name=""):
3916
4238
  super().__init__(children=input_dataset)
3917
- self.queue_name = str(uuid.uuid1())
4239
+ if queue_name == "":
4240
+ self.queue_name = str(uuid.uuid1())
4241
+ logger.info(f"queue_name is newly generated. value is {self.queue_name}")
4242
+ else:
4243
+ self.queue_name = queue_name
4244
+ logger.info(f"queue_name is read from compile cache. value is {self.queue_name}")
3918
4245
  self.device_type = context.get_context("device_target") if context else "CPU"
3919
4246
  self.device_id = context.get_context("device_id") if context else 0
3920
4247
 
@@ -3973,6 +4300,16 @@ class TransferDataset(Dataset):
3973
4300
  return self._to_device.get_data_info()
3974
4301
  raise RuntimeError("Calling get_data_info with bad state.")
3975
4302
 
4303
+ def get_send_info(self):
4304
+ """
4305
+ In sink mode, return the send information of the dataset at this moment.
4306
+ The send information includes the number of batches sent, a time summary of fetching data on the host,
4307
+ and a time summary of sending data.
4308
+ """
4309
+ if self._to_device is not None:
4310
+ return self._to_device.get_send_info()
4311
+ raise RuntimeError("Calling get_send_info with bad state, data queue is not initialized.")
4312
+
3976
4313
  def get_offload_model(self):
3977
4314
  if self._to_device is not None:
3978
4315
  return self._to_device.get_offload_model(self.column_name)
@@ -3986,10 +4323,10 @@ class TransferDataset(Dataset):
3986
4323
  if self._to_device is not None:
3987
4324
  self._to_device.release()
3988
4325
 
3989
- def _reset(self, step, epoch):
4326
+ def _reset(self, step, dataset_size):
3990
4327
  if self._to_device is not None:
3991
- logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(epoch))
3992
- self._to_device._reset(step, epoch) # pylint: disable=protected-access
4328
+ logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(step // dataset_size))
4329
+ self._to_device._reset(step, dataset_size) # pylint: disable=protected-access
3993
4330
 
3994
4331
 
3995
4332
  class Schema:
@@ -3997,15 +4334,13 @@ class Schema:
3997
4334
  Class to represent a schema of a dataset.
3998
4335
 
3999
4336
  Args:
4000
- schema_file(str): Path of the schema file. Default: None.
4001
-
4002
- Returns:
4003
- Schema object, schema info about dataset.
4337
+ schema_file (str): Path of the schema file. Default: ``None``.
4004
4338
 
4005
4339
  Raises:
4006
4340
  RuntimeError: If schema file failed to load.
4007
4341
 
4008
4342
  Examples:
4343
+ >>> import mindspore.dataset as ds
4009
4344
  >>> from mindspore import dtype as mstype
4010
4345
  >>>
4011
4346
  >>> # Create schema; specify column name, mindspore.dtype and shape of the column
@@ -4027,16 +4362,17 @@ class Schema:
4027
4362
  name (str): The new name of the column.
4028
4363
  de_type (str): Data type of the column.
4029
4364
  shape (list[int], optional): Shape of the column.
4030
- Default: None, [-1] which is an unknown shape of rank 1.
4365
+ Default: ``None``, [-1] which is an unknown shape of rank 1.
4031
4366
 
4032
4367
  Raises:
4033
4368
  ValueError: If column type is unknown.
4034
4369
 
4035
4370
  Examples:
4036
- >>> from mindspore import dtype as mstype
4037
- >>>
4038
- >>> schema = ds.Schema()
4039
- >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
4371
+ >>> import mindspore.dataset as ds
4372
+ >>> from mindspore import dtype as mstype
4373
+ >>>
4374
+ >>> schema = ds.Schema()
4375
+ >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
4040
4376
  """
4041
4377
  if isinstance(de_type, typing.Type):
4042
4378
  de_type = mstype_to_detype(de_type)
@@ -4084,9 +4420,11 @@ class Schema:
4084
4420
 
4085
4421
  Examples:
4086
4422
  >>> from mindspore.dataset import Schema
4423
+ >>> from mindspore import dtype as mstype
4087
4424
  >>>
4088
- >>> schema1 = ds.Schema()
4089
- >>> schema2 = schema1.to_json()
4425
+ >>> schema = Schema()
4426
+ >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
4427
+ >>> json = schema.to_json()
4090
4428
  """
4091
4429
  return self.cpp_schema.to_json()
4092
4430
 
@@ -4104,12 +4442,11 @@ class Schema:
4104
4442
 
4105
4443
  Examples:
4106
4444
  >>> import json
4107
- >>>
4108
4445
  >>> from mindspore.dataset import Schema
4109
4446
  >>>
4110
- >>> with open("/path/to/schema_file") as file:
4447
+ >>> with open("/path/to/schema_file", "r") as file:
4111
4448
  ... json_obj = json.load(file)
4112
- ... schema = ds.Schema()
4449
+ ... schema = Schema()
4113
4450
  ... schema.from_json(json_obj)
4114
4451
  """
4115
4452
  self.cpp_schema.from_string(json.dumps(json_obj, indent=2))