mindspore 2.0.0rc1__cp38-none-any.whl → 2.2.0__cp38-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +633 -804
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 2019-
|
|
1
|
+
# Copyright 2019-2023 Huawei Technologies Co., Ltd
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -45,28 +45,33 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
45
45
|
|
|
46
46
|
Args:
|
|
47
47
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
48
|
-
usage (str, optional): Acceptable usages include 'train', 'test' and 'all'
|
|
49
|
-
|
|
48
|
+
usage (str, optional): Acceptable usages include ``'train'`` , ``'test'`` and ``'all'`` .
|
|
49
|
+
Default: ``None`` , all samples.
|
|
50
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` ,
|
|
51
|
+
reads the full dataset.
|
|
50
52
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
51
|
-
Default: None, will use global default workers(8), it can be set
|
|
52
|
-
by
|
|
53
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
54
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
53
55
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
54
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
55
|
-
|
|
56
|
-
If `shuffle` is
|
|
56
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
57
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
58
|
+
If `shuffle` is ``False``, no shuffling will be performed.
|
|
59
|
+
If `shuffle` is ``True``, it is equivalent to setting `shuffle` to
|
|
60
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
57
61
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
58
62
|
|
|
59
|
-
- Shuffle.GLOBAL
|
|
63
|
+
- ``Shuffle.GLOBAL``: Shuffle both the files and samples.
|
|
60
64
|
|
|
61
|
-
- Shuffle.FILES
|
|
65
|
+
- ``Shuffle.FILES``: Shuffle files only.
|
|
62
66
|
|
|
63
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into.
|
|
64
|
-
When this argument is specified, `num_samples` reflects the
|
|
67
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into.
|
|
68
|
+
Default: ``None``. When this argument is specified, `num_samples` reflects the
|
|
69
|
+
maximum number of samples per shard.
|
|
65
70
|
shard_id (int, optional): The shard ID within `num_shards` . This
|
|
66
|
-
argument can only be specified when `num_shards` is also specified. Default: None
|
|
71
|
+
argument can only be specified when `num_shards` is also specified. Default: ``None``.
|
|
67
72
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
68
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
69
|
-
Default: None
|
|
73
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
74
|
+
Default: ``None``, which means no cache is used.
|
|
70
75
|
|
|
71
76
|
Raises:
|
|
72
77
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -74,7 +79,12 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
74
79
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
75
80
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
76
81
|
|
|
82
|
+
Tutorial Examples:
|
|
83
|
+
- `Load & Process Data With Dataset Pipeline
|
|
84
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
85
|
+
|
|
77
86
|
Examples:
|
|
87
|
+
>>> import mindspore.dataset as ds
|
|
78
88
|
>>> ag_news_dataset_dir = "/path/to/ag_news_dataset_file"
|
|
79
89
|
>>> dataset = ds.AGNewsDataset(dataset_dir=ag_news_dataset_dir, usage='all')
|
|
80
90
|
|
|
@@ -139,34 +149,37 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
139
149
|
Args:
|
|
140
150
|
dataset_dir (str): Path to the root directory that contains the Amazon Review Polarity dataset
|
|
141
151
|
or the Amazon Review Full dataset.
|
|
142
|
-
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
143
|
-
For Polarity dataset, 'train' will read from 3,600,000 train samples,
|
|
144
|
-
'test' will read from 400,000 test samples,
|
|
145
|
-
'all' will read from all 4,000,000 samples.
|
|
146
|
-
For Full dataset, 'train' will read from 3,000,000 train samples,
|
|
147
|
-
'test' will read from 650,000 test samples,
|
|
148
|
-
'all' will read from all 3,650,000 samples. Default: None
|
|
149
|
-
num_samples (int, optional): Number of samples (rows) to be read. Default: None
|
|
152
|
+
usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
|
|
153
|
+
For Polarity dataset, ``'train'`` will read from 3,600,000 train samples,
|
|
154
|
+
``'test'`` will read from 400,000 test samples,
|
|
155
|
+
``'all'`` will read from all 4,000,000 samples.
|
|
156
|
+
For Full dataset, ``'train'`` will read from 3,000,000 train samples,
|
|
157
|
+
``'test'`` will read from 650,000 test samples,
|
|
158
|
+
``'all'`` will read from all 3,650,000 samples. Default: ``None``, all samples.
|
|
159
|
+
num_samples (int, optional): Number of samples (rows) to be read. Default: ``None``,
|
|
160
|
+
reads the full dataset.
|
|
150
161
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
151
|
-
Default: None, will use global default workers(8), it can be set
|
|
152
|
-
by
|
|
162
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
163
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
153
164
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
154
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
155
|
-
|
|
156
|
-
If `shuffle` is
|
|
165
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
166
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
167
|
+
If `shuffle` is ``False``, no shuffling will be performed.
|
|
168
|
+
If `shuffle` is ``True``, it is equivalent to setting `shuffle` to
|
|
169
|
+
``mindspore.dataset.Shuffle.GLOBAL``.
|
|
157
170
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
158
171
|
|
|
159
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
172
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
160
173
|
|
|
161
|
-
- Shuffle.FILES: Shuffle files only.
|
|
174
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
162
175
|
|
|
163
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
176
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
164
177
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
165
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
178
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
166
179
|
argument can only be specified when `num_shards` is also specified.
|
|
167
180
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
168
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
169
|
-
Default: None, which means no cache is used.
|
|
181
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
182
|
+
Default: ``None`` , which means no cache is used.
|
|
170
183
|
|
|
171
184
|
Raises:
|
|
172
185
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -174,7 +187,12 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
174
187
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
175
188
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
176
189
|
|
|
190
|
+
Tutorial Examples:
|
|
191
|
+
- `Load & Process Data With Dataset Pipeline
|
|
192
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
193
|
+
|
|
177
194
|
Examples:
|
|
195
|
+
>>> import mindspore.dataset as ds
|
|
178
196
|
>>> amazon_review_dataset_dir = "/path/to/amazon_review_dataset_dir"
|
|
179
197
|
>>> dataset = ds.AmazonReviewDataset(dataset_dir=amazon_review_dataset_dir, usage='all')
|
|
180
198
|
|
|
@@ -228,36 +246,38 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
228
246
|
class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
229
247
|
"""
|
|
230
248
|
CLUE(Chinese Language Understanding Evaluation) dataset.
|
|
231
|
-
Supported CLUE classification tasks: 'AFQMC', 'TNEWS'
|
|
249
|
+
Supported CLUE classification tasks: ``'AFQMC'`` , ``'TNEWS'``, ``'IFLYTEK'``, ``'CMNLI'``,
|
|
250
|
+
``'WSC'`` and ``'CSL'``.
|
|
232
251
|
|
|
233
252
|
Args:
|
|
234
253
|
dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for
|
|
235
254
|
a pattern of files. The list will be sorted in a lexicographical order.
|
|
236
|
-
task (str, optional): The kind of task, one of 'AFQMC', 'TNEWS'
|
|
237
|
-
Default: 'AFQMC'.
|
|
238
|
-
usage (str, optional): Specify the 'train'
|
|
255
|
+
task (str, optional): The kind of task, one of ``'AFQMC'`` , ``'TNEWS'``, ``'IFLYTEK'``, ``'CMNLI'``,
|
|
256
|
+
``'WSC'`` and ``'CSL'``. Default: ``'AFQMC'`` .
|
|
257
|
+
usage (str, optional): Specify the ``'train'``, ``'test'`` or ``'eval'`` part of dataset.
|
|
258
|
+
Default: ``'train'``.
|
|
239
259
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
240
|
-
Default: None, will include all images.
|
|
260
|
+
Default: ``None`` , will include all samples.
|
|
241
261
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
242
|
-
Default: None, will use global default workers(8), it can be set
|
|
243
|
-
by
|
|
262
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
263
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
244
264
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
245
|
-
Default: Shuffle.GLOBAL. Bool type and Shuffle enum are both supported to pass in.
|
|
246
|
-
If shuffle is False
|
|
247
|
-
If shuffle is True
|
|
248
|
-
There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
|
|
265
|
+
Default: ``Shuffle.GLOBAL`` . Bool type and Shuffle enum are both supported to pass in.
|
|
266
|
+
If `shuffle` is ``False``, no shuffling will be performed.
|
|
267
|
+
If `shuffle` is ``True``, performs global shuffle.
|
|
268
|
+
There are three levels of shuffling, desired shuffle enum defined by :class:`mindspore.dataset.Shuffle` .
|
|
249
269
|
|
|
250
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True
|
|
270
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples, same as setting `shuffle` to ``True``.
|
|
251
271
|
|
|
252
|
-
- Shuffle.FILES: Shuffle files only.
|
|
272
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
253
273
|
|
|
254
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
274
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
255
275
|
When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
|
|
256
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
276
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
257
277
|
argument can only be specified when `num_shards` is also specified.
|
|
258
278
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
259
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
260
|
-
Default: None, which means no cache is used.
|
|
279
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
280
|
+
Default: ``None`` , which means no cache is used.
|
|
261
281
|
|
|
262
282
|
The generated dataset with different task setting has different output columns:
|
|
263
283
|
|
|
@@ -403,14 +423,20 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
|
403
423
|
|
|
404
424
|
Raises:
|
|
405
425
|
ValueError: If dataset_files are not valid or do not exist.
|
|
406
|
-
ValueError: task is not in 'AFQMC', 'TNEWS'
|
|
407
|
-
|
|
426
|
+
ValueError: If `task` is not one of ``'AFQMC'`` , ``'TNEWS'``, ``'IFLYTEK'``, ``'CMNLI'``, ``'WSC'``
|
|
427
|
+
or ``'CSL'``.
|
|
428
|
+
ValueError: If `usage` is not ``'train'``, ``'test'`` or ``'eval'``.
|
|
408
429
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
409
430
|
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
410
431
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
411
432
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
412
433
|
|
|
434
|
+
Tutorial Examples:
|
|
435
|
+
- `Load & Process Data With Dataset Pipeline
|
|
436
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
437
|
+
|
|
413
438
|
Examples:
|
|
439
|
+
>>> import mindspore.dataset as ds
|
|
414
440
|
>>> clue_dataset_dir = ["/path/to/clue_dataset_file"] # contains 1 or multiple clue files
|
|
415
441
|
>>> dataset = ds.CLUEDataset(dataset_files=clue_dataset_dir, task='AFQMC', usage='train')
|
|
416
442
|
|
|
@@ -471,30 +497,33 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
|
471
497
|
|
|
472
498
|
Args:
|
|
473
499
|
dataset_dir (str): Path to the root directory that contains the CoNLL2000 chunking dataset.
|
|
474
|
-
usage (str, optional): Usage of dataset, can be 'train', 'test', or 'all'.
|
|
475
|
-
For dataset, 'train' will read from 8,936 train samples,
|
|
476
|
-
'test' will read from 2,012 test samples,
|
|
477
|
-
'all' will read from all 1,0948 samples. Default: None, read all samples.
|
|
478
|
-
num_samples (int, optional): Number of samples (rows) to be read. Default: None,
|
|
500
|
+
usage (str, optional): Usage of dataset, can be ``'train'`` , ``'test'`` , or ``'all'`` .
|
|
501
|
+
For dataset, ``'train'`` will read from 8,936 train samples,
|
|
502
|
+
``'test'`` will read from 2,012 test samples,
|
|
503
|
+
``'all'`` will read from all 10,948 samples. Default: ``None`` , read all samples.
|
|
504
|
+
num_samples (int, optional): Number of samples (rows) to be read. Default: ``None`` ,
|
|
505
|
+
read the full dataset.
|
|
479
506
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
480
|
-
Default:
|
|
481
|
-
If shuffle is False, no shuffling will be performed.
|
|
482
|
-
If shuffle is True, performs global shuffle.
|
|
483
|
-
There are three levels of shuffling, desired shuffle enum defined by
|
|
507
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
508
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
509
|
+
If `shuffle` is ``True`` , performs global shuffle.
|
|
510
|
+
There are three levels of shuffling, desired shuffle enum defined by
|
|
511
|
+
:class:`mindspore.dataset.Shuffle` .
|
|
484
512
|
|
|
485
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True
|
|
486
|
-
- Shuffle.FILES: Shuffle files only.
|
|
513
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples, same as setting `shuffle` to ``True``.
|
|
514
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
487
515
|
|
|
488
516
|
num_shards (int, optional): Number of shards that the dataset will be divided into.
|
|
489
|
-
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
517
|
+
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
518
|
+
Default: ``None`` .
|
|
490
519
|
shard_id (int, optional): The shard ID within `num_shards` . This
|
|
491
|
-
argument can only be specified when `num_shards` is also specified. Default: None.
|
|
520
|
+
argument can only be specified when `num_shards` is also specified. Default: ``None`` .
|
|
492
521
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
493
|
-
Default: None, will use global default workers(8), it can be set
|
|
494
|
-
by
|
|
522
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
523
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
495
524
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
496
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
497
|
-
Default: None, which means no cache is used.
|
|
525
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
526
|
+
Default: ``None`` , which means no cache is used.
|
|
498
527
|
|
|
499
528
|
Raises:
|
|
500
529
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -502,7 +531,12 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
|
502
531
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
503
532
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
504
533
|
|
|
534
|
+
Tutorial Examples:
|
|
535
|
+
- `Load & Process Data With Dataset Pipeline
|
|
536
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
537
|
+
|
|
505
538
|
Examples:
|
|
539
|
+
>>> import mindspore.dataset as ds
|
|
506
540
|
>>> conll2000_dataset_dir = "/path/to/conll2000_dataset_dir"
|
|
507
541
|
>>> dataset = ds.CoNLL2000Dataset(dataset_dir=conll2000_dataset_dir, usage='all')
|
|
508
542
|
|
|
@@ -561,32 +595,34 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
|
561
595
|
|
|
562
596
|
Args:
|
|
563
597
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
564
|
-
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
565
|
-
'train' will read from 560,000 train samples,
|
|
566
|
-
'test' will read from 70,000 test samples,
|
|
567
|
-
'all' will read from all 630,000 samples. Default: None, all samples.
|
|
598
|
+
usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
|
|
599
|
+
``'train'`` will read from 560,000 train samples,
|
|
600
|
+
``'test'`` will read from 70,000 test samples,
|
|
601
|
+
``'all'`` will read from all 630,000 samples. Default: ``None`` , all samples.
|
|
568
602
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
569
|
-
Default: None, will include all text.
|
|
603
|
+
Default: ``None`` , will include all text.
|
|
570
604
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
571
|
-
Default: None, will use global default workers(8), it can be set
|
|
572
|
-
by
|
|
605
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
606
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
573
607
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
574
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
575
|
-
|
|
576
|
-
If shuffle is
|
|
608
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
609
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
610
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
611
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
612
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
577
613
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
578
614
|
|
|
579
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
615
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
580
616
|
|
|
581
|
-
- Shuffle.FILES: Shuffle files only.
|
|
617
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
582
618
|
|
|
583
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
619
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
584
620
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
585
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
621
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
586
622
|
argument can only be specified when `num_shards` is also specified.
|
|
587
623
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
588
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
589
|
-
Default: None, which means no cache is used.
|
|
624
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
625
|
+
Default: ``None`` , which means no cache is used.
|
|
590
626
|
|
|
591
627
|
Raises:
|
|
592
628
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -595,7 +631,12 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
|
595
631
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
596
632
|
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
597
633
|
|
|
634
|
+
Tutorial Examples:
|
|
635
|
+
- `Load & Process Data With Dataset Pipeline
|
|
636
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
637
|
+
|
|
598
638
|
Examples:
|
|
639
|
+
>>> import mindspore.dataset as ds
|
|
599
640
|
>>> dbpedia_dataset_dir = "/path/to/dbpedia_dataset_directory"
|
|
600
641
|
>>>
|
|
601
642
|
>>> # 1) Read 3 samples from DBpedia dataset
|
|
@@ -659,27 +700,28 @@ class EnWik9Dataset(SourceDataset, TextBaseDataset):
|
|
|
659
700
|
Args:
|
|
660
701
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
661
702
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
662
|
-
Default: None, will include all samples.
|
|
703
|
+
Default: ``None`` , will include all samples.
|
|
663
704
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
664
|
-
Default: None, will use global default workers(8), it can be set
|
|
665
|
-
by
|
|
705
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
706
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
666
707
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
667
|
-
Bool type and Shuffle enum are both supported to pass in. Default: True
|
|
668
|
-
If shuffle is False, no shuffling will be performed.
|
|
669
|
-
If shuffle is True, it is equivalent to setting `shuffle` to
|
|
708
|
+
Bool type and Shuffle enum are both supported to pass in. Default: ``True``.
|
|
709
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
710
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
711
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
670
712
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
671
713
|
|
|
672
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
714
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
673
715
|
|
|
674
|
-
- Shuffle.FILES: Shuffle files only.
|
|
716
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
675
717
|
|
|
676
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
718
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
677
719
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
678
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
720
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
679
721
|
argument can only be specified when `num_shards` is also specified.
|
|
680
722
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
681
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
682
|
-
Default: None, which means no cache is used.
|
|
723
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
724
|
+
Default: ``None`` , which means no cache is used.
|
|
683
725
|
|
|
684
726
|
Raises:
|
|
685
727
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -687,7 +729,12 @@ class EnWik9Dataset(SourceDataset, TextBaseDataset):
|
|
|
687
729
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
688
730
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
689
731
|
|
|
732
|
+
Tutorial Examples:
|
|
733
|
+
- `Load & Process Data With Dataset Pipeline
|
|
734
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
735
|
+
|
|
690
736
|
Examples:
|
|
737
|
+
>>> import mindspore.dataset as ds
|
|
691
738
|
>>> en_wik9_dataset_dir = "/path/to/en_wik9_dataset"
|
|
692
739
|
>>> dataset2 = ds.EnWik9Dataset(dataset_dir=en_wik9_dataset_dir, num_samples=2,
|
|
693
740
|
... shuffle=True)
|
|
@@ -744,25 +791,25 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
|
744
791
|
|
|
745
792
|
Args:
|
|
746
793
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
747
|
-
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
748
|
-
Default: None, will read all samples.
|
|
794
|
+
usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
|
|
795
|
+
Default: ``None`` , will read all samples.
|
|
749
796
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
750
|
-
Default: None, will include all samples.
|
|
797
|
+
Default: ``None`` , will include all samples.
|
|
751
798
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
752
|
-
Default: None, will use global default workers(8), it can be set
|
|
753
|
-
by
|
|
799
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
800
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
754
801
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
755
|
-
Default: None, expected order behavior shown in the table below.
|
|
802
|
+
Default: ``None`` , expected order behavior shown in the table below.
|
|
756
803
|
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
757
|
-
Default: None, expected order behavior shown in the table below.
|
|
804
|
+
Default: ``None`` , expected order behavior shown in the table below.
|
|
758
805
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
759
|
-
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
806
|
+
into. Default: ``None`` . When this argument is specified, `num_samples` reflects
|
|
760
807
|
the maximum number of samples per shard.
|
|
761
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
808
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
762
809
|
argument can only be specified when `num_shards` is also specified.
|
|
763
810
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
764
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
765
|
-
Default: None, which means no cache is used.
|
|
811
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
812
|
+
Default: ``None`` , which means no cache is used.
|
|
766
813
|
|
|
767
814
|
Raises:
|
|
768
815
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -773,38 +820,19 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
|
773
820
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
774
821
|
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
775
822
|
|
|
823
|
+
Tutorial Examples:
|
|
824
|
+
- `Load & Process Data With Dataset Pipeline
|
|
825
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
826
|
+
|
|
776
827
|
Note:
|
|
777
828
|
- The shape of the text column.
|
|
778
|
-
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
..
|
|
782
|
-
:widths: 25 25 50
|
|
783
|
-
:header-rows: 1
|
|
784
|
-
|
|
785
|
-
* - Parameter `sampler`
|
|
786
|
-
- Parameter `shuffle`
|
|
787
|
-
- Expected Order Behavior
|
|
788
|
-
* - None
|
|
789
|
-
- None
|
|
790
|
-
- random order
|
|
791
|
-
* - None
|
|
792
|
-
- True
|
|
793
|
-
- random order
|
|
794
|
-
* - None
|
|
795
|
-
- False
|
|
796
|
-
- sequential order
|
|
797
|
-
* - Sampler object
|
|
798
|
-
- None
|
|
799
|
-
- order defined by sampler
|
|
800
|
-
* - Sampler object
|
|
801
|
-
- True
|
|
802
|
-
- not allowed
|
|
803
|
-
* - Sampler object
|
|
804
|
-
- False
|
|
805
|
-
- not allowed
|
|
829
|
+
- The parameters `num_samples` , `shuffle` , `num_shards` , `shard_id` can be used to control the sampler
|
|
830
|
+
used in the dataset, and their effects when combined with parameter `sampler` are as follows.
|
|
831
|
+
|
|
832
|
+
.. include:: mindspore.dataset.sampler.txt
|
|
806
833
|
|
|
807
834
|
Examples:
|
|
835
|
+
>>> import mindspore.dataset as ds
|
|
808
836
|
>>> imdb_dataset_dir = "/path/to/imdb_dataset_directory"
|
|
809
837
|
>>>
|
|
810
838
|
>>> # 1) Read all samples (text files) in imdb_dataset_dir with 8 threads
|
|
@@ -887,37 +915,40 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|
|
887
915
|
|
|
888
916
|
Args:
|
|
889
917
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
890
|
-
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: None,
|
|
918
|
+
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: ``None`` ,
|
|
919
|
+
all samples.
|
|
891
920
|
language_pair (sequence, optional): Sequence containing source and target language, supported values are
|
|
892
|
-
('en', 'fr')
|
|
893
|
-
('ar', 'en')
|
|
921
|
+
``('en', 'fr')``, ``('en', 'de')``, ``('en', 'cs')``, ``('en', 'ar')``, ``('fr', 'en')``,
|
|
922
|
+
``('de', 'en')``, ``('cs', 'en')``, ``('ar', 'en')``. Default: ``None``, set to ``('de', 'en')``.
|
|
894
923
|
valid_set (str, optional): A string to identify validation set, when usage is valid or all, the validation set
|
|
895
|
-
of `valid_set` type will be read, supported values are 'dev2010'
|
|
896
|
-
and 'tst2014'
|
|
924
|
+
of `valid_set` type will be read, supported values are ``'dev2010'``, ``'tst2010'``, ``'tst2011'``,
|
|
925
|
+
``'tst2012'``, ``'tst2013'`` and ``'tst2014'``. Default: ``None``, set to ``'tst2013'``.
|
|
897
926
|
test_set (str, optional): A string to identify test set, when usage is test or all, the test set of `test_set`
|
|
898
|
-
type will be read, supported values are 'dev2010'
|
|
899
|
-
Default: 'tst2014'
|
|
900
|
-
num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
|
|
927
|
+
type will be read, supported values are ``'dev2010'``, ``'tst2010'``, ``'tst2011'``, ``'tst2012'``,
|
|
928
|
+
``'tst2013'`` and ``'tst2014'``. Default: ``None``, set to ``'tst2014'``.
|
|
929
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
|
|
901
930
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
902
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
903
|
-
|
|
904
|
-
If `shuffle` is
|
|
931
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
932
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
933
|
+
If `shuffle` is ``False``, no shuffling will be performed.
|
|
934
|
+
If `shuffle` is ``True``, it is equivalent to setting `shuffle` to
|
|
935
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
905
936
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
906
937
|
|
|
907
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
938
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
908
939
|
|
|
909
|
-
- Shuffle.FILES: Shuffle files only.
|
|
940
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
910
941
|
|
|
911
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
942
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
912
943
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
913
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
944
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
914
945
|
argument can only be specified when `num_shards` is also specified.
|
|
915
946
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
916
|
-
Default: None, will use global default workers(8), it can be set
|
|
917
|
-
by
|
|
947
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
948
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
918
949
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
919
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
920
|
-
Default: None, which means no cache is used.
|
|
950
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
951
|
+
Default: ``None`` , which means no cache is used.
|
|
921
952
|
|
|
922
953
|
Raises:
|
|
923
954
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -925,7 +956,12 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|
|
925
956
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
926
957
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
927
958
|
|
|
959
|
+
Tutorial Examples:
|
|
960
|
+
- `Load & Process Data With Dataset Pipeline
|
|
961
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
962
|
+
|
|
928
963
|
Examples:
|
|
964
|
+
>>> import mindspore.dataset as ds
|
|
929
965
|
>>> iwslt2016_dataset_dir = "/path/to/iwslt2016_dataset_dir"
|
|
930
966
|
>>> dataset = ds.IWSLT2016Dataset(dataset_dir=iwslt2016_dataset_dir, usage='all',
|
|
931
967
|
... language_pair=('de', 'en'), valid_set='tst2013', test_set='tst2014')
|
|
@@ -1015,32 +1051,36 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
|
|
|
1015
1051
|
|
|
1016
1052
|
Args:
|
|
1017
1053
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1018
|
-
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: None,
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
('
|
|
1022
|
-
('
|
|
1023
|
-
|
|
1054
|
+
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: ``None`` ,
|
|
1055
|
+
all samples.
|
|
1056
|
+
language_pair (sequence, optional): Sequence containing source and target language, supported values are ``('en', 'nl')``,
|
|
1057
|
+
``('en', 'de')``, ``('en', 'it')``, ``('en', 'ro')``, ``('nl', 'en')``, ``('nl', 'de')``, ``('nl', 'it')``,
|
|
1058
|
+
``('nl', 'ro')``, ``('de', 'en')``, ``('de', 'nl')``, ``('de', 'it')``, ``('de', 'ro')``, ``('it', 'en')``,
|
|
1059
|
+
``('it', 'nl')``, ``('it', 'de')``, ``('it', 'ro')``, ``('ro', 'en')``, ``('ro', 'nl')``, ``('ro', 'de')``,
|
|
1060
|
+
``('ro', 'it')``. Default: ``None``, set to ``('de', 'en')``.
|
|
1061
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
|
|
1024
1062
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1025
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
1026
|
-
|
|
1027
|
-
If shuffle is
|
|
1063
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
1064
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1065
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
1066
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
1067
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1028
1068
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
1029
1069
|
|
|
1030
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1070
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
1031
1071
|
|
|
1032
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1072
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1033
1073
|
|
|
1034
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1074
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1035
1075
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1036
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1076
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1037
1077
|
argument can only be specified when `num_shards` is also specified.
|
|
1038
1078
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1039
|
-
Default: None, will use global default workers(8), it can be set
|
|
1040
|
-
by
|
|
1079
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1080
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1041
1081
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1042
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1043
|
-
Default: None, which means no cache is used.
|
|
1082
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1083
|
+
Default: ``None`` , which means no cache is used.
|
|
1044
1084
|
|
|
1045
1085
|
Raises:
|
|
1046
1086
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -1048,7 +1088,12 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
|
|
|
1048
1088
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1049
1089
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1050
1090
|
|
|
1091
|
+
Tutorial Examples:
|
|
1092
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1093
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1094
|
+
|
|
1051
1095
|
Examples:
|
|
1096
|
+
>>> import mindspore.dataset as ds
|
|
1052
1097
|
>>> iwslt2017_dataset_dir = "/path/to/iwslt2017_dataset_dir"
|
|
1053
1098
|
>>> dataset = ds.IWSLT2017Dataset(dataset_dir=iwslt2017_dataset_dir, usage='all', language_pair=('de', 'en'))
|
|
1054
1099
|
|
|
@@ -1114,35 +1159,37 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
|
|
|
1114
1159
|
|
|
1115
1160
|
Args:
|
|
1116
1161
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1117
|
-
usage (str, optional): Acceptable usages include 'train'
|
|
1118
|
-
Default: None, will read all samples.
|
|
1119
|
-
language_pair (Sequence[str, str], optional): Acceptable language_pair include ['en', 'de']
|
|
1120
|
-
Default: None, means ['en', 'de']
|
|
1162
|
+
usage (str, optional): Acceptable usages include ``'train'``, ``'test'``, ``'valid'`` or ``'all'``.
|
|
1163
|
+
Default: ``None`` , will read all samples.
|
|
1164
|
+
language_pair (Sequence[str, str], optional): Acceptable language_pair include ``['en', 'de']``,
|
|
1165
|
+
``['de', 'en']``. Default: ``None`` , means ``['en', 'de']``.
|
|
1121
1166
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1122
|
-
Default: None, will read all samples.
|
|
1167
|
+
Default: ``None`` , will read all samples.
|
|
1123
1168
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1124
|
-
Default: None, will use global default workers(8), it can be set
|
|
1125
|
-
by
|
|
1126
|
-
shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: None,
|
|
1127
|
-
|
|
1128
|
-
If
|
|
1169
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1170
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1171
|
+
shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: ``None`` ,
|
|
1172
|
+
means ``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1173
|
+
If ``False`` is provided, no shuffling will be performed.
|
|
1174
|
+
If ``True`` is provided, it is the same as setting to
|
|
1175
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1129
1176
|
If Shuffle is provided, the effect is as follows:
|
|
1130
1177
|
|
|
1131
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1132
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1178
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
1179
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1133
1180
|
|
|
1134
1181
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
1135
|
-
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
1182
|
+
into. Default: ``None`` . When this argument is specified, `num_samples` reflects
|
|
1136
1183
|
the maximum number of samples per shard.
|
|
1137
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1184
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1138
1185
|
argument can only be specified when `num_shards` is also specified.
|
|
1139
1186
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1140
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1141
|
-
Default: None, which means no cache is used.
|
|
1187
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1188
|
+
Default: ``None`` , which means no cache is used.
|
|
1142
1189
|
|
|
1143
1190
|
Raises:
|
|
1144
1191
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
1145
|
-
ValueError: If `usage` is not 'train'
|
|
1192
|
+
ValueError: If `usage` is not ``'train'``, ``'test'``, ``'valid'`` or ``'all'``.
|
|
1146
1193
|
TypeError: If `language_pair` is not of type Sequence[str, str].
|
|
1147
1194
|
RuntimeError: If `num_samples` is less than 0.
|
|
1148
1195
|
RuntimeError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
@@ -1150,7 +1197,12 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
|
|
|
1150
1197
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1151
1198
|
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1152
1199
|
|
|
1200
|
+
Tutorial Examples:
|
|
1201
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1202
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1203
|
+
|
|
1153
1204
|
Examples:
|
|
1205
|
+
>>> import mindspore.dataset as ds
|
|
1154
1206
|
>>> multi30k_dataset_dir = "/path/to/multi30k_dataset_directory"
|
|
1155
1207
|
>>> data = ds.Multi30kDataset(dataset_dir=multi30k_dataset_dir, usage='all', language_pair=['de', 'en'])
|
|
1156
1208
|
|
|
@@ -1215,32 +1267,34 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
|
|
|
1215
1267
|
|
|
1216
1268
|
Args:
|
|
1217
1269
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1218
|
-
usage (str, optional): Acceptable usages include 'train'
|
|
1219
|
-
'train' will read from 42,068 train samples of string type,
|
|
1220
|
-
'test' will read from 3,370 test samples of string type,
|
|
1221
|
-
'valid' will read from 3,761 test samples of string type,
|
|
1222
|
-
'all' will read from all 49,199 samples of string type. Default: None, all samples.
|
|
1223
|
-
num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
|
|
1270
|
+
usage (str, optional): Acceptable usages include ``'train'``, ``'test'``, ``'valid'`` and ``'all'``.
|
|
1271
|
+
``'train'`` will read from 42,068 train samples of string type,
|
|
1272
|
+
``'test'`` will read from 3,370 test samples of string type,
|
|
1273
|
+
``'valid'`` will read from 3,761 validation samples of string type,
|
|
1274
|
+
``'all'`` will read from all 49,199 samples of string type. Default: ``None`` , all samples.
|
|
1275
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
|
|
1224
1276
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1225
|
-
Default: None, will use global default workers(8), it can be set
|
|
1226
|
-
by
|
|
1277
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1278
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1227
1279
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1228
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
1229
|
-
|
|
1230
|
-
If shuffle is
|
|
1280
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
1281
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1282
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
1283
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
1284
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1231
1285
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
1232
1286
|
|
|
1233
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1287
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
1234
1288
|
|
|
1235
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1289
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1236
1290
|
|
|
1237
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1291
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1238
1292
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1239
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1293
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1240
1294
|
argument can only be specified when `num_shards` is also specified.
|
|
1241
1295
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1242
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1243
|
-
Default: None, which means no cache is used.
|
|
1296
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1297
|
+
Default: ``None`` , which means no cache is used.
|
|
1244
1298
|
|
|
1245
1299
|
Raises:
|
|
1246
1300
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -1248,7 +1302,12 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
|
|
|
1248
1302
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1249
1303
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1250
1304
|
|
|
1305
|
+
Tutorial Examples:
|
|
1306
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1307
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1308
|
+
|
|
1251
1309
|
Examples:
|
|
1310
|
+
>>> import mindspore.dataset as ds
|
|
1252
1311
|
>>> penn_treebank_dataset_dir = "/path/to/penn_treebank_dataset_directory"
|
|
1253
1312
|
>>> dataset = ds.PennTreebankDataset(dataset_dir=penn_treebank_dataset_dir, usage='all')
|
|
1254
1313
|
|
|
@@ -1313,29 +1372,31 @@ class SogouNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
1313
1372
|
|
|
1314
1373
|
Args:
|
|
1315
1374
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1316
|
-
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' .
|
|
1317
|
-
'train' will read from 450,000 train samples, 'test' will read from 60,000 test samples,
|
|
1318
|
-
'all' will read from all 510,000 samples. Default: None, all samples.
|
|
1319
|
-
num_samples (int, optional): Number of samples (rows) to read. Default: None, read all samples.
|
|
1375
|
+
usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
|
|
1376
|
+
``'train'`` will read from 450,000 train samples, ``'test'`` will read from 60,000 test samples,
|
|
1377
|
+
``'all'`` will read from all 510,000 samples. Default: ``None`` , all samples.
|
|
1378
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , read all samples.
|
|
1320
1379
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1321
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
1322
|
-
|
|
1323
|
-
If shuffle is
|
|
1380
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
1381
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1382
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
1383
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
1384
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1324
1385
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
1325
1386
|
|
|
1326
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
|
|
1387
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples, same as setting shuffle to True.
|
|
1327
1388
|
|
|
1328
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1329
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1389
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1390
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1330
1391
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1331
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1392
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1332
1393
|
argument can only be specified when `num_shards` is also specified.
|
|
1333
1394
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1334
|
-
Default: None, will use global default workers(8), it can be set
|
|
1335
|
-
by
|
|
1395
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1396
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1336
1397
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1337
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1338
|
-
Default: None, which means no cache is used.
|
|
1398
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1399
|
+
Default: ``None`` , which means no cache is used.
|
|
1339
1400
|
|
|
1340
1401
|
Raises:
|
|
1341
1402
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -1343,7 +1404,12 @@ class SogouNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
1343
1404
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1344
1405
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1345
1406
|
|
|
1407
|
+
Tutorial Examples:
|
|
1408
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1409
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1410
|
+
|
|
1346
1411
|
Examples:
|
|
1412
|
+
>>> import mindspore.dataset as ds
|
|
1347
1413
|
>>> sogou_news_dataset_dir = "/path/to/sogou_news_dataset_dir"
|
|
1348
1414
|
>>> dataset = ds.SogouNewsDataset(dataset_dir=sogou_news_dataset_dir, usage='all')
|
|
1349
1415
|
|
|
@@ -1405,27 +1471,30 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
|
|
|
1405
1471
|
|
|
1406
1472
|
Args:
|
|
1407
1473
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1408
|
-
usage (str, optional): Specify the 'train'
|
|
1474
|
+
usage (str, optional): Specify the ``'train'``, ``'dev'`` or ``'all'`` part of dataset.
|
|
1475
|
+
Default: ``None`` , all samples.
|
|
1409
1476
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1410
|
-
Default: None, will include all samples.
|
|
1477
|
+
Default: ``None`` , will include all samples.
|
|
1411
1478
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1412
|
-
Default: None, will use global default workers(8), it can be set
|
|
1413
|
-
by
|
|
1414
|
-
shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset.
|
|
1415
|
-
|
|
1416
|
-
If
|
|
1479
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1480
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1481
|
+
shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset.
|
|
1482
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1483
|
+
If ``False`` is provided, no shuffling will be performed.
|
|
1484
|
+
If ``True`` is provided, it is the same as setting to
|
|
1485
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1417
1486
|
If Shuffle is provided, the effect is as follows:
|
|
1418
1487
|
|
|
1419
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1420
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1488
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
1489
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1421
1490
|
|
|
1422
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1491
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1423
1492
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1424
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1493
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1425
1494
|
argument can only be specified when `num_shards` is also specified.
|
|
1426
1495
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1427
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1428
|
-
Default: None, which means no cache is used.
|
|
1496
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1497
|
+
Default: ``None`` , which means no cache is used.
|
|
1429
1498
|
|
|
1430
1499
|
Raises:
|
|
1431
1500
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -1434,7 +1503,12 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
|
|
|
1434
1503
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1435
1504
|
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1436
1505
|
|
|
1506
|
+
Tutorial Examples:
|
|
1507
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1508
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1509
|
+
|
|
1437
1510
|
Examples:
|
|
1511
|
+
>>> import mindspore.dataset as ds
|
|
1438
1512
|
>>> squad_dataset_dir = "/path/to/squad_dataset_file"
|
|
1439
1513
|
>>> dataset = ds.SQuADDataset(dataset_dir=squad_dataset_dir, usage='all')
|
|
1440
1514
|
|
|
@@ -1515,38 +1589,44 @@ class SST2Dataset(SourceDataset, TextBaseDataset):
|
|
|
1515
1589
|
|
|
1516
1590
|
Args:
|
|
1517
1591
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1518
|
-
usage (str, optional): Usage of this dataset, can be
|
|
1519
|
-
from 67,349 train samples,
|
|
1520
|
-
all 872 samples. Default: None, will read train samples.
|
|
1592
|
+
usage (str, optional): Usage of this dataset, can be ``"train"``, ``"test"`` or ``"dev"``.
|
|
1593
|
+
``"train"`` will read from 67,349 train samples, ``"test"`` will read from 1,821 test samples,
|
|
1594
|
+
``"dev"`` will read from 872 dev samples. Default: ``None`` , will read train samples.
|
|
1521
1595
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1522
|
-
Default: None, will include all text.
|
|
1596
|
+
Default: ``None`` , will include all text.
|
|
1523
1597
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1524
|
-
Default: None, will use global default workers(8), it can be set
|
|
1525
|
-
by
|
|
1598
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1599
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1526
1600
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1527
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
1528
|
-
|
|
1529
|
-
If shuffle is
|
|
1601
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
1602
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1603
|
+
If `shuffle` is ``False`` , no shuffling will be performed;
|
|
1604
|
+
If `shuffle` is ``True`` , the behavior is the same as setting `shuffle` to ``Shuffle.GLOBAL`` .
|
|
1530
1605
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
1531
1606
|
|
|
1532
|
-
- Shuffle.GLOBAL: Shuffle the samples.
|
|
1607
|
+
- ``Shuffle.GLOBAL`` : Shuffle the samples.
|
|
1533
1608
|
|
|
1534
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1609
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1535
1610
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1536
|
-
shard_id (int, optional): The shard ID within num_shards
|
|
1537
|
-
num_shards is also specified. Default: None.
|
|
1611
|
+
shard_id (int, optional): The shard ID within `num_shards`. This argument can only be specified when
|
|
1612
|
+
`num_shards` is also specified. Default: ``None`` .
|
|
1538
1613
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1539
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1540
|
-
Default: None, which means no cache is used.
|
|
1614
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1615
|
+
Default: ``None`` , which means no cache is used.
|
|
1541
1616
|
|
|
1542
1617
|
Raises:
|
|
1543
1618
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
1544
1619
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1545
1620
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1546
|
-
RuntimeError: If `shard_id` is specified but num_shards is None.
|
|
1621
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1547
1622
|
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1548
1623
|
|
|
1624
|
+
Tutorial Examples:
|
|
1625
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1626
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1627
|
+
|
|
1549
1628
|
Examples:
|
|
1629
|
+
>>> import mindspore.dataset as ds
|
|
1550
1630
|
>>> sst2_dataset_dir = "/path/to/sst2_dataset_directory"
|
|
1551
1631
|
>>>
|
|
1552
1632
|
>>> # 1) Read 3 samples from SST2 dataset
|
|
@@ -1614,27 +1694,28 @@ class TextFileDataset(SourceDataset, TextBaseDataset):
|
|
|
1614
1694
|
dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
|
|
1615
1695
|
pattern of files. The list will be sorted in a lexicographical order.
|
|
1616
1696
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1617
|
-
Default: None, will include all images.
|
|
1697
|
+
Default: ``None`` , will include all text.
|
|
1618
1698
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1619
|
-
Default: None, will use global default workers(8), it can be set
|
|
1620
|
-
by
|
|
1699
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1700
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1621
1701
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1622
|
-
Default:
|
|
1623
|
-
|
|
1624
|
-
If shuffle is
|
|
1625
|
-
|
|
1702
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1703
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
1704
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
1705
|
+
If `shuffle` is ``True`` , performs global shuffle.
|
|
1706
|
+
There are three levels of shuffling; the desired level is selected with the :class:`mindspore.dataset.Shuffle` enum.
|
|
1626
1707
|
|
|
1627
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
|
|
1708
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples, same as setting shuffle to True.
|
|
1628
1709
|
|
|
1629
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1710
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1630
1711
|
|
|
1631
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1712
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1632
1713
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1633
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1714
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1634
1715
|
argument can only be specified when `num_shards` is also specified.
|
|
1635
1716
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1636
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1637
|
-
Default: None, which means no cache is used.
|
|
1717
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1718
|
+
Default: ``None`` , which means no cache is used.
|
|
1638
1719
|
|
|
1639
1720
|
Raises:
|
|
1640
1721
|
ValueError: If `dataset_files` are not valid or do not exist.
|
|
@@ -1643,9 +1724,14 @@ class TextFileDataset(SourceDataset, TextBaseDataset):
|
|
|
1643
1724
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1644
1725
|
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1645
1726
|
|
|
1727
|
+
Tutorial Examples:
|
|
1728
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1729
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1730
|
+
|
|
1646
1731
|
Examples:
|
|
1647
|
-
>>>
|
|
1648
|
-
>>>
|
|
1732
|
+
>>> import mindspore.dataset as ds
|
|
1733
|
+
>>> text_file_list = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
|
|
1734
|
+
>>> dataset = ds.TextFileDataset(dataset_files=text_file_list)
|
|
1649
1735
|
"""
|
|
1650
1736
|
|
|
1651
1737
|
@check_textfiledataset
|
|
@@ -1670,30 +1756,33 @@ class UDPOSDataset(SourceDataset, TextBaseDataset):
|
|
|
1670
1756
|
|
|
1671
1757
|
Args:
|
|
1672
1758
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1673
|
-
usage (str, optional): Usage of this dataset, can be 'train'
|
|
1674
|
-
|
|
1675
|
-
'
|
|
1676
|
-
|
|
1759
|
+
usage (str, optional): Usage of this dataset, can be ``'train'``, ``'test'``, ``'valid'`` or ``'all'``.
|
|
1760
|
+
``'train'`` will read from 12,543 train samples, ``'test'`` will read from 2,077 test samples,
|
|
1761
|
+
``'valid'`` will read from 2,002 validation samples, ``'all'`` will read from all 16,622 samples.
|
|
1762
|
+
Default: ``None`` , all samples.
|
|
1763
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
|
|
1677
1764
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1678
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
1679
|
-
|
|
1680
|
-
If shuffle is
|
|
1765
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
1766
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1767
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
1768
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
1769
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1681
1770
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
1682
1771
|
|
|
1683
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1772
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
1684
1773
|
|
|
1685
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1774
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1686
1775
|
|
|
1687
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1776
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1688
1777
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1689
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1778
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1690
1779
|
argument can only be specified when `num_shards` is also specified.
|
|
1691
1780
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1692
|
-
Default: None, will use global default workers(8), it can be set
|
|
1693
|
-
by
|
|
1781
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1782
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1694
1783
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1695
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1696
|
-
Default: None, which means no cache is used.
|
|
1784
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1785
|
+
Default: ``None`` , which means no cache is used.
|
|
1697
1786
|
|
|
1698
1787
|
Raises:
|
|
1699
1788
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -1701,7 +1790,12 @@ class UDPOSDataset(SourceDataset, TextBaseDataset):
|
|
|
1701
1790
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1702
1791
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1703
1792
|
|
|
1793
|
+
Tutorial Examples:
|
|
1794
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1795
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1796
|
+
|
|
1704
1797
|
Examples:
|
|
1798
|
+
>>> import mindspore.dataset as ds
|
|
1705
1799
|
>>> udpos_dataset_dir = "/path/to/udpos_dataset_dir"
|
|
1706
1800
|
>>> dataset = ds.UDPOSDataset(dataset_dir=udpos_dataset_dir, usage='all')
|
|
1707
1801
|
|
|
@@ -1747,28 +1841,31 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):
|
|
|
1747
1841
|
|
|
1748
1842
|
Args:
|
|
1749
1843
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1750
|
-
usage (str, optional): Acceptable usages include 'train'
|
|
1751
|
-
|
|
1844
|
+
usage (str, optional): Acceptable usages include ``'train'``, ``'test'``, ``'valid'`` and ``'all'``.
|
|
1845
|
+
Default: ``None`` , all samples.
|
|
1846
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
|
|
1752
1847
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1753
|
-
Default: None, will use global default workers(8), it can be set
|
|
1754
|
-
by
|
|
1848
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1849
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1755
1850
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1756
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
1757
|
-
|
|
1758
|
-
If shuffle is
|
|
1851
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
1852
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1853
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
1854
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
1855
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1759
1856
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
1760
1857
|
|
|
1761
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1858
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
1762
1859
|
|
|
1763
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1860
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1764
1861
|
|
|
1765
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1862
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1766
1863
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1767
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1864
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1768
1865
|
argument can only be specified when `num_shards` is also specified.
|
|
1769
1866
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1770
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1771
|
-
Default: None, which means no cache is used.
|
|
1867
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1868
|
+
Default: ``None`` , which means no cache is used.
|
|
1772
1869
|
|
|
1773
1870
|
Raises:
|
|
1774
1871
|
RuntimeError: If `dataset_dir` does not contain data files or is invalid.
|
|
@@ -1778,6 +1875,10 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):
|
|
|
1778
1875
|
ValueError: If `num_samples` is invalid (< 0).
|
|
1779
1876
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1780
1877
|
|
|
1878
|
+
Tutorial Examples:
|
|
1879
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1880
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1881
|
+
|
|
1781
1882
|
About WikiTextDataset dataset:
|
|
1782
1883
|
|
|
1783
1884
|
The WikiText Long Term Dependency Language Modeling Dataset is an English lexicon containing 100 million words.
|
|
@@ -1809,6 +1910,7 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):
|
|
|
1809
1910
|
}
|
|
1810
1911
|
|
|
1811
1912
|
Examples:
|
|
1913
|
+
>>> import mindspore.dataset as ds
|
|
1812
1914
|
>>> wiki_text_dataset_dir = "/path/to/wiki_text_dataset_directory"
|
|
1813
1915
|
>>> dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage='all')
|
|
1814
1916
|
"""
|
|
@@ -1834,31 +1936,33 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):
|
|
|
1834
1936
|
|
|
1835
1937
|
Args:
|
|
1836
1938
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1837
|
-
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
1838
|
-
from 1,400,000 train samples, 'test' will read from 60,000 test
|
|
1839
|
-
all 1,460,000 samples. Default: None, all samples.
|
|
1939
|
+
usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
|
|
1940
|
+
``'train'`` will read from 1,400,000 train samples, ``'test'`` will read from 60,000 test
|
|
1941
|
+
samples, ``'all'`` will read from all 1,460,000 samples. Default: ``None`` , all samples.
|
|
1840
1942
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1841
|
-
Default: None, will include all text.
|
|
1943
|
+
Default: ``None`` , will include all text.
|
|
1842
1944
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1843
|
-
Default: None, will use global default workers(8), it can be set
|
|
1844
|
-
by
|
|
1945
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
1946
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1845
1947
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1846
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
1847
|
-
|
|
1848
|
-
If shuffle is
|
|
1948
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
1949
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
1950
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
1951
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
1952
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1849
1953
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
1850
1954
|
|
|
1851
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1955
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
1852
1956
|
|
|
1853
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1957
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
1854
1958
|
|
|
1855
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1959
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1856
1960
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1857
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1961
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1858
1962
|
argument can only be specified when `num_shards` is also specified.
|
|
1859
1963
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1860
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1861
|
-
Default: None, which means no cache is used.
|
|
1964
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
1965
|
+
Default: ``None`` , which means no cache is used.
|
|
1862
1966
|
|
|
1863
1967
|
Raises:
|
|
1864
1968
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -1867,7 +1971,12 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):
|
|
|
1867
1971
|
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1868
1972
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1869
1973
|
|
|
1974
|
+
Tutorial Examples:
|
|
1975
|
+
- `Load & Process Data With Dataset Pipeline
|
|
1976
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
1977
|
+
|
|
1870
1978
|
Examples:
|
|
1979
|
+
>>> import mindspore.dataset as ds
|
|
1871
1980
|
>>> yahoo_answers_dataset_dir = "/path/to/yahoo_answers_dataset_directory"
|
|
1872
1981
|
>>>
|
|
1873
1982
|
>>> # 1) Read 3 samples from YahooAnswers dataset
|
|
@@ -1929,31 +2038,34 @@ class YelpReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
1929
2038
|
|
|
1930
2039
|
Args:
|
|
1931
2040
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1932
|
-
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
1933
|
-
For Polarity, 'train' will read from 560,000 train samples,
|
|
1934
|
-
'
|
|
1935
|
-
|
|
1936
|
-
'
|
|
1937
|
-
|
|
2041
|
+
usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
|
|
2042
|
+
For Polarity, ``'train'`` will read from 560,000 train samples,
|
|
2043
|
+
``'test'`` will read from 38,000 test samples,
|
|
2044
|
+
``'all'`` will read from all 598,000 samples.
|
|
2045
|
+
For Full, ``'train'`` will read from 650,000 train samples, ``'test'`` will read from 50,000 test samples,
|
|
2046
|
+
``'all'`` will read from all 700,000 samples. Default: ``None`` , all samples.
|
|
2047
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads all samples.
|
|
1938
2048
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1939
|
-
Bool type and Shuffle enum are both supported to pass in.
|
|
1940
|
-
|
|
1941
|
-
If shuffle is
|
|
2049
|
+
Bool type and Shuffle enum are both supported to pass in.
|
|
2050
|
+
Default: ``Shuffle.GLOBAL`` .
|
|
2051
|
+
If `shuffle` is ``False`` , no shuffling will be performed.
|
|
2052
|
+
If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
|
|
2053
|
+
``mindspore.dataset.Shuffle.GLOBAL`` .
|
|
1942
2054
|
Set the mode of data shuffling by passing in enumeration variables:
|
|
1943
2055
|
|
|
1944
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
2056
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
1945
2057
|
|
|
1946
|
-
- Shuffle.FILES: Shuffle files only.
|
|
1947
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
2058
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
2059
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
|
|
1948
2060
|
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1949
|
-
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
2061
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1950
2062
|
argument can only be specified when `num_shards` is also specified.
|
|
1951
2063
|
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1952
|
-
Default: None, will use global default workers(8), it can be set
|
|
1953
|
-
by
|
|
2064
|
+
Default: ``None`` , will use global default workers(8), it can be set
|
|
2065
|
+
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
1954
2066
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1955
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.
|
|
1956
|
-
Default: None, which means no cache is used.
|
|
2067
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.2/dataset/cache.html>`_ .
|
|
2068
|
+
Default: ``None`` , which means no cache is used.
|
|
1957
2069
|
|
|
1958
2070
|
Raises:
|
|
1959
2071
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
@@ -1961,7 +2073,12 @@ class YelpReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
1961
2073
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1962
2074
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1963
2075
|
|
|
2076
|
+
Tutorial Examples:
|
|
2077
|
+
- `Load & Process Data With Dataset Pipeline
|
|
2078
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/dataset_gallery.html>`_
|
|
2079
|
+
|
|
1964
2080
|
Examples:
|
|
2081
|
+
>>> import mindspore.dataset as ds
|
|
1965
2082
|
>>> yelp_review_dataset_dir = "/path/to/yelp_review_dataset_dir"
|
|
1966
2083
|
>>> dataset = ds.YelpReviewDataset(dataset_dir=yelp_review_dataset_dir, usage='all')
|
|
1967
2084
|
|