mindspore 2.0.0a0__cp37-cp37m-win_amd64.whl → 2.0.0rc1__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -2
- mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +16 -1
- mindspore/_extends/parse/parser.py +107 -22
- mindspore/_extends/parse/resources.py +0 -7
- mindspore/_extends/parse/standard_method.py +885 -413
- mindspore/amp.py +52 -57
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +38 -20
- mindspore/boost/dim_reduce.py +3 -3
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +4 -6
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +41 -7
- mindspore/common/api.py +215 -141
- mindspore/common/dtype.py +8 -1
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +4 -2
- mindspore/common/jit_config.py +17 -13
- mindspore/common/mutable.py +33 -13
- mindspore/common/parameter.py +23 -21
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +62 -41
- mindspore/common/tensor.py +852 -1154
- mindspore/communication/__init__.py +2 -2
- mindspore/communication/_comm_helper.py +11 -4
- mindspore/communication/management.py +22 -21
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +201 -23
- mindspore/dataset/__init__.py +6 -6
- mindspore/dataset/audio/__init__.py +7 -7
- mindspore/dataset/audio/transforms.py +670 -30
- mindspore/dataset/audio/utils.py +47 -4
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +210 -14
- mindspore/dataset/core/validator_helpers.py +2 -2
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +322 -66
- mindspore/dataset/engine/datasets_audio.py +80 -76
- mindspore/dataset/engine/datasets_standard_format.py +51 -38
- mindspore/dataset/engine/datasets_text.py +232 -118
- mindspore/dataset/engine/datasets_user_defined.py +41 -17
- mindspore/dataset/engine/datasets_vision.py +746 -225
- mindspore/dataset/engine/graphdata.py +75 -10
- mindspore/dataset/engine/iterators.py +45 -5
- mindspore/dataset/engine/offload.py +48 -28
- mindspore/dataset/engine/validators.py +117 -8
- mindspore/dataset/text/__init__.py +6 -5
- mindspore/dataset/text/transforms.py +86 -3
- mindspore/dataset/text/utils.py +6 -4
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +3 -2
- mindspore/dataset/transforms/c_transforms.py +1 -1
- mindspore/dataset/transforms/transforms.py +2 -2
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +2 -3
- mindspore/dataset/vision/c_transforms.py +9 -9
- mindspore/dataset/vision/py_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +2 -0
- mindspore/dataset/vision/transforms.py +160 -161
- mindspore/dataset/vision/utils.py +3 -3
- mindspore/experimental/map_parameter.py +38 -26
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +44 -9
- mindspore/include/api/delegate.h +1 -1
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_parallel_runner.h +2 -2
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +19 -3
- mindspore/include/api/types.h +3 -3
- mindspore/include/dataset/constants.h +7 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filereader.py +18 -0
- mindspore/mindrecord/filewriter.py +197 -34
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
- mindspore/mindrecord/tools/csv_to_mr.py +3 -3
- mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/__init__.py +0 -4
- mindspore/nn/cell.py +204 -132
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +7 -6
- mindspore/nn/layer/__init__.py +5 -4
- mindspore/nn/layer/activation.py +40 -89
- mindspore/nn/layer/basic.py +255 -624
- mindspore/nn/layer/channel_shuffle.py +7 -6
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +41 -4
- mindspore/nn/layer/conv.py +64 -28
- mindspore/nn/layer/dense.py +9 -8
- mindspore/nn/layer/embedding.py +27 -25
- mindspore/nn/layer/image.py +53 -46
- mindspore/nn/layer/math.py +97 -105
- mindspore/nn/layer/normalization.py +117 -86
- mindspore/nn/layer/padding.py +185 -95
- mindspore/nn/layer/pooling.py +817 -414
- mindspore/nn/layer/rnn_cells.py +10 -15
- mindspore/nn/layer/rnns.py +37 -38
- mindspore/nn/layer/thor_layer.py +11 -12
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +5 -4
- mindspore/nn/loss/loss.py +334 -199
- mindspore/nn/optim/ada_grad.py +6 -6
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +4 -5
- mindspore/nn/optim/adam.py +126 -62
- mindspore/nn/optim/adamax.py +3 -4
- mindspore/nn/optim/adasum.py +6 -6
- mindspore/nn/optim/asgd.py +2 -2
- mindspore/nn/optim/ftrl.py +67 -38
- mindspore/nn/optim/lamb.py +4 -5
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +43 -4
- mindspore/nn/optim/momentum.py +6 -5
- mindspore/nn/optim/optimizer.py +3 -1
- mindspore/nn/optim/proximal_ada_grad.py +2 -2
- mindspore/nn/optim/rmsprop.py +1 -1
- mindspore/nn/optim/rprop.py +8 -9
- mindspore/nn/optim/sgd.py +19 -13
- mindspore/nn/optim/thor.py +10 -15
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +4 -4
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +9 -15
- mindspore/nn/probability/distribution/bernoulli.py +3 -3
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +5 -7
- mindspore/nn/probability/distribution/cauchy.py +3 -3
- mindspore/nn/probability/distribution/distribution.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +3 -3
- mindspore/nn/probability/distribution/half_normal.py +15 -11
- mindspore/nn/probability/distribution/laplace.py +16 -13
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/normal.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/student_t.py +20 -15
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +27 -10
- mindspore/nn/wrap/grad_reducer.py +2 -2
- mindspore/nn/wrap/loss_scale.py +40 -24
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +35 -30
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +22 -19
- mindspore/numpy/utils.py +1 -1
- mindspore/numpy/utils_const.py +108 -58
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +86 -117
- mindspore/ops/_grad/grad_base.py +23 -1
- mindspore/ops/_grad/grad_clip_ops.py +2 -3
- mindspore/ops/_grad/grad_comm_ops.py +34 -24
- mindspore/ops/_grad/grad_implementations.py +9 -45
- mindspore/ops/_grad/grad_inner_ops.py +47 -4
- mindspore/ops/_grad/grad_math_ops.py +142 -117
- mindspore/ops/_grad/grad_nn_ops.py +71 -165
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +7 -6
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
- mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
- mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
- mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -611
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_register_for_op.py +1 -0
- mindspore/ops/_utils/__init__.py +1 -2
- mindspore/ops/_utils/utils.py +19 -40
- mindspore/ops/_vmap/vmap_array_ops.py +116 -38
- mindspore/ops/_vmap/vmap_base.py +16 -9
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
- mindspore/ops/_vmap/vmap_image_ops.py +12 -5
- mindspore/ops/_vmap/vmap_math_ops.py +46 -5
- mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
- mindspore/ops/_vmap/vmap_random_ops.py +1 -1
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
- mindspore/ops/composite/__init__.py +7 -8
- mindspore/ops/composite/base.py +101 -47
- mindspore/ops/composite/math_ops.py +188 -158
- mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
- mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
- mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
- mindspore/ops/function/__init__.py +152 -8
- mindspore/ops/function/array_func.py +2555 -674
- mindspore/ops/function/clip_func.py +209 -13
- mindspore/ops/function/debug_func.py +2 -2
- mindspore/ops/function/grad/__init__.py +2 -1
- mindspore/ops/function/grad/grad_func.py +147 -62
- mindspore/ops/function/image_func.py +54 -38
- mindspore/ops/function/linalg_func.py +167 -16
- mindspore/ops/function/math_func.py +4849 -1492
- mindspore/ops/function/nn_func.py +2573 -988
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +3 -3
- mindspore/ops/function/random_func.py +790 -73
- mindspore/ops/function/sparse_func.py +98 -78
- mindspore/ops/function/sparse_unary_func.py +54 -53
- mindspore/ops/function/spectral_func.py +27 -24
- mindspore/ops/function/vmap_func.py +22 -2
- mindspore/ops/functional.py +97 -37
- mindspore/ops/op_info_register.py +70 -28
- mindspore/ops/operations/__init__.py +47 -14
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +5 -5
- mindspore/ops/operations/_grad_ops.py +276 -187
- mindspore/ops/operations/_inner_ops.py +319 -113
- mindspore/ops/operations/_ms_kernel.py +10 -8
- mindspore/ops/operations/_ocr_ops.py +9 -9
- mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
- mindspore/ops/operations/_quant_ops.py +137 -102
- mindspore/ops/operations/_rl_inner_ops.py +121 -60
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1004 -2
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +801 -466
- mindspore/ops/operations/comm_ops.py +51 -49
- mindspore/ops/operations/control_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +123 -44
- mindspore/ops/operations/debug_ops.py +24 -24
- mindspore/ops/operations/image_ops.py +240 -153
- mindspore/ops/operations/inner_ops.py +34 -50
- mindspore/ops/operations/linalg_ops.py +31 -9
- mindspore/ops/operations/math_ops.py +988 -757
- mindspore/ops/operations/nn_ops.py +965 -819
- mindspore/ops/operations/other_ops.py +51 -40
- mindspore/ops/operations/random_ops.py +204 -122
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +254 -93
- mindspore/ops/operations/spectral_ops.py +35 -3
- mindspore/ops/primitive.py +111 -9
- mindspore/parallel/_auto_parallel_context.py +189 -83
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +99 -7
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +7 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
- mindspore/parallel/_utils.py +1 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +37 -34
- mindspore/parallel/shard.py +17 -18
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +69 -47
- mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
- mindspore/profiler/parser/base_timeline_generator.py +49 -56
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
- mindspore/profiler/parser/hwts_log_parser.py +1 -1
- mindspore/profiler/parser/integrator.py +15 -14
- mindspore/profiler/parser/minddata_analyzer.py +2 -2
- mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +2 -1
- mindspore/profiler/profiling.py +218 -186
- mindspore/rewrite/__init__.py +3 -1
- mindspore/rewrite/api/node.py +1 -114
- mindspore/rewrite/api/node_type.py +3 -0
- mindspore/rewrite/api/pattern_engine.py +31 -1
- mindspore/rewrite/api/scoped_value.py +4 -4
- mindspore/rewrite/api/symbol_tree.py +3 -78
- mindspore/rewrite/api/tree_node_helper.py +1 -1
- mindspore/rewrite/ast_creator_register.py +1 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -2
- mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
- mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
- mindspore/rewrite/namespace.py +0 -2
- mindspore/rewrite/node.py +157 -11
- mindspore/rewrite/parsers/assign_parser.py +231 -53
- mindspore/rewrite/parsers/class_def_parser.py +187 -109
- mindspore/rewrite/parsers/for_parser.py +24 -14
- mindspore/rewrite/parsers/function_def_parser.py +21 -4
- mindspore/rewrite/parsers/if_parser.py +6 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +256 -133
- mindspore/rewrite/symbol_tree_builder.py +38 -1
- mindspore/run_check/_check_version.py +69 -63
- mindspore/run_check/run_check.py +2 -1
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +1 -1
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +273 -102
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +2 -2
- mindspore/train/callback/_checkpoint.py +3 -3
- mindspore/train/callback/_early_stop.py +3 -3
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +29 -31
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +3 -3
- mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
- mindspore/train/callback/_summary_collector.py +23 -16
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +15 -3
- mindspore/train/dataset_helper.py +10 -15
- mindspore/train/loss_scale_manager.py +8 -11
- mindspore/train/metrics/__init__.py +1 -1
- mindspore/train/metrics/bleu_score.py +1 -1
- mindspore/train/metrics/confusion_matrix.py +1 -1
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/dice.py +2 -2
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +4 -3
- mindspore/train/metrics/mean_surface_distance.py +2 -2
- mindspore/train/metrics/occlusion_sensitivity.py +1 -1
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +1 -1
- mindspore/train/metrics/recall.py +1 -1
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +45 -28
- mindspore/train/serialization.py +295 -188
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -13
- mindspore/train/train_thor/convert_utils.py +2 -2
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
- mindspore/compression/__init__.py +0 -19
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -515
- mindspore/compression/quant/__init__.py +0 -28
- mindspore/compression/quant/qat.py +0 -634
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -140
- mindspore/nn/probability/dpn/vae/vae.py +0 -124
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
- mindspore/ops/composite/array_ops.py +0 -241
- mindspore/ops/composite/clip_ops.py +0 -134
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -30,14 +30,15 @@ from .validators import check_imdb_dataset, check_iwslt2016_dataset, check_iwslt
|
|
|
30
30
|
check_penn_treebank_dataset, check_ag_news_dataset, check_amazon_review_dataset, check_udpos_dataset, \
|
|
31
31
|
check_wiki_text_dataset, check_conll2000_dataset, check_cluedataset, \
|
|
32
32
|
check_sogou_news_dataset, check_textfiledataset, check_dbpedia_dataset, check_yelp_review_dataset, \
|
|
33
|
-
check_en_wik9_dataset, check_yahoo_answers_dataset, check_multi30k_dataset, check_squad_dataset
|
|
33
|
+
check_en_wik9_dataset, check_yahoo_answers_dataset, check_multi30k_dataset, check_squad_dataset, \
|
|
34
|
+
check_sst2_dataset
|
|
34
35
|
|
|
35
36
|
from ..core.validator_helpers import replace_none
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
class AGNewsDataset(SourceDataset, TextBaseDataset):
|
|
39
40
|
"""
|
|
40
|
-
|
|
41
|
+
AG News dataset.
|
|
41
42
|
|
|
42
43
|
The generated dataset has three columns: :py:obj:`[index, title, description]` ,
|
|
43
44
|
and the data type of three columns is string type.
|
|
@@ -46,8 +47,9 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
46
47
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
47
48
|
usage (str, optional): Acceptable usages include 'train', 'test' and 'all'. Default: None, all samples.
|
|
48
49
|
num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
|
|
49
|
-
num_parallel_workers (int, optional): Number of
|
|
50
|
-
Default: None,
|
|
50
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
51
|
+
Default: None, will use global default workers(8), it can be set
|
|
52
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
51
53
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
52
54
|
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
53
55
|
If `shuffle` is False, no shuffling will be performed.
|
|
@@ -63,7 +65,7 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
63
65
|
shard_id (int, optional): The shard ID within `num_shards` . This
|
|
64
66
|
argument can only be specified when `num_shards` is also specified. Default: None.
|
|
65
67
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
66
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
68
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
67
69
|
Default: None, which means no cache is used.
|
|
68
70
|
|
|
69
71
|
Raises:
|
|
@@ -129,7 +131,7 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
129
131
|
|
|
130
132
|
class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
131
133
|
"""
|
|
132
|
-
|
|
134
|
+
Amazon Review Polarity and Amazon Review Full datasets.
|
|
133
135
|
|
|
134
136
|
The generated dataset has three columns: :py:obj:`[label, title, content]` ,
|
|
135
137
|
and the data type of three columns is string.
|
|
@@ -145,8 +147,9 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
145
147
|
'test' will read from 650,000 test samples,
|
|
146
148
|
'all' will read from all 3,650,000 samples. Default: None, all samples.
|
|
147
149
|
num_samples (int, optional): Number of samples (rows) to be read. Default: None, reads the full dataset.
|
|
148
|
-
num_parallel_workers (int, optional): Number of
|
|
149
|
-
Default: None,
|
|
150
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
151
|
+
Default: None, will use global default workers(8), it can be set
|
|
152
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
150
153
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
151
154
|
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
152
155
|
If `shuffle` is False, no shuffling will be performed.
|
|
@@ -162,7 +165,7 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
162
165
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
163
166
|
argument can only be specified when `num_shards` is also specified.
|
|
164
167
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
165
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
168
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
166
169
|
Default: None, which means no cache is used.
|
|
167
170
|
|
|
168
171
|
Raises:
|
|
@@ -224,7 +227,7 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
224
227
|
|
|
225
228
|
class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
226
229
|
"""
|
|
227
|
-
|
|
230
|
+
CLUE(Chinese Language Understanding Evaluation) dataset.
|
|
228
231
|
Supported CLUE classification tasks: 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' and 'CSL'.
|
|
229
232
|
|
|
230
233
|
Args:
|
|
@@ -235,8 +238,9 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
|
235
238
|
usage (str, optional): Specify the 'train', 'test' or 'eval' part of dataset. Default: 'train'.
|
|
236
239
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
237
240
|
Default: None, will include all images.
|
|
238
|
-
num_parallel_workers (int, optional): Number of
|
|
239
|
-
Default: None,
|
|
241
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
242
|
+
Default: None, will use global default workers(8), it can be set
|
|
243
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
240
244
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
241
245
|
Default: Shuffle.GLOBAL. Bool type and Shuffle enum are both supported to pass in.
|
|
242
246
|
If shuffle is False, no shuffling will be performed.
|
|
@@ -252,7 +256,7 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
|
252
256
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
253
257
|
argument can only be specified when `num_shards` is also specified.
|
|
254
258
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
255
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
259
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
256
260
|
Default: None, which means no cache is used.
|
|
257
261
|
|
|
258
262
|
The generated dataset with different task setting has different output columns:
|
|
@@ -402,7 +406,7 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
|
402
406
|
ValueError: task is not in 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' or 'CSL'.
|
|
403
407
|
ValueError: usage is not in 'train', 'test' or 'eval'.
|
|
404
408
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
405
|
-
ValueError: If `shard_id` is
|
|
409
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
406
410
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
407
411
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
408
412
|
|
|
@@ -459,7 +463,7 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
|
459
463
|
|
|
460
464
|
class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
461
465
|
"""
|
|
462
|
-
|
|
466
|
+
CoNLL-2000(Conference on Computational Natural Language Learning) chunking dataset.
|
|
463
467
|
|
|
464
468
|
The generated dataset has three columns: :py:obj:`[word, pos_tag, chunk_tag]` .
|
|
465
469
|
The tensors of column :py:obj:`word` , column :py:obj:`pos_tag` ,
|
|
@@ -485,10 +489,11 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
|
485
489
|
When this argument is specified, `num_samples` reflects the max sample number of per shard. Default: None.
|
|
486
490
|
shard_id (int, optional): The shard ID within `num_shards` . This
|
|
487
491
|
argument can only be specified when `num_shards` is also specified. Default: None.
|
|
488
|
-
num_parallel_workers (int, optional): Number of
|
|
489
|
-
Default: None,
|
|
492
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
493
|
+
Default: None, will use global default workers(8), it can be set
|
|
494
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
490
495
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
491
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
496
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
492
497
|
Default: None, which means no cache is used.
|
|
493
498
|
|
|
494
499
|
Raises:
|
|
@@ -549,7 +554,7 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
|
549
554
|
|
|
550
555
|
class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
551
556
|
"""
|
|
552
|
-
|
|
557
|
+
DBpedia dataset.
|
|
553
558
|
|
|
554
559
|
The generated dataset has three columns :py:obj:`[class, title, content]` ,
|
|
555
560
|
and the data type of three columns is string.
|
|
@@ -562,8 +567,9 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
|
562
567
|
'all' will read from all 630,000 samples. Default: None, all samples.
|
|
563
568
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
564
569
|
Default: None, will include all text.
|
|
565
|
-
num_parallel_workers (int, optional): Number of
|
|
566
|
-
Default: None,
|
|
570
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
571
|
+
Default: None, will use global default workers(8), it can be set
|
|
572
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
567
573
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
568
574
|
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
569
575
|
If shuffle is False, no shuffling will be performed.
|
|
@@ -579,7 +585,7 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
|
579
585
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
580
586
|
argument can only be specified when `num_shards` is also specified.
|
|
581
587
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
582
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
588
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
583
589
|
Default: None, which means no cache is used.
|
|
584
590
|
|
|
585
591
|
Raises:
|
|
@@ -587,7 +593,7 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
|
587
593
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
588
594
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
589
595
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
590
|
-
ValueError: If `shard_id` is
|
|
596
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
591
597
|
|
|
592
598
|
Examples:
|
|
593
599
|
>>> dbpedia_dataset_dir = "/path/to/dbpedia_dataset_directory"
|
|
@@ -646,19 +652,17 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
|
646
652
|
|
|
647
653
|
class EnWik9Dataset(SourceDataset, TextBaseDataset):
|
|
648
654
|
"""
|
|
649
|
-
|
|
655
|
+
EnWik9 dataset.
|
|
650
656
|
|
|
651
657
|
The generated dataset has one column :py:obj:`[text]` with type string.
|
|
652
658
|
|
|
653
659
|
Args:
|
|
654
660
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
655
661
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
num_parallel_workers (int, optional): Number of workers to read the data.
|
|
661
|
-
Default: None, number set in the mindspore.dataset.config.
|
|
662
|
+
Default: None, will include all samples.
|
|
663
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
664
|
+
Default: None, will use global default workers(8), it can be set
|
|
665
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
662
666
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
663
667
|
Bool type and Shuffle enum are both supported to pass in. Default: True.
|
|
664
668
|
If shuffle is False, no shuffling will be performed.
|
|
@@ -674,7 +678,7 @@ class EnWik9Dataset(SourceDataset, TextBaseDataset):
|
|
|
674
678
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
675
679
|
argument can only be specified when `num_shards` is also specified.
|
|
676
680
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
677
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
681
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
678
682
|
Default: None, which means no cache is used.
|
|
679
683
|
|
|
680
684
|
Raises:
|
|
@@ -732,7 +736,7 @@ class EnWik9Dataset(SourceDataset, TextBaseDataset):
|
|
|
732
736
|
|
|
733
737
|
class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
734
738
|
"""
|
|
735
|
-
|
|
739
|
+
IMDb(Internet Movie Database) dataset.
|
|
736
740
|
|
|
737
741
|
The generated dataset has two columns: :py:obj:`[text, label]` .
|
|
738
742
|
The tensor of column :py:obj:`text` is of the string type.
|
|
@@ -743,12 +747,10 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
|
743
747
|
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
744
748
|
Default: None, will read all samples.
|
|
745
749
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
746
|
-
For Polarity dataset, 'train' will read from 3,600,000 train samples, 'test' will read from 400,000 test
|
|
747
|
-
samples, 'all' will read from all 4,000,000 samples. For Full dataset, 'train' will read from 3,000,000
|
|
748
|
-
train samples, 'test' will read from 650,000 test samples, 'all' will read from all 3,650,000 samples.
|
|
749
750
|
Default: None, will include all samples.
|
|
750
|
-
num_parallel_workers (int, optional): Number of
|
|
751
|
-
Default: None,
|
|
751
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
752
|
+
Default: None, will use global default workers(8), it can be set
|
|
753
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
752
754
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
753
755
|
Default: None, expected order behavior shown in the table below.
|
|
754
756
|
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
@@ -759,7 +761,7 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
|
759
761
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
760
762
|
argument can only be specified when `num_shards` is also specified.
|
|
761
763
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
762
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
764
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
763
765
|
Default: None, which means no cache is used.
|
|
764
766
|
|
|
765
767
|
Raises:
|
|
@@ -769,7 +771,7 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
|
769
771
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
770
772
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
771
773
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
772
|
-
ValueError: If `shard_id` is
|
|
774
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
773
775
|
|
|
774
776
|
Note:
|
|
775
777
|
- The shape of the test column.
|
|
@@ -877,7 +879,7 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
|
877
879
|
|
|
878
880
|
class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|
879
881
|
"""
|
|
880
|
-
|
|
882
|
+
IWSLT2016(International Workshop on Spoken Language Translation) dataset.
|
|
881
883
|
|
|
882
884
|
The generated dataset has two columns: :py:obj:`[text, translation]` .
|
|
883
885
|
The tensor of column :py:obj: `text` is of the string type.
|
|
@@ -910,10 +912,11 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|
|
910
912
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
911
913
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
912
914
|
argument can only be specified when `num_shards` is also specified.
|
|
913
|
-
num_parallel_workers (int, optional): Number of
|
|
914
|
-
Default: None,
|
|
915
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
916
|
+
Default: None, will use global default workers(8), it can be set
|
|
917
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
915
918
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
916
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
919
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
917
920
|
Default: None, which means no cache is used.
|
|
918
921
|
|
|
919
922
|
Raises:
|
|
@@ -1005,7 +1008,7 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|
|
1005
1008
|
|
|
1006
1009
|
class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
|
|
1007
1010
|
"""
|
|
1008
|
-
|
|
1011
|
+
IWSLT2017(International Workshop on Spoken Language Translation) dataset.
|
|
1009
1012
|
|
|
1010
1013
|
The generated dataset has two columns: :py:obj:`[text, translation]` .
|
|
1011
1014
|
The tensor of column :py:obj:`text` and :py:obj:`translation` are of the string type.
|
|
@@ -1032,10 +1035,11 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
|
|
|
1032
1035
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
1033
1036
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1034
1037
|
argument can only be specified when `num_shards` is also specified.
|
|
1035
|
-
num_parallel_workers (int, optional): Number of
|
|
1036
|
-
Default: None,
|
|
1038
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1039
|
+
Default: None, will use global default workers(8), it can be set
|
|
1040
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1037
1041
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1038
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
1042
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1039
1043
|
Default: None, which means no cache is used.
|
|
1040
1044
|
|
|
1041
1045
|
Raises:
|
|
@@ -1102,29 +1106,29 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):

  class Multi30kDataset(SourceDataset, TextBaseDataset):
  """
-
+ Multi30k dataset.

  The generated dataset has two columns :py:obj:`[text, translation]` .
- The tensor of column :py:obj
- The tensor of column :py:obj
+ The tensor of column :py:obj:`text` is of the string type.
+ The tensor of column :py:obj:`translation` is of the string type.

-
+ Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
- usage (str, optional): Acceptable usages include 'train', 'test, 'valid' or 'all'.
-
-
+ usage (str, optional): Acceptable usages include 'train', 'test', 'valid' or 'all'.
+ Default: None, will read all samples.
+ language_pair (Sequence[str, str], optional): Acceptable language_pair include ['en', 'de'], ['de', 'en'].
+ Default: None, means ['en', 'de'].
  num_samples (int, optional): The number of images to be included in the dataset.
- Default: None, all samples.
- num_parallel_workers (int, optional): Number of
- Default: None,
-
-
- If
- If
-
+ Default: None, will read all samples.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: None, means Shuffle.GLOBAL.
+ If False is provided, no shuffling will be performed.
+ If True is provided, it is the same as setting to mindspore.dataset.Shuffle.GLOBAL.
+ If Shuffle is provided, the effect is as follows:

  - Shuffle.GLOBAL: Shuffle both the files and samples.
-
  - Shuffle.FILES: Shuffle files only.

  num_shards (int, optional): Number of shards that the dataset will be divided
@@ -1133,17 +1137,18 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
-
-
-
+ ValueError: If `usage` is not 'train', 'test', 'valid' or 'all'.
+ TypeError: If `language_pair` is not of type Sequence[str, str].
+ RuntimeError: If num_samples is less than 0.
+ RuntimeError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
-
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Examples:
  >>> multi30k_dataset_dir = "/path/to/multi30k_dataset_directory"
@@ -1151,11 +1156,11 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):

  About Multi30k dataset:

- Multi30K is a dataset
-
-
-
-
+ Multi30K is a multilingual dataset that features approximately 31,000 standardized images
+ described in multiple languages. The images are sourced from Flickr and each image comes
+ with sentence descriptions in both English and German, as well as descriptions in other
+ languages. Multi30k is used primarily for training and testing in tasks such as image
+ captioning, machine translation, and visual question answering.

  You can unzip the dataset files into the following directory structure and read by MindSpore's API.

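The new `language_pair` and `shuffle` arguments documented above combine with the usual sharding arguments. A short sketch under the same assumptions (placeholder path, `mindspore.dataset` imported as `ds`):

import mindspore.dataset as ds

multi30k_dataset_dir = "/path/to/multi30k_dataset_directory"  # placeholder path

# German-to-English pairs, no shuffling, reading shard 0 of 2;
# num_samples would then be counted per shard, as the docstring notes.
dataset = ds.Multi30kDataset(dataset_dir=multi30k_dataset_dir,
                             usage="train",
                             language_pair=["de", "en"],
                             shuffle=False,
                             num_shards=2,
                             shard_id=0)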
@@ -1203,7 +1208,7 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):

  class PennTreebankDataset(SourceDataset, TextBaseDataset):
  """
-
+ PennTreebank dataset.

  The generated dataset has one column :py:obj:`[text]` .
  The tensor of column :py:obj:`text` is of the string type.
@@ -1216,8 +1221,9 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
  'valid' will read from 3,761 test samples of string type,
  'all' will read from all 49,199 samples of string type. Default: None, all samples.
  num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
- num_parallel_workers (int, optional): Number of
- Default: None,
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
  Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
  If shuffle is False, no shuffling will be performed.
@@ -1229,11 +1235,11 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
  - Shuffle.FILES: Shuffle files only.

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
- When this argument is specified,
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
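A sketch of the `Shuffle` options described above for PennTreebankDataset; the enum lives under `mindspore.dataset` (aliased as `ds` here), and the path is a placeholder:

import mindspore.dataset as ds

ptb_dataset_dir = "/path/to/penn_treebank_dataset_directory"  # placeholder path

# Read the validation split and shuffle at file granularity only.
dataset = ds.PennTreebankDataset(dataset_dir=ptb_dataset_dir,
                                 usage="valid",
                                 shuffle=ds.Shuffle.FILES)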
@@ -1300,7 +1306,7 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):

  class SogouNewsDataset(SourceDataset, TextBaseDataset):
  r"""
-
+ Sogou News dataset.

  The generated dataset has three columns: :py:obj:`[index, title, content]` ,
  and the data type of three columns is string.
@@ -1324,10 +1330,11 @@ class SogouNewsDataset(SourceDataset, TextBaseDataset):
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
- num_parallel_workers (int, optional): Number of
- Default: None,
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
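The `cache` argument above expects a `DatasetCache` handle. A hedged sketch follows: it assumes a cache server has already been started and a session created with the `cache_admin` tool, that `ds.DatasetCache(session_id=..., size=...)` is the corresponding Python constructor, and that `dataset_dir` follows the same pattern as the other classes in this file (none of which are shown in this diff):

import mindspore.dataset as ds

sogou_news_dataset_dir = "/path/to/sogou_news_dataset_directory"  # placeholder path

# session_id=1 is a hypothetical value; in practice it comes from `cache_admin -g`.
some_cache = ds.DatasetCache(session_id=1, size=0)

dataset = ds.SogouNewsDataset(dataset_dir=sogou_news_dataset_dir,
                              num_parallel_workers=4,
                              cache=some_cache)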
@@ -1386,31 +1393,30 @@ class SogouNewsDataset(SourceDataset, TextBaseDataset):

  class SQuADDataset(SourceDataset, TextBaseDataset):
  """
-
+ SQuAD 1.1 and SQuAD 2.0 datasets.

  The generated dataset with different versions and usages has the same output columns:
-
+ :py:obj:`[context, question, text, answer_start]` .
  The tensor of column :py:obj:`context` is of the string type.
  The tensor of column :py:obj:`question` is of the string type.
  The tensor of column :py:obj:`text` is the answer in the context of the string type.
  The tensor of column :py:obj:`answer_start` is the start index of answer in context,
-
+ which is of the uint32 type.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
  usage (str, optional): Specify the 'train', 'dev' or 'all' part of dataset. Default: None, all samples.
  num_samples (int, optional): The number of samples to be included in the dataset.
  Default: None, will include all samples.
- num_parallel_workers (int, optional): Number of
- Default: None,
-
-
- If
- If
-
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: Shuffle.GLOBAL.
+ If False is provided, no shuffling will be performed.
+ If True is provided, it is the same as setting to mindspore.dataset.Shuffle.GLOBAL.
+ If Shuffle is provided, the effect is as follows:

  - Shuffle.GLOBAL: Shuffle both the files and samples.
-
  - Shuffle.FILES: Shuffle files only.

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
@@ -1418,7 +1424,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
@@ -1426,6 +1432,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
  ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Examples:
  >>> squad_dataset_dir = "/path/to/squad_dataset_file"
@@ -1433,7 +1440,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):

  About SQuAD dataset:

- Stanford Question Answering Dataset
+ SQuAD (Stanford Question Answering Dataset) is a reading comprehension dataset, consisting of questions posed by
  crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span,
  from the corresponding reading passage, or the question might be unanswerable.

@@ -1447,6 +1454,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
  For SQuAD 1.1:

  .. code-block::
+
  .
  └── SQuAD1
  ├── train-v1.1.json
@@ -1455,6 +1463,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
  For SQuAD 2.0:

  .. code-block::
+
  .
  └── SQuAD2
  ├── train-v2.0.json
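With the shuffle documentation filled in, a minimal SQuADDataset read of the dev split looks like the following (placeholder directory, `ds` alias as in the doctest examples):

import mindspore.dataset as ds

squad_dataset_dir = "/path/to/squad_dataset_directory"  # placeholder, e.g. the SQuAD1 folder above

# Deterministic read of the dev split; output columns are
# [context, question, text, answer_start] as documented above.
dataset = ds.SQuADDataset(dataset_dir=squad_dataset_dir, usage="dev", shuffle=False)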
@@ -1496,6 +1505,106 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
  self.num_shards, self.shard_id)


+ class SST2Dataset(SourceDataset, TextBaseDataset):
+ """
+ SST2(Stanford Sentiment Treebank v2) dataset.
+
+ The generated dataset's train.tsv and dev.tsv have two columns :py:obj:`[sentence, label]` .
+ The generated dataset's test.tsv has one column :py:obj:`[sentence]` .
+ The tensors of columns :py:obj:`sentence` and :py:obj:`label` are of the string type.
+
+ Args:
+ dataset_dir (str): Path to the root directory that contains the dataset.
+ usage (str, optional): Usage of this dataset, can be `train`, `test` or `dev`. `train` will read
+ from 67,349 train samples, `test` will read from 1,821 test samples, `dev` will read from
+ all 872 samples. Default: None, will read train samples.
+ num_samples (int, optional): The number of samples to be included in the dataset.
+ Default: None, will include all text.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+ If shuffle is False, no shuffling will be performed;
+ If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
+ Set the mode of data shuffling by passing in enumeration variables:
+
+ - Shuffle.GLOBAL: Shuffle the samples.
+
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
+ When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ shard_id (int, optional): The shard ID within num_shards. This argument can only be specified when
+ num_shards is also specified. Default: None.
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.
+
+ Raises:
+ RuntimeError: If `dataset_dir` does not contain data files.
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ RuntimeError: If `num_shards` is specified but shard_id is None.
+ RuntimeError: If `shard_id` is specified but num_shards is None.
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+
+ Examples:
+ >>> sst2_dataset_dir = "/path/to/sst2_dataset_directory"
+ >>>
+ >>> # 1) Read 3 samples from SST2 dataset
+ >>> dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, num_samples=3)
+ >>>
+ >>> # 2) Read train samples from SST2 dataset
+ >>> dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, usage="train")
+
+ About SST2 dataset:
+ The Stanford Sentiment Treebank is a corpus with fully labeled parse trees that allows for a complete
+ analysis of the compositional effects of sentiment in language. The corpus is based on the dataset introduced
+ by Pang and Lee (2005) and consists of 11,855 single sentences extracted from movie reviews. It was parsed
+ with the Stanford parser and includes a total of 215,154 unique phrases from those parse trees, each
+ annotated by 3 human judges.
+
+ Here is the original SST2 dataset structure.
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
+
+ .. code-block::
+
+ .
+ └── sst2_dataset_dir
+ ├── train.tsv
+ ├── test.tsv
+ ├── dev.tsv
+ └── original
+
+ Citation:
+
+ .. code-block::
+
+ @inproceedings{socher-etal-2013-recursive,
+ title = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
+ author = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning,
+ Christopher D. and Ng, Andrew and Potts, Christopher},
+ booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
+ month = oct,
+ year = {2013},
+ address = {Seattle, Washington, USA},
+ publisher = {Association for Computational Linguistics},
+ url = {https://www.aclweb.org/anthology/D13-1170},
+ pages = {1631--1642},
+ }
+ """
+
+ @check_sst2_dataset
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
+ num_shards=None, shard_id=None, cache=None):
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
+ self.dataset_dir = dataset_dir
+ self.usage = replace_none(usage, "train")
+
+ def parse(self, children=None):
+ return cde.SST2Node(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag,
+ self.num_shards, self.shard_id)
+
+
  class TextFileDataset(SourceDataset, TextBaseDataset):
  """
  A source dataset that reads and parses datasets stored on disk in text format.
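For the newly added SST2Dataset, rows can be pulled out with the generic dictionary iterator of the dataset API; `create_dict_iterator` is not part of this diff and is assumed from the wider `mindspore.dataset` interface:

import mindspore.dataset as ds

sst2_dataset_dir = "/path/to/sst2_dataset_directory"  # placeholder path

# Read the dev split; per the docstring it yields [sentence, label] columns.
dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, usage="dev", shuffle=False)

for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
    print(row["sentence"], row["label"])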
@@ -1506,8 +1615,9 @@ class TextFileDataset(SourceDataset, TextBaseDataset):
  pattern of files. The list will be sorted in a lexicographical order.
  num_samples (int, optional): The number of samples to be included in the dataset.
  Default: None, will include all images.
- num_parallel_workers (int, optional): Number of
- Default: None,
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
  Default: `Shuffle.GLOBAL` . Bool type and Shuffle enum are both supported to pass in.
  If shuffle is False, no shuffling will be performed.
@@ -1523,7 +1633,7 @@ class TextFileDataset(SourceDataset, TextBaseDataset):
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
@@ -1531,7 +1641,7 @@ class TextFileDataset(SourceDataset, TextBaseDataset):
  ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Examples:
  >>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
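TextFileDataset takes the file list positionally (the hunks above document the remaining keyword arguments). A short sketch with placeholder file names:

import mindspore.dataset as ds

text_files = ["/path/to/text_file_dataset_file"]  # placeholder; one or more files or a glob pattern

# Cap the rows read and shuffle at file level only.
dataset = ds.TextFileDataset(text_files, num_samples=1000, shuffle=ds.Shuffle.FILES)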
@@ -1553,7 +1663,7 @@ class TextFileDataset(SourceDataset, TextBaseDataset):

  class UDPOSDataset(SourceDataset, TextBaseDataset):
  """
-
+ UDPOS(Universal Dependencies dataset for Part of Speech) dataset.

  The generated dataset has three columns: :py:obj:`[word, universal, stanford]` ,
  and the data type of three columns is string.
@@ -1578,10 +1688,11 @@ class UDPOSDataset(SourceDataset, TextBaseDataset):
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
- num_parallel_workers (int, optional): Number of
- Default: None,
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
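A sharded read of UDPOSDataset, assuming its constructor follows the same `dataset_dir` pattern as the other classes in this file (the hunks above only show the worker, shard and cache arguments):

import mindspore.dataset as ds

udpos_dataset_dir = "/path/to/udpos_dataset_directory"  # placeholder path

# This process reads shard 1 of 4; num_samples would then apply per shard.
dataset = ds.UDPOSDataset(dataset_dir=udpos_dataset_dir, num_shards=4, shard_id=1)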
@@ -1629,7 +1740,7 @@ class UDPOSDataset(SourceDataset, TextBaseDataset):

  class WikiTextDataset(SourceDataset, TextBaseDataset):
  """
-
+ WikiText2 and WikiText103 datasets.

  The generated dataset has one column :py:obj:`[text]` , and
  the tensor of column `text` is of the string type.
@@ -1638,8 +1749,9 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):
  dataset_dir (str): Path to the root directory that contains the dataset.
  usage (str, optional): Acceptable usages include 'train', 'test', 'valid' and 'all'. Default: None, all samples.
  num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
- num_parallel_workers (int, optional): Number of
- Default: None,
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
  Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
  If shuffle is False, no shuffling will be performed.
@@ -1651,18 +1763,18 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):
  - Shuffle.FILES: Shuffle files only.

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
- When this argument is specified,
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files or invalid.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
  ValueError: If `num_samples` is invalid (< 0).
  ValueError: If `num_parallel_workers` exceeds the max thread numbers.

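A minimal WikiTextDataset read matching the arguments documented above (placeholder path; works for either the WikiText2 or WikiText103 layout that the summary now names):

import mindspore.dataset as ds

wiki_text_dataset_dir = "/path/to/wikitext_dataset_directory"  # placeholder path

# Read the validation split; shuffle=True is equivalent to the default Shuffle.GLOBAL.
dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage="valid", shuffle=True)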
@@ -1716,7 +1828,7 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):

  class YahooAnswersDataset(SourceDataset, TextBaseDataset):
  """
-
+ YahooAnswers dataset.

  The generated dataset has four columns :py:obj:`[class, title, content, answer]` , whose data type is string.

@@ -1727,8 +1839,9 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):
  all 1,460,000 samples. Default: None, all samples.
  num_samples (int, optional): The number of samples to be included in the dataset.
  Default: None, will include all text.
- num_parallel_workers (int, optional): Number of
- Default: None,
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
  Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
  If shuffle is False, no shuffling will be performed.
@@ -1744,14 +1857,14 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
  ValueError: If `num_parallel_workers` exceeds the max thread numbers.

  Examples:
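A quick-inspection sketch for YahooAnswersDataset using the arguments documented above (placeholder path):

import mindspore.dataset as ds

yahoo_answers_dataset_dir = "/path/to/yahoo_answers_dataset_directory"  # placeholder path

# Take a small, deterministic sample; columns are [class, title, content, answer].
dataset = ds.YahooAnswersDataset(dataset_dir=yahoo_answers_dataset_dir, num_samples=5, shuffle=False)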
@@ -1810,7 +1923,7 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):

  class YelpReviewDataset(SourceDataset, TextBaseDataset):
  """
-
+ Yelp Review Polarity and Yelp Review Full datasets.

  The generated dataset has two columns: :py:obj:`[label, text]` , and the data type of two columns is string.

@@ -1835,10 +1948,11 @@ class YelpReviewDataset(SourceDataset, TextBaseDataset):
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
  shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
- num_parallel_workers (int, optional): Number of
- Default: None,
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
  Default: None, which means no cache is used.

  Raises:
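Finally, a sketch for YelpReviewDataset, again assuming the usual `dataset_dir` first argument (only the worker, shard and cache arguments appear in the hunks above):

import mindspore.dataset as ds

yelp_review_dataset_dir = "/path/to/yelp_review_dataset_directory"  # placeholder path

# Explicit per-dataset worker count instead of the global default; columns are [label, text].
dataset = ds.YelpReviewDataset(dataset_dir=yelp_review_dataset_dir, num_parallel_workers=2)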