mindspore-1.10.0-cp39-cp39-win_amd64.whl → mindspore-2.0.0rc1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/ConcurrencyCheck.dll +0 -0
- mindspore/CppBuildInsights.dll +0 -0
- mindspore/CppCoreCheck.dll +0 -0
- mindspore/EnumIndex.dll +0 -0
- mindspore/EspXEngine.dll +0 -0
- mindspore/HResultCheck.dll +0 -0
- mindspore/KernelTraceControl.dll +0 -0
- mindspore/LocalESPC.dll +0 -0
- mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
- mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
- mindspore/VariantClear.dll +0 -0
- mindspore/__init__.py +9 -4
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/builtin_operations.py +32 -4
- mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +17 -2
- mindspore/_extends/parse/parser.py +193 -34
- mindspore/_extends/parse/resources.py +7 -8
- mindspore/_extends/parse/standard_method.py +1780 -435
- mindspore/_extends/parse/trope.py +3 -1
- mindspore/amp.py +53 -58
- mindspore/atlprov.dll +0 -0
- mindspore/boost/adasum.py +3 -2
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +46 -26
- mindspore/boost/dim_reduce.py +6 -5
- mindspore/boost/grad_accumulation.py +2 -1
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/cfgpersist.dll +0 -0
- mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
- mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
- mindspore/common/__init__.py +11 -10
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +57 -0
- mindspore/common/api.py +582 -297
- mindspore/common/dtype.py +66 -18
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +38 -1
- mindspore/common/jit_config.py +25 -13
- mindspore/common/mutable.py +53 -24
- mindspore/common/parameter.py +60 -37
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +927 -0
- mindspore/common/tensor.py +1627 -3900
- mindspore/communication/__init__.py +10 -5
- mindspore/communication/_comm_helper.py +78 -214
- mindspore/communication/_hccl_management.py +2 -1
- mindspore/communication/management.py +136 -47
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +291 -56
- mindspore/d3dcompiler_47.dll +0 -0
- mindspore/dataset/__init__.py +12 -8
- mindspore/dataset/audio/__init__.py +9 -9
- mindspore/dataset/audio/transforms.py +1090 -228
- mindspore/dataset/audio/utils.py +87 -39
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +17 -15
- mindspore/dataset/core/config.py +246 -17
- mindspore/dataset/core/py_util_helpers.py +4 -3
- mindspore/dataset/core/validator_helpers.py +10 -10
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +9 -9
- mindspore/dataset/engine/datasets.py +648 -477
- mindspore/dataset/engine/datasets_audio.py +165 -167
- mindspore/dataset/engine/datasets_standard_format.py +93 -67
- mindspore/dataset/engine/datasets_text.py +492 -342
- mindspore/dataset/engine/datasets_user_defined.py +85 -50
- mindspore/dataset/engine/datasets_vision.py +1224 -699
- mindspore/dataset/engine/graphdata.py +134 -69
- mindspore/dataset/engine/iterators.py +50 -9
- mindspore/dataset/engine/offload.py +52 -31
- mindspore/dataset/engine/samplers.py +27 -24
- mindspore/dataset/engine/serializer_deserializer.py +14 -15
- mindspore/dataset/engine/validators.py +213 -52
- mindspore/dataset/text/__init__.py +10 -8
- mindspore/dataset/text/transforms.py +152 -57
- mindspore/dataset/text/utils.py +98 -49
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +4 -2
- mindspore/dataset/transforms/c_transforms.py +11 -13
- mindspore/dataset/transforms/py_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms_util.py +10 -0
- mindspore/dataset/transforms/transforms.py +13 -15
- mindspore/dataset/transforms/validators.py +7 -7
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/browse_dataset.py +13 -13
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +8 -7
- mindspore/dataset/vision/c_transforms.py +125 -126
- mindspore/dataset/vision/py_transforms.py +37 -37
- mindspore/dataset/vision/py_transforms_util.py +23 -20
- mindspore/dataset/vision/transforms.py +316 -315
- mindspore/dataset/vision/utils.py +313 -17
- mindspore/dataset/vision/validators.py +6 -6
- mindspore/default_config.py +0 -1
- mindspore/dpcmi.dll +0 -0
- mindspore/{compression → experimental}/__init__.py +6 -5
- mindspore/experimental/map_parameter.py +275 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +70 -9
- mindspore/include/api/delegate.h +8 -1
- mindspore/include/api/dual_abi_helper.h +8 -24
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_group.h +68 -0
- mindspore/include/api/model_parallel_runner.h +17 -17
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +20 -4
- mindspore/include/api/status.h +7 -1
- mindspore/include/api/types.h +25 -21
- mindspore/include/api/visible.h +4 -0
- mindspore/include/c_api/model_c.h +5 -0
- mindspore/include/c_api/status_c.h +1 -1
- mindspore/include/dataset/config.h +1 -1
- mindspore/include/dataset/constants.h +14 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/include/dataset/vision.h +56 -117
- mindspore/include/dataset/vision_lite.h +102 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +28 -28
- mindspore/mindrecord/common/exceptions.py +2 -4
- mindspore/mindrecord/filereader.py +19 -1
- mindspore/mindrecord/filewriter.py +250 -88
- mindspore/mindrecord/mindpage.py +13 -13
- mindspore/mindrecord/shardheader.py +15 -15
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +29 -29
- mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
- mindspore/mindrecord/tools/csv_to_mr.py +4 -4
- mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
- mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +1 -5
- mindspore/nn/cell.py +297 -234
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +17 -42
- mindspore/nn/layer/__init__.py +7 -4
- mindspore/nn/layer/activation.py +131 -88
- mindspore/nn/layer/basic.py +313 -613
- mindspore/nn/layer/channel_shuffle.py +103 -0
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +52 -6
- mindspore/nn/layer/conv.py +112 -43
- mindspore/nn/layer/dense.py +10 -9
- mindspore/nn/layer/embedding.py +36 -34
- mindspore/nn/layer/image.py +123 -27
- mindspore/nn/layer/math.py +108 -107
- mindspore/nn/layer/normalization.py +212 -366
- mindspore/nn/layer/padding.py +370 -42
- mindspore/nn/layer/pooling.py +1443 -219
- mindspore/nn/layer/rnn_cells.py +11 -16
- mindspore/nn/layer/rnns.py +38 -39
- mindspore/nn/layer/thor_layer.py +24 -25
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +9 -6
- mindspore/nn/loss/loss.py +678 -142
- mindspore/nn/metrics.py +53 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
- mindspore/nn/optim/ada_grad.py +8 -8
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +18 -14
- mindspore/nn/optim/adam.py +429 -87
- mindspore/nn/optim/adamax.py +5 -6
- mindspore/nn/optim/adasum.py +10 -8
- mindspore/nn/optim/asgd.py +7 -7
- mindspore/nn/optim/ftrl.py +81 -11
- mindspore/nn/optim/lamb.py +7 -8
- mindspore/nn/optim/lars.py +4 -4
- mindspore/nn/optim/lazyadam.py +82 -7
- mindspore/nn/optim/momentum.py +8 -7
- mindspore/nn/optim/optimizer.py +19 -10
- mindspore/nn/optim/proximal_ada_grad.py +6 -5
- mindspore/nn/optim/rmsprop.py +3 -3
- mindspore/nn/optim/rprop.py +20 -16
- mindspore/nn/optim/sgd.py +21 -15
- mindspore/nn/optim/thor.py +23 -21
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -6
- mindspore/nn/probability/bijector/invert.py +4 -2
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/__init__.py +6 -0
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +11 -17
- mindspore/nn/probability/distribution/bernoulli.py +6 -6
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +9 -9
- mindspore/nn/probability/distribution/cauchy.py +8 -8
- mindspore/nn/probability/distribution/distribution.py +12 -6
- mindspore/nn/probability/distribution/exponential.py +5 -5
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +6 -5
- mindspore/nn/probability/distribution/gumbel.py +5 -5
- mindspore/nn/probability/distribution/half_normal.py +133 -0
- mindspore/nn/probability/distribution/laplace.py +128 -0
- mindspore/nn/probability/distribution/log_normal.py +0 -1
- mindspore/nn/probability/distribution/logistic.py +4 -5
- mindspore/nn/probability/distribution/normal.py +11 -15
- mindspore/nn/probability/distribution/poisson.py +6 -2
- mindspore/nn/probability/distribution/student_t.py +150 -0
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +5 -5
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +8 -1
- mindspore/nn/wrap/cell_wrapper.py +55 -27
- mindspore/nn/wrap/grad_reducer.py +20 -11
- mindspore/nn/wrap/loss_scale.py +47 -30
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +46 -42
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +26 -19
- mindspore/numpy/utils.py +1 -8
- mindspore/numpy/utils_const.py +112 -62
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -3
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +209 -152
- mindspore/ops/_grad/grad_base.py +55 -17
- mindspore/ops/_grad/grad_clip_ops.py +11 -3
- mindspore/ops/_grad/grad_comm_ops.py +58 -47
- mindspore/ops/_grad/grad_implementations.py +21 -61
- mindspore/ops/_grad/grad_inner_ops.py +48 -6
- mindspore/ops/_grad/grad_math_ops.py +306 -161
- mindspore/ops/_grad/grad_nn_ops.py +192 -181
- mindspore/ops/_grad/grad_other_ops.py +1 -1
- mindspore/ops/_grad/grad_quant_ops.py +5 -5
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +15 -9
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
- mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
- mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
- mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
- mindspore/ops/_op_impl/__init__.py +3 -3
- mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
- mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
- mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
- mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
- mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
- mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
- mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
- mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
- mindspore/ops/_op_impl/aicpu/diag.py +36 -0
- mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
- mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
- mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
- mindspore/ops/_op_impl/aicpu/eig.py +35 -0
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/glu.py +33 -0
- mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
- mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
- mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
- mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
- mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
- mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
- mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
- mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
- mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/qr.py +36 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/range.py +36 -0
- mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
- mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
- mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sort.py +39 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
- mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
- mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
- mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
- mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
- mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/__init__.py +1 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
- mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
- mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -608
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/greater.py +2 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
- mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
- mindspore/ops/_op_impl/tbe/slice.py +26 -15
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +3 -2
- mindspore/ops/_register_for_op.py +11 -0
- mindspore/ops/_utils/__init__.py +1 -1
- mindspore/ops/_utils/utils.py +20 -41
- mindspore/ops/_vmap/__init__.py +2 -2
- mindspore/ops/_vmap/vmap_array_ops.py +170 -78
- mindspore/ops/_vmap/vmap_base.py +24 -10
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
- mindspore/ops/_vmap/vmap_image_ops.py +52 -0
- mindspore/ops/_vmap/vmap_math_ops.py +77 -6
- mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
- mindspore/ops/_vmap/vmap_other_ops.py +3 -1
- mindspore/ops/_vmap/vmap_random_ops.py +55 -3
- mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
- mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/__init__.py +1 -4
- mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
- mindspore/ops/composite/__init__.py +12 -13
- mindspore/ops/composite/base.py +261 -254
- mindspore/ops/composite/env_ops.py +41 -0
- mindspore/ops/composite/math_ops.py +197 -156
- mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
- mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
- mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
- mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
- mindspore/ops/function/__init__.py +323 -8
- mindspore/ops/function/array_func.py +3511 -780
- mindspore/ops/function/clip_func.py +329 -0
- mindspore/ops/function/debug_func.py +6 -6
- mindspore/ops/function/grad/__init__.py +5 -1
- mindspore/ops/function/grad/grad_func.py +736 -65
- mindspore/ops/function/image_func.py +270 -0
- mindspore/ops/function/linalg_func.py +268 -8
- mindspore/ops/function/math_func.py +8032 -3164
- mindspore/ops/function/nn_func.py +5619 -1855
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +11 -10
- mindspore/ops/function/random_func.py +939 -77
- mindspore/ops/function/sparse_func.py +249 -84
- mindspore/ops/function/sparse_unary_func.py +2303 -0
- mindspore/ops/function/spectral_func.py +146 -0
- mindspore/ops/function/vmap_func.py +114 -0
- mindspore/ops/functional.py +182 -254
- mindspore/ops/op_info_register.py +79 -34
- mindspore/ops/operations/__init__.py +210 -118
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +25 -15
- mindspore/ops/operations/_grad_ops.py +447 -322
- mindspore/ops/operations/_inner_ops.py +547 -176
- mindspore/ops/operations/_map_tensor_ops.py +112 -0
- mindspore/ops/operations/_ms_kernel.py +29 -27
- mindspore/ops/operations/_ocr_ops.py +11 -11
- mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
- mindspore/ops/operations/_quant_ops.py +186 -101
- mindspore/ops/operations/_rl_inner_ops.py +122 -61
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1047 -0
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +4 -4
- mindspore/ops/operations/array_ops.py +1428 -1226
- mindspore/ops/operations/comm_ops.py +180 -117
- mindspore/ops/operations/control_ops.py +4 -2
- mindspore/ops/operations/custom_ops.py +185 -98
- mindspore/ops/operations/debug_ops.py +92 -54
- mindspore/ops/operations/image_ops.py +406 -211
- mindspore/ops/operations/inner_ops.py +42 -53
- mindspore/ops/operations/linalg_ops.py +32 -29
- mindspore/ops/operations/math_ops.py +2076 -897
- mindspore/ops/operations/nn_ops.py +1282 -1252
- mindspore/ops/operations/other_ops.py +124 -278
- mindspore/ops/operations/random_ops.py +345 -178
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +502 -157
- mindspore/ops/operations/spectral_ops.py +107 -0
- mindspore/ops/primitive.py +192 -15
- mindspore/ops/vm_impl_registry.py +23 -2
- mindspore/parallel/__init__.py +6 -1
- mindspore/parallel/_auto_parallel_context.py +199 -92
- mindspore/parallel/_cell_wrapper.py +4 -2
- mindspore/parallel/_cost_model_context.py +3 -0
- mindspore/parallel/_dp_allreduce_fusion.py +2 -1
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +167 -28
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +9 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
- mindspore/parallel/_utils.py +47 -7
- mindspore/parallel/algo_parameter_config.py +5 -1
- mindspore/parallel/checkpoint_transform.py +329 -0
- mindspore/parallel/shard.py +229 -0
- mindspore/perf_msvcbuildinsights.dll +0 -0
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/util.py +4 -3
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +249 -0
- mindspore/profiler/parser/aicpu_data_parser.py +38 -39
- mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
- mindspore/profiler/parser/base_timeline_generator.py +471 -0
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
- mindspore/profiler/parser/framework_parser.py +42 -16
- mindspore/profiler/parser/hccl_parser.py +158 -158
- mindspore/profiler/parser/hwts_log_parser.py +7 -6
- mindspore/profiler/parser/integrator.py +18 -1579
- mindspore/profiler/parser/minddata_analyzer.py +8 -8
- mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +108 -0
- mindspore/profiler/parser/step_trace_parser.py +1 -1
- mindspore/profiler/profiling.py +396 -194
- mindspore/rewrite/__init__.py +6 -2
- mindspore/rewrite/api/node.py +51 -110
- mindspore/rewrite/api/node_type.py +10 -6
- mindspore/rewrite/api/pattern_engine.py +51 -7
- mindspore/rewrite/api/scoped_value.py +64 -53
- mindspore/rewrite/api/symbol_tree.py +108 -61
- mindspore/rewrite/api/tree_node_helper.py +2 -3
- mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
- mindspore/rewrite/ast_helpers/__init__.py +6 -3
- mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
- mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
- mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
- mindspore/rewrite/ast_transformers/__init__.py +0 -1
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
- mindspore/rewrite/common/__init__.py +2 -0
- mindspore/rewrite/common/event.py +1 -1
- mindspore/rewrite/common/observable.py +1 -1
- mindspore/rewrite/common/observer.py +1 -1
- mindspore/rewrite/common/rewrite_elog.py +35 -0
- mindspore/rewrite/namer.py +2 -2
- mindspore/rewrite/namespace.py +14 -4
- mindspore/rewrite/node.py +161 -13
- mindspore/rewrite/parser.py +0 -1
- mindspore/rewrite/parser_register.py +0 -1
- mindspore/rewrite/parsers/arguments_parser.py +3 -2
- mindspore/rewrite/parsers/assign_parser.py +267 -67
- mindspore/rewrite/parsers/attribute_parser.py +56 -0
- mindspore/rewrite/parsers/class_def_parser.py +191 -108
- mindspore/rewrite/parsers/constant_parser.py +101 -0
- mindspore/rewrite/parsers/container_parser.py +88 -0
- mindspore/rewrite/parsers/for_parser.py +28 -15
- mindspore/rewrite/parsers/function_def_parser.py +21 -5
- mindspore/rewrite/parsers/if_parser.py +11 -28
- mindspore/rewrite/parsers/module_parser.py +9 -6
- mindspore/rewrite/parsers/return_parser.py +3 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +322 -109
- mindspore/rewrite/symbol_tree_builder.py +45 -8
- mindspore/rewrite/symbol_tree_dumper.py +0 -1
- mindspore/rewrite/topological_manager.py +1 -2
- mindspore/run_check/_check_version.py +209 -112
- mindspore/run_check/run_check.py +2 -1
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +6 -4
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +321 -50
- mindspore/train/callback/__init__.py +3 -1
- mindspore/train/callback/_backup_and_restore.py +120 -0
- mindspore/train/callback/_callback.py +8 -8
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_early_stop.py +13 -7
- mindspore/train/callback/_history.py +8 -8
- mindspore/train/callback/_lambda_callback.py +6 -6
- mindspore/train/callback/_landscape.py +36 -38
- mindspore/train/callback/_loss_monitor.py +12 -6
- mindspore/train/callback/_lr_scheduler_callback.py +2 -4
- mindspore/train/callback/_on_request_exit.py +212 -0
- mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
- mindspore/train/callback/_summary_collector.py +27 -19
- mindspore/train/callback/_time_monitor.py +13 -7
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +122 -33
- mindspore/train/dataset_helper.py +28 -87
- mindspore/train/loss_scale_manager.py +4 -7
- mindspore/{nn → train}/metrics/__init__.py +20 -20
- mindspore/{nn → train}/metrics/accuracy.py +12 -10
- mindspore/{nn → train}/metrics/auc.py +4 -4
- mindspore/{nn → train}/metrics/bleu_score.py +4 -4
- mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
- mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
- mindspore/{nn → train}/metrics/dice.py +6 -5
- mindspore/{nn → train}/metrics/error.py +7 -5
- mindspore/{nn → train}/metrics/fbeta.py +9 -7
- mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
- mindspore/{nn → train}/metrics/loss.py +4 -3
- mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/metric.py +6 -5
- mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
- mindspore/{nn → train}/metrics/perplexity.py +5 -4
- mindspore/{nn → train}/metrics/precision.py +5 -4
- mindspore/{nn → train}/metrics/recall.py +5 -4
- mindspore/{nn → train}/metrics/roc.py +7 -6
- mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/topk.py +7 -5
- mindspore/train/mind_ir_pb2.py +339 -32
- mindspore/train/model.py +113 -84
- mindspore/train/serialization.py +547 -167
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -12
- mindspore/train/train_thor/convert_utils.py +7 -1
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/train/train_thor/model_thor.py +0 -4
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -514
- mindspore/compression/quant/qat.py +0 -636
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/libatomic-1.dll +0 -0
- mindspore/libgcc_s_seh-1.dll +0 -0
- mindspore/libgfortran-4.dll +0 -0
- mindspore/libgomp-1.dll +0 -0
- mindspore/libjpeg-62.dll +0 -0
- mindspore/libmindspore.dll +0 -0
- mindspore/libmindspore_common.dll +0 -0
- mindspore/libmindspore_core.dll +0 -0
- mindspore/libmindspore_glog.dll +0 -0
- mindspore/libnnacl.dll +0 -0
- mindspore/libopencv_core452.dll +0 -0
- mindspore/libopencv_imgcodecs452.dll +0 -0
- mindspore/libopencv_imgproc452.dll +0 -0
- mindspore/libquadmath-0.dll +0 -0
- mindspore/libsqlite3.dll +0 -0
- mindspore/libssp-0.dll +0 -0
- mindspore/libstdc++-6.dll +0 -0
- mindspore/libtinyxml2.dll +0 -0
- mindspore/libturbojpeg.dll +0 -0
- mindspore/libwinpthread-1.dll +0 -0
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -138
- mindspore/nn/probability/dpn/vae/vae.py +0 -122
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
- mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
- mindspore/ops/composite/array_ops.py +0 -210
- mindspore/ops/composite/clip_ops.py +0 -238
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/ops/operations/sponge_ops.py +0 -3531
- mindspore/ops/operations/sponge_update_ops.py +0 -2546
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- mindspore/run_check/_check_deps_version.py +0 -84
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
--- a/mindspore/dataset/engine/datasets_text.py
+++ b/mindspore/dataset/engine/datasets_text.py
@@ -30,43 +30,49 @@ from .validators import check_imdb_dataset, check_iwslt2016_dataset, check_iwslt
     check_penn_treebank_dataset, check_ag_news_dataset, check_amazon_review_dataset, check_udpos_dataset, \
     check_wiki_text_dataset, check_conll2000_dataset, check_cluedataset, \
     check_sogou_news_dataset, check_textfiledataset, check_dbpedia_dataset, check_yelp_review_dataset, \
-    check_en_wik9_dataset, check_yahoo_answers_dataset, check_multi30k_dataset, check_squad_dataset
+    check_en_wik9_dataset, check_yahoo_answers_dataset, check_multi30k_dataset, check_squad_dataset, \
+    check_sst2_dataset

 from ..core.validator_helpers import replace_none


 class AGNewsDataset(SourceDataset, TextBaseDataset):
     """
-
+    AG News dataset.

-    The generated dataset has three columns: :py:obj:`[index, title, description]
-
-    The tensor of column :py:obj:`title` is of the string type.
-    The tensor of column :py:obj:`description` is of the string type.
+    The generated dataset has three columns: :py:obj:`[index, title, description]` ,
+    and the data type of three columns is string type.

     Args:
         dataset_dir (str): Path to the root directory that contains the dataset.
-        usage (str, optional): Acceptable usages include 'train', 'test' and 'all'
-        num_samples (int, optional): Number of samples (rows) to read
-        num_parallel_workers (int, optional): Number of
-
-
-
-
-            If shuffle is
-
+        usage (str, optional): Acceptable usages include 'train', 'test' and 'all'. Default: None, all samples.
+        num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+            Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+            If `shuffle` is False, no shuffling will be performed.
+            If `shuffle` is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+            Set the mode of data shuffling by passing in enumeration variables:

-            - Shuffle.GLOBAL: Shuffle both the files and samples
+            - Shuffle.GLOBAL: Shuffle both the files and samples.

             - Shuffle.FILES: Shuffle files only.

-        num_shards (int, optional): Number of shards that the dataset will be divided into
-            When this argument is specified,
-        shard_id (int, optional): The shard ID within `num_shards`
-            argument can only be specified when `num_shards` is also specified.
+        num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
+            When this argument is specified, `num_samples` reflects the max sample number of per shard.
+        shard_id (int, optional): The shard ID within `num_shards` . This
+            argument can only be specified when `num_shards` is also specified. Default: None.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.
+
+    Raises:
+        RuntimeError: If `dataset_dir` does not contain data files.
+        RuntimeError: If `num_shards` is specified but `shard_id` is None.
+        RuntimeError: If `shard_id` is specified but `num_shards` is None.
+        ValueError: If `num_parallel_workers` exceeds the max thread numbers.

     Examples:
         >>> ag_news_dataset_dir = "/path/to/ag_news_dataset_file"
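As an aside, the constructor surface documented in the rewritten AGNewsDataset docstring above can be exercised as in the minimal sketch below; the dataset path is a placeholder, and the column names (index, title, description) come straight from the docstring.

    import mindspore.dataset as ds

    # Placeholder path: point this at a directory containing the AG News CSV files.
    ag_news_dir = "/path/to/ag_news_dataset_file"

    # Read the training split with the documented arguments spelled out:
    # global shuffling, and 2-way sharding with this process reading shard 0.
    dataset = ds.AGNewsDataset(dataset_dir=ag_news_dir,
                               usage="train",
                               shuffle=ds.Shuffle.GLOBAL,
                               num_shards=2,
                               shard_id=0)

    # Every row carries the three string columns described in the docstring.
    for row in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
        print(row["index"], row["title"], row["description"])
        break

Passing shuffle=False instead would disable reshuffling, matching the behaviour described in the docstring above.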
@@ -125,49 +131,48 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
125
131
|
|
|
126
132
|
class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
127
133
|
"""
|
|
128
|
-
|
|
134
|
+
Amazon Review Polarity and Amazon Review Full datasets.
|
|
129
135
|
|
|
130
|
-
The generated dataset has three columns: :py:obj:`[label, title, content]
|
|
131
|
-
|
|
132
|
-
The tensor of column :py:obj:`title` is of the string type.
|
|
133
|
-
The tensor of column :py:obj:`content` is of the string type.
|
|
136
|
+
The generated dataset has three columns: :py:obj:`[label, title, content]` ,
|
|
137
|
+
and the data type of three columns is string.
|
|
134
138
|
|
|
135
139
|
Args:
|
|
136
140
|
dataset_dir (str): Path to the root directory that contains the Amazon Review Polarity dataset
|
|
137
141
|
or the Amazon Review Full dataset.
|
|
138
|
-
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'
|
|
142
|
+
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
139
143
|
For Polarity dataset, 'train' will read from 3,600,000 train samples,
|
|
140
144
|
'test' will read from 400,000 test samples,
|
|
141
145
|
'all' will read from all 4,000,000 samples.
|
|
142
146
|
For Full dataset, 'train' will read from 3,000,000 train samples,
|
|
143
147
|
'test' will read from 650,000 test samples,
|
|
144
|
-
'all' will read from all 3,650,000 samples
|
|
145
|
-
num_samples (int, optional): Number of samples (rows) to be read
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
148
|
+
'all' will read from all 3,650,000 samples. Default: None, all samples.
|
|
149
|
+
num_samples (int, optional): Number of samples (rows) to be read. Default: None, reads the full dataset.
|
|
150
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
151
|
+
Default: None, will use global default workers(8). It can be set
|
|
152
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
153
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
154
|
+
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
155
|
+
If `shuffle` is False, no shuffling will be performed.
|
|
156
|
+
If `shuffle` is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
|
|
157
|
+
Set the mode of data shuffling by passing in enumeration variables:
|
|
151
158
|
|
|
152
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples
|
|
159
|
+
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
153
160
|
|
|
154
161
|
- Shuffle.FILES: Shuffle files only.
|
|
155
162
|
|
|
156
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
163
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
157
164
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
158
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
165
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
159
166
|
argument can only be specified when `num_shards` is also specified.
|
|
160
|
-
num_parallel_workers (int, optional): Number of workers to read the data
|
|
161
|
-
(default=None, number set in the mindspore.dataset.config).
|
|
162
167
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
163
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
164
|
-
|
|
168
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
169
|
+
Default: None, which means no cache is used.
|
|
165
170
|
|
|
166
171
|
Raises:
|
|
167
172
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
168
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
169
173
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
170
174
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
175
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
171
176
|
|
|
172
177
|
Examples:
|
|
173
178
|
>>> amazon_review_dataset_dir = "/path/to/amazon_review_dataset_dir"
|
|
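The usage/shuffle combination just described might look like this in practice (an illustrative sketch only; the path is a placeholder).

import mindspore.dataset as ds

amazon_review_dir = "/path/to/amazon_review_dataset_dir"  # placeholder path
# Read 1,000 Polarity test samples; Shuffle.FILES reshuffles the file order only,
# while the default Shuffle.GLOBAL also shuffles samples within files.
test_set = ds.AmazonReviewDataset(dataset_dir=amazon_review_dir,
                                  usage="test",
                                  num_samples=1000,
                                  shuffle=ds.Shuffle.FILES)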
@@ -180,7 +185,7 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
180
185
|
The dataset is mainly used for text classification, given the content and title, predict the correct star rating.
|
|
181
186
|
|
|
182
187
|
The Amazon reviews polarity dataset is constructed by taking review score 1 and 2 as negative, 4 and 5 as positive.
|
|
183
|
-
Samples of score 3 is ignored.
|
|
188
|
+
Samples of score 3 are ignored.
|
|
184
189
|
|
|
185
190
|
The Amazon Reviews Polarity and Amazon Reviews Full datasets have the same directory structures.
|
|
186
191
|
You can unzip the dataset files into the following structure and read by MindSpore's API:
|
|
@@ -222,21 +227,22 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
|
|
|
222
227
|
|
|
223
228
|
class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
224
229
|
"""
|
|
225
|
-
|
|
230
|
+
CLUE (Chinese Language Understanding Evaluation) dataset.
|
|
226
231
|
Supported CLUE classification tasks: 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' and 'CSL'.
|
|
227
232
|
|
|
228
233
|
Args:
|
|
229
234
|
dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for
|
|
230
235
|
a pattern of files. The list will be sorted in a lexicographical order.
|
|
231
236
|
task (str, optional): The kind of task, one of 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' and 'CSL'.
|
|
232
|
-
|
|
233
|
-
usage (str, optional): Specify the 'train', 'test' or 'eval' part of dataset
|
|
234
|
-
num_samples (int, optional): The number of samples to be included in the dataset
|
|
235
|
-
|
|
236
|
-
num_parallel_workers (int, optional): Number of
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
237
|
+
Default: 'AFQMC'.
|
|
238
|
+
usage (str, optional): Specify the 'train', 'test' or 'eval' part of dataset. Default: 'train'.
|
|
239
|
+
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
240
|
+
Default: None, will include all samples.
|
|
241
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
242
|
+
Default: None, will use global default workers(8). It can be set
|
|
243
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
244
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
245
|
+
Default: Shuffle.GLOBAL. Bool type and Shuffle enum are both supported to pass in.
|
|
240
246
|
If shuffle is False, no shuffling will be performed.
|
|
241
247
|
If shuffle is True, performs global shuffle.
|
|
242
248
|
There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
|
|
@@ -245,13 +251,13 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
|
245
251
|
|
|
246
252
|
- Shuffle.FILES: Shuffle files only.
|
|
247
253
|
|
|
248
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
254
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
249
255
|
When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
|
|
250
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
256
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
251
257
|
argument can only be specified when `num_shards` is also specified.
|
|
252
258
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
253
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
254
|
-
|
|
259
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
260
|
+
Default: None, which means no cache is used.
|
|
255
261
|
|
|
256
262
|
The generated dataset with different task setting has different output columns:
|
|
257
263
|
|
|
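A short sketch of the task/usage selection described above (illustrative; the file list is a placeholder).

import mindspore.dataset as ds

clue_files = ["/path/to/clue_dataset_file"]  # placeholder file list or glob
# The output columns depend on the chosen task; 'AFQMC' with usage='train'
# loads the AFQMC training split.
afqmc_train = ds.CLUEDataset(dataset_files=clue_files,
                             task="AFQMC",
                             usage="train",
                             shuffle=ds.Shuffle.GLOBAL)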
@@ -400,9 +406,9 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
|
400
406
|
ValueError: task is not in 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' or 'CSL'.
|
|
401
407
|
ValueError: usage is not in 'train', 'test' or 'eval'.
|
|
402
408
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
409
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
403
410
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
404
411
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
405
|
-
ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
|
|
406
412
|
|
|
407
413
|
Examples:
|
|
408
414
|
>>> clue_dataset_dir = ["/path/to/clue_dataset_file"] # contains 1 or multiple clue files
|
|
@@ -457,10 +463,10 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
|
|
|
457
463
|
|
|
458
464
|
class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
459
465
|
"""
|
|
460
|
-
|
|
466
|
+
CoNLL-2000 (Conference on Computational Natural Language Learning) chunking dataset.
|
|
461
467
|
|
|
462
|
-
The generated dataset has three columns: :py:obj:`[word, pos_tag, chunk_tag]
|
|
463
|
-
The tensors of column :py:obj:`word
|
|
468
|
+
The generated dataset has three columns: :py:obj:`[word, pos_tag, chunk_tag]` .
|
|
469
|
+
The tensors of column :py:obj:`word` , column :py:obj:`pos_tag` ,
|
|
464
470
|
and column :py:obj:`chunk_tag` are of the string type.
|
|
465
471
|
|
|
466
472
|
Args:
|
|
@@ -471,7 +477,7 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
|
471
477
|
'all' will read from all 1,0948 samples. Default: None, read all samples.
|
|
472
478
|
num_samples (int, optional): Number of samples (rows) to be read. Default: None, read the full dataset.
|
|
473
479
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
474
|
-
Default: mindspore.dataset.Shuffle.GLOBAL.
|
|
480
|
+
Default: `mindspore.dataset.Shuffle.GLOBAL` .
|
|
475
481
|
If shuffle is False, no shuffling will be performed.
|
|
476
482
|
If shuffle is True, performs global shuffle.
|
|
477
483
|
There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
|
|
@@ -481,12 +487,13 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
|
481
487
|
|
|
482
488
|
num_shards (int, optional): Number of shards that the dataset will be divided into.
|
|
483
489
|
When this argument is specified, `num_samples` reflects the max sample number of per shard. Default: None.
|
|
484
|
-
shard_id (int, optional): The shard ID within `num_shards
|
|
490
|
+
shard_id (int, optional): The shard ID within `num_shards` . This
|
|
485
491
|
argument can only be specified when `num_shards` is also specified. Default: None.
|
|
486
|
-
num_parallel_workers (int, optional): Number of
|
|
487
|
-
Default: None,
|
|
492
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
493
|
+
Default: None, will use global default workers(8). It can be set
|
|
494
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
488
495
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
489
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
496
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
490
497
|
Default: None, which means no cache is used.
|
|
491
498
|
|
|
492
499
|
Raises:
|
|
@@ -547,47 +554,46 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
|
|
|
547
554
|
|
|
548
555
|
class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
549
556
|
"""
|
|
550
|
-
|
|
557
|
+
DBpedia dataset.
|
|
551
558
|
|
|
552
|
-
The generated dataset has three columns :py:obj:`[class, title, content]
|
|
553
|
-
|
|
554
|
-
The tensor of column :py:obj:`title` is of the string type.
|
|
555
|
-
The tensor of column :py:obj:`content` is of the string type.
|
|
559
|
+
The generated dataset has three columns :py:obj:`[class, title, content]` ,
|
|
560
|
+
and the data type of all three columns is string.
|
|
556
561
|
|
|
557
562
|
Args:
|
|
558
563
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
559
564
|
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
560
565
|
'train' will read from 560,000 train samples,
|
|
561
566
|
'test' will read from 70,000 test samples,
|
|
562
|
-
'all' will read from all 630,000 samples
|
|
563
|
-
num_samples (int, optional): The number of samples to be included in the dataset
|
|
564
|
-
|
|
565
|
-
num_parallel_workers (int, optional): Number of
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
567
|
+
'all' will read from all 630,000 samples. Default: None, all samples.
|
|
568
|
+
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
569
|
+
Default: None, will include all text.
|
|
570
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
571
|
+
Default: None, will use global default workers(8). It can be set
|
|
572
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
573
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
574
|
+
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
569
575
|
If shuffle is False, no shuffling will be performed.
|
|
570
|
-
If shuffle is True,
|
|
571
|
-
|
|
576
|
+
If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
|
|
577
|
+
Set the mode of data shuffling by passing in enumeration variables:
|
|
572
578
|
|
|
573
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples
|
|
579
|
+
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
574
580
|
|
|
575
581
|
- Shuffle.FILES: Shuffle files only.
|
|
576
582
|
|
|
577
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
583
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
578
584
|
When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
|
|
579
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
585
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
580
586
|
argument can only be specified when `num_shards` is also specified.
|
|
581
587
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
582
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
583
|
-
|
|
588
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
589
|
+
Default: None, which means no cache is used.
|
|
584
590
|
|
|
585
591
|
Raises:
|
|
586
592
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
587
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
588
593
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
589
594
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
590
|
-
ValueError: If `
|
|
595
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
596
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
591
597
|
|
|
592
598
|
Examples:
|
|
593
599
|
>>> dbpedia_dataset_dir = "/path/to/dbpedia_dataset_directory"
|
|
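To make the num_samples/column description above concrete, a hedged sketch (placeholder path; the iterator call is the one commonly used with mindspore.dataset loaders).

import mindspore.dataset as ds

dbpedia_dir = "/path/to/dbpedia_dataset_directory"  # placeholder path
# Cap the read at 500 rows and walk over the three string columns.
dbpedia = ds.DBpediaDataset(dataset_dir=dbpedia_dir, usage="train", num_samples=500)
for row in dbpedia.create_dict_iterator(output_numpy=True, num_epochs=1):
    label, title = row["class"], row["title"]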
@@ -646,33 +652,40 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
|
|
|
646
652
|
|
|
647
653
|
class EnWik9Dataset(SourceDataset, TextBaseDataset):
|
|
648
654
|
"""
|
|
649
|
-
|
|
655
|
+
EnWik9 dataset.
|
|
650
656
|
|
|
651
657
|
The generated dataset has one column :py:obj:`[text]` with type string.
|
|
652
658
|
|
|
653
659
|
Args:
|
|
654
660
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
655
|
-
num_samples (int, optional): The number of samples to be included in the dataset
|
|
656
|
-
|
|
657
|
-
num_parallel_workers (int, optional): Number of
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
+
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
662
|
+
Default: None, will include all samples.
|
|
663
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
664
|
+
Default: None, will use global default workers(8). It can be set
|
|
665
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
666
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
667
|
+
Bool type and Shuffle enum are both supported to pass in. Default: True.
|
|
661
668
|
If shuffle is False, no shuffling will be performed.
|
|
662
|
-
If shuffle is True,
|
|
663
|
-
|
|
669
|
+
If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
|
|
670
|
+
Set the mode of data shuffling by passing in enumeration variables:
|
|
664
671
|
|
|
665
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples
|
|
672
|
+
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
666
673
|
|
|
667
674
|
- Shuffle.FILES: Shuffle files only.
|
|
668
675
|
|
|
669
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
676
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
670
677
|
When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
|
|
671
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
678
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
672
679
|
argument can only be specified when `num_shards` is also specified.
|
|
673
680
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
674
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
675
|
-
|
|
681
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
682
|
+
Default: None, which means no cache is used.
|
|
683
|
+
|
|
684
|
+
Raises:
|
|
685
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
686
|
+
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
687
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
688
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
676
689
|
|
|
677
690
|
Examples:
|
|
678
691
|
>>> en_wik9_dataset_dir = "/path/to/en_wik9_dataset"
|
|
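A minimal sketch of the EnWik9 parameters above (the path is a placeholder; note the documented shuffle default here is True rather than Shuffle.GLOBAL).

import mindspore.dataset as ds

en_wik9_dir = "/path/to/en_wik9_dataset"  # placeholder path
# Take the first 2,000 rows in file order; the single output column is 'text'.
en_wik9 = ds.EnWik9Dataset(dataset_dir=en_wik9_dir, num_samples=2000, shuffle=False)
print(en_wik9.get_dataset_size())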
@@ -723,45 +736,46 @@ class EnWik9Dataset(SourceDataset, TextBaseDataset):
|
|
|
723
736
|
|
|
724
737
|
class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
725
738
|
"""
|
|
726
|
-
|
|
739
|
+
IMDb (Internet Movie Database) dataset.
|
|
727
740
|
|
|
728
|
-
The generated dataset has two columns: :py:obj:`[text, label]
|
|
741
|
+
The generated dataset has two columns: :py:obj:`[text, label]` .
|
|
729
742
|
The tensor of column :py:obj:`text` is of the string type.
|
|
730
|
-
The
|
|
743
|
+
The tensor of column :py:obj:`label` is a scalar of the uint32 type.
|
|
731
744
|
|
|
732
745
|
Args:
|
|
733
746
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
734
|
-
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'
|
|
735
|
-
|
|
736
|
-
num_samples (int, optional): The number of images to be included in the dataset
|
|
737
|
-
|
|
738
|
-
num_parallel_workers (int, optional): Number of
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
747
|
+
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
|
|
748
|
+
Default: None, will read all samples.
|
|
749
|
+
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
750
|
+
Default: None, will include all samples.
|
|
751
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
752
|
+
Default: None, will use global default workers(8). It can be set
|
|
753
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
754
|
+
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
755
|
+
Default: None, expected order behavior shown in the table below.
|
|
756
|
+
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
757
|
+
Default: None, expected order behavior shown in the table below.
|
|
744
758
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
745
|
-
into
|
|
759
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
746
760
|
the maximum sample number of per shard.
|
|
747
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
761
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
748
762
|
argument can only be specified when `num_shards` is also specified.
|
|
749
763
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
750
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
751
|
-
|
|
764
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
765
|
+
Default: None, which means no cache is used.
|
|
752
766
|
|
|
753
767
|
Raises:
|
|
754
768
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
755
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
756
769
|
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
757
770
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
758
771
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
759
772
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
760
|
-
ValueError: If `
|
|
773
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
774
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
761
775
|
|
|
762
776
|
Note:
|
|
763
777
|
- The shape of the test column.
|
|
764
|
-
- This dataset can take in a `sampler
|
|
778
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
765
779
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
766
780
|
|
|
767
781
|
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
@@ -865,49 +879,51 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
|
|
|
865
879
|
|
|
866
880
|
class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|
867
881
|
"""
|
|
868
|
-
|
|
882
|
+
IWSLT2016 (International Workshop on Spoken Language Translation) dataset.
|
|
869
883
|
|
|
870
|
-
The generated dataset has two columns: :py:obj:`[text, translation]
|
|
884
|
+
The generated dataset has two columns: :py:obj:`[text, translation]` .
|
|
871
885
|
The tensor of column :py:obj: `text` is of the string type.
|
|
872
|
-
The
|
|
886
|
+
The tensor of column :py:obj:`translation` is of the string type.
|
|
873
887
|
|
|
874
888
|
Args:
|
|
875
889
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
876
|
-
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'
|
|
890
|
+
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: None, all samples.
|
|
877
891
|
language_pair (sequence, optional): Sequence containing source and target language, supported values are
|
|
878
892
|
('en', 'fr'), ('en', 'de'), ('en', 'cs'), ('en', 'ar'), ('fr', 'en'), ('de', 'en'), ('cs', 'en'),
|
|
879
|
-
('ar', 'en') (
|
|
893
|
+
('ar', 'en'). Default: ('de', 'en').
|
|
880
894
|
valid_set (str, optional): A string to identify validation set, when usage is valid or all, the validation set
|
|
881
|
-
of valid_set type will be read, supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013'
|
|
882
|
-
and 'tst2014'
|
|
883
|
-
test_set (str, optional): A string to identify test set, when usage is test or all, the test set of test_set
|
|
884
|
-
type will be read, supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013' and 'tst2014'
|
|
885
|
-
|
|
886
|
-
num_samples (int, optional): Number of samples (rows) to read
|
|
887
|
-
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
|
|
888
|
-
|
|
889
|
-
If shuffle is False, no shuffling will be performed.
|
|
890
|
-
If shuffle is True,
|
|
891
|
-
|
|
895
|
+
of `valid_set` type will be read. Supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013'
|
|
896
|
+
and 'tst2014'. Default: 'tst2013'.
|
|
897
|
+
test_set (str, optional): A string to identify test set, when usage is test or all, the test set of `test_set`
|
|
898
|
+
type will be read. Supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013' and 'tst2014'.
|
|
899
|
+
Default: 'tst2014'.
|
|
900
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
|
|
901
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
902
|
+
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
903
|
+
If `shuffle` is False, no shuffling will be performed.
|
|
904
|
+
If `shuffle` is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
|
|
905
|
+
Set the mode of data shuffling by passing in enumeration variables:
|
|
892
906
|
|
|
893
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples
|
|
907
|
+
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
894
908
|
|
|
895
909
|
- Shuffle.FILES: Shuffle files only.
|
|
896
|
-
|
|
910
|
+
|
|
911
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
897
912
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
898
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
913
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
899
914
|
argument can only be specified when `num_shards` is also specified.
|
|
900
|
-
num_parallel_workers (int, optional): Number of
|
|
901
|
-
|
|
915
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
916
|
+
Default: None, will use global default workers(8). It can be set
|
|
917
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
902
918
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
903
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
904
|
-
|
|
919
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
920
|
+
Default: None, which means no cache is used.
|
|
905
921
|
|
|
906
922
|
Raises:
|
|
907
923
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
908
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
909
924
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
910
925
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
926
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
911
927
|
|
|
912
928
|
Examples:
|
|
913
929
|
>>> iwslt2016_dataset_dir = "/path/to/iwslt2016_dataset_dir"
|
|
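The language_pair/valid_set/test_set arguments combine roughly as follows (a sketch; the path is a placeholder and the split names shown are the documented defaults).

import mindspore.dataset as ds

iwslt2016_dir = "/path/to/iwslt2016_dataset_dir"  # placeholder path
# German-to-English pair; 'tst2013' and 'tst2014' mirror the documented
# defaults for the validation and test splits.
iwslt_valid = ds.IWSLT2016Dataset(dataset_dir=iwslt2016_dir,
                                  usage="valid",
                                  language_pair=("de", "en"),
                                  valid_set="tst2013",
                                  test_set="tst2014")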
@@ -918,8 +934,8 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|
|
918
934
|
|
|
919
935
|
IWSLT is an international oral translation conference, a major annual scientific conference dedicated to all aspects
|
|
920
936
|
of oral translation. The MT task of the IWSLT evaluation activity constitutes a dataset, which can be publicly
|
|
921
|
-
obtained through the WIT3 website wit3.fbk.eu. The IWSLT2016 dataset includes translations from
|
|
922
|
-
Czech, French, and German, and translations from Arabic, Czech, French, and German to English.
|
|
937
|
+
obtained through the WIT3 website `wit3 <https://wit3.fbk.eu>`_ . The IWSLT2016 dataset includes translations from
|
|
938
|
+
English to Arabic, Czech, French, and German, and translations from Arabic, Czech, French, and German to English.
|
|
923
939
|
|
|
924
940
|
You can unzip the original IWSLT2016 dataset files into this directory structure and read by MindSpore's API. After
|
|
925
941
|
decompression, you also need to decompress the dataset to be read in the specified folder. For example, if you want
|
|
@@ -992,44 +1008,45 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
|
|
|
992
1008
|
|
|
993
1009
|
class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
|
|
994
1010
|
"""
|
|
995
|
-
|
|
1011
|
+
IWSLT2017 (International Workshop on Spoken Language Translation) dataset.
|
|
996
1012
|
|
|
997
|
-
The generated dataset has two columns: :py:obj:`[text, translation]
|
|
998
|
-
The tensor of column :py:obj:`text`
|
|
999
|
-
The tensor of column :py:obj:`translation` is of the string type.
|
|
1013
|
+
The generated dataset has two columns: :py:obj:`[text, translation]` .
|
|
1014
|
+
The tensors of columns :py:obj:`text` and :py:obj:`translation` are of the string type.
|
|
1000
1015
|
|
|
1001
1016
|
Args:
|
|
1002
1017
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1003
|
-
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'
|
|
1018
|
+
usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: None, all samples.
|
|
1004
1019
|
language_pair (sequence, optional): List containing src and tgt language, supported values are ('en', 'nl'),
|
|
1005
1020
|
('en', 'de'), ('en', 'it'), ('en', 'ro'), ('nl', 'en'), ('nl', 'de'), ('nl', 'it'), ('nl', 'ro'),
|
|
1006
1021
|
('de', 'en'), ('de', 'nl'), ('de', 'it'), ('de', 'ro'), ('it', 'en'), ('it', 'nl'), ('it', 'de'),
|
|
1007
|
-
('it', 'ro'), ('ro', 'en'), ('ro', 'nl'), ('ro', 'de'), ('ro', 'it') (
|
|
1008
|
-
num_samples (int, optional): Number of samples (rows) to read
|
|
1009
|
-
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
|
|
1010
|
-
|
|
1022
|
+
('it', 'ro'), ('ro', 'en'), ('ro', 'nl'), ('ro', 'de'), ('ro', 'it'). Default: ('de', 'en').
|
|
1023
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
|
|
1024
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1025
|
+
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
1011
1026
|
If shuffle is False, no shuffling will be performed.
|
|
1012
|
-
If shuffle is True,
|
|
1013
|
-
|
|
1027
|
+
If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
|
|
1028
|
+
Set the mode of data shuffling by passing in enumeration variables:
|
|
1014
1029
|
|
|
1015
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples
|
|
1030
|
+
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1016
1031
|
|
|
1017
1032
|
- Shuffle.FILES: Shuffle files only.
|
|
1018
|
-
|
|
1033
|
+
|
|
1034
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1019
1035
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
1020
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
1036
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1021
1037
|
argument can only be specified when `num_shards` is also specified.
|
|
1022
|
-
num_parallel_workers (int, optional): Number of
|
|
1023
|
-
|
|
1038
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1039
|
+
Default: None, will use global default workers(8). It can be set
|
|
1040
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1024
1041
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1025
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
1026
|
-
|
|
1042
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1043
|
+
Default: None, which means no cache is used.
|
|
1027
1044
|
|
|
1028
1045
|
Raises:
|
|
1029
1046
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
1030
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1031
1047
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1032
1048
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1049
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1033
1050
|
|
|
1034
1051
|
Examples:
|
|
1035
1052
|
>>> iwslt2017_dataset_dir = "/path/to/iwslt2017_dataset_dir"
|
|
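For comparison with IWSLT2016 above, an IWSLT2017 sketch (illustrative only; the path is a placeholder).

import mindspore.dataset as ds

iwslt2017_dir = "/path/to/iwslt2017_dataset_dir"  # placeholder path
# Italian-to-English pair with the documented default global shuffle.
iwslt_train = ds.IWSLT2017Dataset(dataset_dir=iwslt2017_dir,
                                  usage="train",
                                  language_pair=("it", "en"),
                                  shuffle=ds.Shuffle.GLOBAL)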
@@ -1039,8 +1056,8 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
|
|
|
1039
1056
|
|
|
1040
1057
|
IWSLT is an international oral translation conference, a major annual scientific conference dedicated to all aspects
|
|
1041
1058
|
of oral translation. The MT task of the IWSLT evaluation activity constitutes a dataset, which can be publicly
|
|
1042
|
-
obtained through the WIT3 website wit3.fbk.eu. The IWSLT2017 dataset involves German, English,
|
|
1043
|
-
Romanian. The dataset includes translations in any two different languages.
|
|
1059
|
+
obtained through the WIT3 website `wit3 <https://wit3.fbk.eu>`_ . The IWSLT2017 dataset involves German, English,
|
|
1060
|
+
Italian, Dutch, and Romanian. The dataset includes translations in any two different languages.
|
|
1044
1061
|
|
|
1045
1062
|
You can unzip the original IWSLT2017 dataset files into this directory structure and read by MindSpore's API. You
|
|
1046
1063
|
need to decompress the dataset package in texts/DeEnItNlRo/DeEnItNlRo directory to get the DeEnItNlRo-DeEnItNlRo
|
|
@@ -1089,48 +1106,49 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
|
|
|
1089
1106
|
|
|
1090
1107
|
class Multi30kDataset(SourceDataset, TextBaseDataset):
|
|
1091
1108
|
"""
|
|
1092
|
-
|
|
1109
|
+
Multi30k dataset.
|
|
1093
1110
|
|
|
1094
|
-
The generated dataset has two columns :py:obj:`[text, translation]
|
|
1095
|
-
The tensor of column :py:obj
|
|
1096
|
-
The tensor of column :py:obj
|
|
1111
|
+
The generated dataset has two columns :py:obj:`[text, translation]` .
|
|
1112
|
+
The tensor of column :py:obj:`text` is of the string type.
|
|
1113
|
+
The tensor of column :py:obj:`translation` is of the string type.
|
|
1097
1114
|
|
|
1098
|
-
|
|
1115
|
+
Args:
|
|
1099
1116
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1100
|
-
usage (str, optional): Acceptable usages include 'train', 'test, 'valid' or 'all'
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
If
|
|
1111
|
-
|
|
1117
|
+
usage (str, optional): Acceptable usages include 'train', 'test', 'valid' or 'all'.
|
|
1118
|
+
Default: None, will read all samples.
|
|
1119
|
+
language_pair (Sequence[str, str], optional): Acceptable values of `language_pair` are ['en', 'de'] and ['de', 'en'].
|
|
1120
|
+
Default: None, means ['en', 'de'].
|
|
1121
|
+
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1122
|
+
Default: None, will read all samples.
|
|
1123
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1124
|
+
Default: None, will use global default workers(8). It can be set
|
|
1125
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1126
|
+
shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: None, means Shuffle.GLOBAL.
|
|
1127
|
+
If False is provided, no shuffling will be performed.
|
|
1128
|
+
If True is provided, it is the same as setting to mindspore.dataset.Shuffle.GLOBAL.
|
|
1129
|
+
If Shuffle is provided, the effect is as follows:
|
|
1112
1130
|
|
|
1113
1131
|
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1114
|
-
|
|
1115
1132
|
- Shuffle.FILES: Shuffle files only.
|
|
1116
1133
|
|
|
1117
1134
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
1118
|
-
into
|
|
1135
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
1119
1136
|
the max sample number of per shard.
|
|
1120
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
1137
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1121
1138
|
argument can only be specified when `num_shards` is also specified.
|
|
1122
1139
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1123
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
1124
|
-
|
|
1140
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1141
|
+
Default: None, which means no cache is used.
|
|
1125
1142
|
|
|
1126
1143
|
Raises:
|
|
1127
1144
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1145
|
+
ValueError: If `usage` is not 'train', 'test', 'valid' or 'all'.
|
|
1146
|
+
TypeError: If `language_pair` is not of type Sequence[str, str].
|
|
1147
|
+
RuntimeError: If `num_samples` is less than 0.
|
|
1148
|
+
RuntimeError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1131
1149
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1132
1150
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1133
|
-
|
|
1151
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1134
1152
|
|
|
1135
1153
|
Examples:
|
|
1136
1154
|
>>> multi30k_dataset_dir = "/path/to/multi30k_dataset_directory"
|
|
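A hedged sketch of the language_pair behaviour documented above (placeholder path; ['de', 'en'] simply reverses the default direction).

import mindspore.dataset as ds

multi30k_dir = "/path/to/multi30k_dataset_directory"  # placeholder path
# language_pair defaults to ['en', 'de']; reverse it for German-to-English text.
multi30k = ds.Multi30kDataset(dataset_dir=multi30k_dir,
                              usage="train",
                              language_pair=["de", "en"])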
@@ -1138,11 +1156,11 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
|
|
|
1138
1156
|
|
|
1139
1157
|
About Multi30k dataset:
|
|
1140
1158
|
|
|
1141
|
-
Multi30K is a dataset
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1159
|
+
Multi30K is a multilingual dataset that features approximately 31,000 standardized images
|
|
1160
|
+
described in multiple languages. The images are sourced from Flickr and each image comes
|
|
1161
|
+
with sentence descriptions in both English and German, as well as descriptions in other
|
|
1162
|
+
languages. Multi30k is used primarily for training and testing in tasks such as image
|
|
1163
|
+
captioning, machine translation, and visual question answering.
|
|
1146
1164
|
|
|
1147
1165
|
You can unzip the dataset files into the following directory structure and read by MindSpore's API.
|
|
1148
1166
|
|
|
@@ -1190,9 +1208,9 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
|
|
|
1190
1208
|
|
|
1191
1209
|
class PennTreebankDataset(SourceDataset, TextBaseDataset):
|
|
1192
1210
|
"""
|
|
1193
|
-
|
|
1211
|
+
PennTreebank dataset.
|
|
1194
1212
|
|
|
1195
|
-
The generated dataset has one column :py:obj:`[text]
|
|
1213
|
+
The generated dataset has one column :py:obj:`[text]` .
|
|
1196
1214
|
The tensor of column :py:obj:`text` is of the string type.
|
|
1197
1215
|
|
|
1198
1216
|
Args:
|
|
@@ -1201,27 +1219,34 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
|
|
|
1201
1219
|
'train' will read from 42,068 train samples of string type,
|
|
1202
1220
|
'test' will read from 3,370 test samples of string type,
|
|
1203
1221
|
'valid' will read from 3,761 test samples of string type,
|
|
1204
|
-
'all' will read from all 49,199 samples of string type
|
|
1205
|
-
num_samples (int, optional): Number of samples (rows) to read
|
|
1206
|
-
num_parallel_workers (int, optional): Number of
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1222
|
+
'all' will read from all 49,199 samples of string type. Default: None, all samples.
|
|
1223
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
|
|
1224
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1225
|
+
Default: None, will use global default workers(8). It can be set
|
|
1226
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1227
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1228
|
+
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
1210
1229
|
If shuffle is False, no shuffling will be performed.
|
|
1211
|
-
If shuffle is True,
|
|
1212
|
-
|
|
1230
|
+
If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
|
|
1231
|
+
Set the mode of data shuffling by passing in enumeration variables:
|
|
1213
1232
|
|
|
1214
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples
|
|
1233
|
+
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1215
1234
|
|
|
1216
1235
|
- Shuffle.FILES: Shuffle files only.
|
|
1217
1236
|
|
|
1218
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
1219
|
-
When this argument is specified,
|
|
1220
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
1237
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1238
|
+
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1239
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1221
1240
|
argument can only be specified when `num_shards` is also specified.
|
|
1222
1241
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1223
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
1224
|
-
|
|
1242
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1243
|
+
Default: None, which means no cache is used.
|
|
1244
|
+
|
|
1245
|
+
Raises:
|
|
1246
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
1247
|
+
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1248
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1249
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1225
1250
|
|
|
1226
1251
|
Examples:
|
|
1227
1252
|
>>> penn_treebank_dataset_dir = "/path/to/penn_treebank_dataset_directory"
|
|
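The sharding rule above (num_samples caps each shard, not the whole dataset) can be sketched like this; the path is a placeholder and two single-process readers stand in for two distributed workers.

import mindspore.dataset as ds

ptb_dir = "/path/to/penn_treebank_dataset_directory"  # placeholder path
# Each shard keeps at most num_samples rows of its own partition.
shard0 = ds.PennTreebankDataset(dataset_dir=ptb_dir, usage="train",
                                num_shards=2, shard_id=0, num_samples=1000)
shard1 = ds.PennTreebankDataset(dataset_dir=ptb_dir, usage="train",
                                num_shards=2, shard_id=1, num_samples=1000)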
@@ -1281,43 +1306,42 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
|
|
|
1281
1306
|
|
|
1282
1307
|
class SogouNewsDataset(SourceDataset, TextBaseDataset):
|
|
1283
1308
|
r"""
|
|
1284
|
-
|
|
1309
|
+
Sogou News dataset.
|
|
1285
1310
|
|
|
1286
|
-
The generated dataset has three columns: :py:obj:`[index, title, content]
|
|
1287
|
-
|
|
1288
|
-
The tensor of column :py:obj:`title` is of the string type.
|
|
1289
|
-
The tensor of column :py:obj:`content` is of the string type.
|
|
1311
|
+
The generated dataset has three columns: :py:obj:`[index, title, content]` ,
|
|
1312
|
+
and the data type of all three columns is string.
|
|
1290
1313
|
|
|
1291
1314
|
Args:
|
|
1292
1315
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1293
1316
|
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' .
|
|
1294
1317
|
'train' will read from 450,000 train samples, 'test' will read from 60,000 test samples,
|
|
1295
|
-
'all' will read from all 510,000 samples
|
|
1296
|
-
num_samples (int, optional): Number of samples (rows) to read
|
|
1297
|
-
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
|
|
1298
|
-
|
|
1318
|
+
'all' will read from all 510,000 samples. Default: None, all samples.
|
|
1319
|
+
num_samples (int, optional): Number of samples (rows) to read. Default: None, read all samples.
|
|
1320
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1321
|
+
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
1299
1322
|
If shuffle is False, no shuffling will be performed.
|
|
1300
|
-
If shuffle is True,
|
|
1301
|
-
|
|
1323
|
+
If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
|
|
1324
|
+
Set the mode of data shuffling by passing in enumeration variables:
|
|
1302
1325
|
|
|
1303
1326
|
- Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
|
|
1304
1327
|
|
|
1305
1328
|
- Shuffle.FILES: Shuffle files only.
|
|
1306
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
1329
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1307
1330
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
1308
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
1331
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1309
1332
|
argument can only be specified when `num_shards` is also specified.
|
|
1310
|
-
num_parallel_workers (int, optional): Number of
|
|
1311
|
-
|
|
1333
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1334
|
+
Default: None, will use global default workers(8). It can be set
|
|
1335
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1312
1336
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1313
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
1314
|
-
|
|
1337
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1338
|
+
Default: None, which means no cache is used.
|
|
1315
1339
|
|
|
1316
1340
|
Raises:
|
|
1317
1341
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
1318
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1319
1342
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1320
1343
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1344
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1321
1345
|
|
|
1322
1346
|
Examples:
|
|
1323
1347
|
>>> sogou_news_dataset_dir = "/path/to/sogou_news_dataset_dir"
|
|
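The global worker default mentioned above can be raised once through the config module, as in this sketch (placeholder path).

import mindspore.dataset as ds

# Loaders that leave num_parallel_workers=None pick up this global value.
ds.config.set_num_parallel_workers(4)
sogou_dir = "/path/to/sogou_news_dataset_dir"  # placeholder path
sogou = ds.SogouNewsDataset(dataset_dir=sogou_dir, usage="all")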
@@ -1369,46 +1393,46 @@ class SogouNewsDataset(SourceDataset, TextBaseDataset):
|
|
|
1369
1393
|
|
|
1370
1394
|
class SQuADDataset(SourceDataset, TextBaseDataset):
|
|
1371
1395
|
"""
|
|
1372
|
-
|
|
1396
|
+
SQuAD 1.1 and SQuAD 2.0 datasets.
|
|
1373
1397
|
|
|
1374
1398
|
The generated dataset with different versions and usages has the same output columns:
|
|
1375
|
-
|
|
1399
|
+
:py:obj:`[context, question, text, answer_start]` .
|
|
1376
1400
|
The tensor of column :py:obj:`context` is of the string type.
|
|
1377
1401
|
The tensor of column :py:obj:`question` is of the string type.
|
|
1378
1402
|
The tensor of column :py:obj:`text` is the answer in the context of the string type.
|
|
1379
1403
|
The tensor of column :py:obj:`answer_start` is the start index of answer in context,
|
|
1380
|
-
|
|
1404
|
+
which is of the uint32 type.
|
|
1381
1405
|
|
|
1382
1406
|
Args:
|
|
1383
1407
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1384
|
-
usage (str, optional): Specify the
|
|
1385
|
-
num_samples (int, optional): The number of samples to be included in the dataset
|
|
1386
|
-
|
|
1387
|
-
num_parallel_workers (int, optional): Number of
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
If
|
|
1392
|
-
If
|
|
1393
|
-
|
|
1408
|
+
usage (str, optional): Specify the 'train', 'dev' or 'all' part of dataset. Default: None, all samples.
|
|
1409
|
+
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1410
|
+
Default: None, will include all samples.
|
|
1411
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1412
|
+
Default: None, will use global default workers(8). It can be set
|
|
1413
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1414
|
+
shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: Shuffle.GLOBAL.
|
|
1415
|
+
If False is provided, no shuffling will be performed.
|
|
1416
|
+
If True is provided, it is the same as setting to mindspore.dataset.Shuffle.GLOBAL.
|
|
1417
|
+
If Shuffle is provided, the effect is as follows:
|
|
1394
1418
|
|
|
1395
1419
|
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
1396
|
-
|
|
1397
1420
|
- Shuffle.FILES: Shuffle files only.
|
|
1398
1421
|
|
|
1399
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
1422
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1400
1423
|
When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
|
|
1401
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
1424
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1402
1425
|
argument can only be specified when `num_shards` is also specified.
|
|
1403
1426
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1404
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
1405
|
-
|
|
1427
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1428
|
+
Default: None, which means no cache is used.
|
|
1406
1429
|
|
|
1407
1430
|
Raises:
|
|
1408
1431
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
1409
1432
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1410
1433
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1411
1434
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1435
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1412
1436
|
|
|
1413
1437
|
Examples:
|
|
1414
1438
|
>>> squad_dataset_dir = "/path/to/squad_dataset_file"
|
|
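To show the four output columns listed above, a hedged sketch (placeholder path; the iterator call is the one commonly used with mindspore.dataset loaders).

import mindspore.dataset as ds

squad_dir = "/path/to/squad_dataset_file"  # placeholder path
squad_dev = ds.SQuADDataset(dataset_dir=squad_dir, usage="dev", shuffle=False)
for row in squad_dev.create_dict_iterator(output_numpy=True, num_epochs=1):
    # Four columns: context, question, text (answer span), answer_start (uint32).
    context, question = row["context"], row["question"]
    break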
@@ -1416,7 +1440,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
|
|
|
1416
1440
|
|
|
1417
1441
|
About SQuAD dataset:
|
|
1418
1442
|
|
|
1419
|
-
Stanford Question Answering Dataset
|
|
1443
|
+
SQuAD (Stanford Question Answering Dataset) is a reading comprehension dataset, consisting of questions posed by
|
|
1420
1444
|
crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span,
|
|
1421
1445
|
from the corresponding reading passage, or the question might be unanswerable.
|
|
1422
1446
|
|
|
@@ -1430,6 +1454,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
|
|
|
1430
1454
|
For SQuAD 1.1:
|
|
1431
1455
|
|
|
1432
1456
|
.. code-block::
|
|
1457
|
+
|
|
1433
1458
|
.
|
|
1434
1459
|
└── SQuAD1
|
|
1435
1460
|
├── train-v1.1.json
|
|
@@ -1438,6 +1463,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
|
|
|
1438
1463
|
For SQuAD 2.0:
|
|
1439
1464
|
|
|
1440
1465
|
.. code-block::
|
|
1466
|
+
|
|
1441
1467
|
.
|
|
1442
1468
|
└── SQuAD2
|
|
1443
1469
|
├── train-v2.0.json
|
|
@@ -1479,6 +1505,106 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
|
|
|
1479
1505
|
self.num_shards, self.shard_id)
|
|
1480
1506
|
|
|
1481
1507
|
|
|
1508
|
+
class SST2Dataset(SourceDataset, TextBaseDataset):
|
|
1509
|
+
"""
|
|
1510
|
+
SST2 (Stanford Sentiment Treebank v2) dataset.
|
|
1511
|
+
|
|
1512
|
+
The generated dataset's train.tsv and dev.tsv have two columns :py:obj:`[sentence, label]` .
|
|
1513
|
+
The generated dataset's test.tsv has one column :py:obj:`[sentence]` .
|
|
1514
|
+
The tensors of columns :py:obj:`sentence` and :py:obj:`label` are of the string type.
|
|
1515
|
+
|
|
1516
|
+
Args:
|
|
1517
|
+
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
1518
|
+
usage (str, optional): Usage of this dataset, can be `train`, `test` or `dev`. `train` will read
|
|
1519
|
+
from 67,349 train samples, `test` will read from 1,821 test samples, `dev` will read from
|
|
1520
|
+
all 872 samples. Default: None, will read train samples.
|
|
1521
|
+
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1522
|
+
Default: None, will include all text.
|
|
1523
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1524
|
+
Default: None, will use global default workers(8). It can be set
|
|
1525
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1526
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1527
|
+
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
1528
|
+
If shuffle is False, no shuffling will be performed;
|
|
1529
|
+
If shuffle is True, the behavior is the same as setting shuffle to Shuffle.GLOBAL.
|
|
1530
|
+
Set the mode of data shuffling by passing in enumeration variables:
|
|
1531
|
+
|
|
1532
|
+
- Shuffle.GLOBAL: Shuffle the samples.
|
|
1533
|
+
|
|
1534
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1535
|
+
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
1536
|
+
shard_id (int, optional): The shard ID within num_shards. This argument can only be specified when
|
|
1537
|
+
num_shards is also specified. Default: None.
|
|
1538
|
+
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1539
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1540
|
+
Default: None, which means no cache is used.
|
|
1541
|
+
|
|
1542
|
+
Raises:
|
|
1543
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
1544
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1545
|
+
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1546
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1547
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1548
|
+
|
|
1549
|
+
Examples:
|
|
1550
|
+
>>> sst2_dataset_dir = "/path/to/sst2_dataset_directory"
|
|
1551
|
+
>>>
|
|
1552
|
+
>>> # 1) Read 3 samples from SST2 dataset
|
|
1553
|
+
>>> dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, num_samples=3)
|
|
1554
|
+
>>>
|
|
1555
|
+
>>> # 2) Read train samples from SST2 dataset
|
|
1556
|
+
>>> dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, usage="train")
|
|
1557
|
+
|
|
1558
|
+
About SST2 dataset:
|
|
1559
|
+
The Stanford Sentiment Treebank is a corpus with fully labeled parse trees that allows for a complete
|
|
1560
|
+
analysis of the compositional effects of sentiment in language. The corpus is based on the dataset introduced
|
|
1561
|
+
by Pang and Lee (2005) and consists of 11,855 single sentences extracted from movie reviews. It was parsed
|
|
1562
|
+
with the Stanford parser and includes a total of 215,154 unique phrases from those parse trees, each
|
|
1563
|
+
annotated by 3 human judges.
|
|
1564
|
+
|
|
1565
|
+
Here is the original SST2 dataset structure.
|
|
1566
|
+
You can unzip the dataset files into this directory structure and read by MindSpore's API.
|
|
1567
|
+
|
|
1568
|
+
.. code-block::
|
|
1569
|
+
|
|
1570
|
+
.
|
|
1571
|
+
└── sst2_dataset_dir
|
|
1572
|
+
├── train.tsv
|
|
1573
|
+
├── test.tsv
|
|
1574
|
+
├── dev.tsv
|
|
1575
|
+
└── original
|
|
1576
|
+
|
|
1577
|
+
Citation:
|
|
1578
|
+
|
|
1579
|
+
.. code-block::
|
|
1580
|
+
|
|
1581
|
+
@inproceedings{socher-etal-2013-recursive,
|
|
1582
|
+
title = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
|
|
1583
|
+
author = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning,
|
|
1584
|
+
Christopher D. and Ng, Andrew and Potts, Christopher},
|
|
1585
|
+
booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
|
|
1586
|
+
month = oct,
|
|
1587
|
+
year = {2013},
|
|
1588
|
+
address = {Seattle, Washington, USA},
|
|
1589
|
+
publisher = {Association for Computational Linguistics},
|
|
1590
|
+
url = {https://www.aclweb.org/anthology/D13-1170},
|
|
1591
|
+
pages = {1631--1642},
|
|
1592
|
+
}
|
|
1593
|
+
"""
|
|
1594
|
+
|
|
1595
|
+
@check_sst2_dataset
|
|
1596
|
+
def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
|
|
1597
|
+
num_shards=None, shard_id=None, cache=None):
|
|
1598
|
+
super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
|
|
1599
|
+
num_shards=num_shards, shard_id=shard_id, cache=cache)
|
|
1600
|
+
self.dataset_dir = dataset_dir
|
|
1601
|
+
self.usage = replace_none(usage, "train")
|
|
1602
|
+
|
|
1603
|
+
def parse(self, children=None):
|
|
1604
|
+
return cde.SST2Node(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag,
|
|
1605
|
+
self.num_shards, self.shard_id)
|
|
1606
|
+
|
|
1607
|
+
|
|
1482
1608
|
class TextFileDataset(SourceDataset, TextBaseDataset):
|
|
1483
1609
|
"""
|
|
1484
1610
|
A source dataset that reads and parses datasets stored on disk in text format.
|
|
@@ -1487,12 +1613,13 @@ class TextFileDataset(SourceDataset, TextBaseDataset):
|
|
|
1487
1613
|
Args:
|
|
1488
1614
|
dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
|
|
1489
1615
|
pattern of files. The list will be sorted in a lexicographical order.
|
|
1490
|
-
num_samples (int, optional): The number of samples to be included in the dataset
|
|
1491
|
-
|
|
1492
|
-
num_parallel_workers (int, optional): Number of
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1616
|
+
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
1617
|
+
Default: None, will include all images.
|
|
1618
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1619
|
+
Default: None, will use global default workers(8), it can be set
|
|
1620
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1621
|
+
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
1622
|
+
Default: `Shuffle.GLOBAL` . Bool type and Shuffle enum are both supported to pass in.
|
|
1496
1623
|
If shuffle is False, no shuffling will be performed.
|
|
1497
1624
|
If shuffle is True, performs global shuffle.
|
|
1498
1625
|
There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
|
|
@@ -1501,20 +1628,20 @@ class TextFileDataset(SourceDataset, TextBaseDataset):
|
|
|
1501
1628
|
|
|
1502
1629
|
- Shuffle.FILES: Shuffle files only.
|
|
1503
1630
|
|
|
1504
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
1631
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
1505
1632
|
When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
|
|
1506
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
1633
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1507
1634
|
argument can only be specified when `num_shards` is also specified.
|
|
1508
1635
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1509
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
1510
|
-
|
|
1636
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1637
|
+
Default: None, which means no cache is used.
|
|
1511
1638
|
|
|
1512
1639
|
Raises:
|
|
1513
1640
|
ValueError: If dataset_files are not valid or do not exist.
|
|
1514
1641
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1515
1642
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1516
1643
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1517
|
-
ValueError: If `shard_id` is
|
|
1644
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1518
1645
|
|
|
1519
1646
|
Examples:
|
|
1520
1647
|
>>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
|
|
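The reworked TextFileDataset docstring above now documents `num_shards`, `shard_id` and the `Shuffle` enum defaults. The sketch below shows a sharded read under those documented parameters; the file paths, the `ds` alias and the `get_dataset_size` call are illustrative assumptions rather than code from the package.

# Sketch: shard a set of text files across 4 readers and consume shard 0.
import mindspore.dataset as ds

files = ["/path/to/part0.txt", "/path/to/part1.txt"]  # placeholder file list (assumption)

dataset = ds.TextFileDataset(dataset_files=files,
                             num_shards=4, shard_id=0,  # documented sharding arguments
                             shuffle=ds.Shuffle.FILES)  # shuffle file order only

print(dataset.get_dataset_size())  # rows visible to this shard (generic dataset API, assumed)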
@@ -1536,48 +1663,66 @@ class TextFileDataset(SourceDataset, TextBaseDataset):

class UDPOSDataset(SourceDataset, TextBaseDataset):
    """
-
+    UDPOS(Universal Dependencies dataset for Part of Speech) dataset.

-    The generated dataset has three columns: :py:obj:`[word, universal, stanford]
-
-    The tensor of column :py:obj:`universal` is of the string type.
-    The tensor of column :py:obj:`stanford` is of the string type.
+    The generated dataset has three columns: :py:obj:`[word, universal, stanford]` ,
+    and the data type of three columns is string.

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
        usage (str, optional): Usage of this dataset, can be 'train', 'test', 'valid' or 'all'. 'train' will read from
            12,543 train samples, 'test' will read from 2,077 test samples, 'valid' will read from 2,002 test samples,
-            'all' will read from all 16,622 samples
-        num_samples (int, optional): Number of samples (rows) to read
-        shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
-
-            If shuffle is False, no shuffling will be performed
-            If shuffle is True,
-
+            'all' will read from all 16,622 samples. Default: None, all samples.
+        num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
+        shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+            Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+            If shuffle is False, no shuffling will be performed.
+            If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+            Set the mode of data shuffling by passing in enumeration variables:

            - Shuffle.GLOBAL: Shuffle both the files and samples.

            - Shuffle.FILES: Shuffle files only.

-        num_shards (int, optional): Number of shards that the dataset will be divided into
+        num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
            When this argument is specified, `num_samples` reflects the max sample number of per shard.
-        shard_id (int, optional): The shard ID within `num_shards`
+        shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
            argument can only be specified when `num_shards` is also specified.
-        num_parallel_workers (int, optional): Number of
-
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

    Raises:
        RuntimeError: If `dataset_dir` does not contain data files.
-        ValueError: If `num_parallel_workers` exceeds the max thread numbers.
        RuntimeError: If `num_shards` is specified but `shard_id` is None.
        RuntimeError: If `shard_id` is specified but `num_shards` is None.
+        ValueError: If `num_parallel_workers` exceeds the max thread numbers.

    Examples:
        >>> udpos_dataset_dir = "/path/to/udpos_dataset_dir"
        >>> dataset = ds.UDPOSDataset(dataset_dir=udpos_dataset_dir, usage='all')
+
+    About UDPOS dataset:
+        Text corpus dataset that clarifies syntactic or semantic sentence structure.
+        The corpus comprises 254,830 words and 16,622 sentences, taken from various web media including
+        weblogs, newsgroups, emails and reviews.
+
+    Citation:
+
+    .. code-block::
+
+        @inproceedings{silveira14gold,
+            year = {2014},
+            author = {Natalia Silveira and Timothy Dozat and Marie-Catherine de Marneffe and Samuel Bowman
+                and Miriam Connor and John Bauer and Christopher D. Manning},
+            title = {A Gold Standard Dependency Corpus for {E}nglish},
+            booktitle = {Proceedings of the Ninth International Conference on Language
+                Resources and Evaluation (LREC-2014)}
+        }
    """

    @check_udpos_dataset
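The new `num_parallel_workers` wording above points to `mindspore.dataset.config.set_num_parallel_workers` for the global default. Below is a short sketch combining that setting with the documented UDPOSDataset arguments; the path is a placeholder and `get_col_names` is the generic dataset API, assumed here.

# Sketch: read the UDPOS validation split with an explicit global worker count.
import mindspore.dataset as ds

ds.config.set_num_parallel_workers(4)  # override the documented default of 8 worker threads

udpos_dataset_dir = "/path/to/udpos_dataset_dir"  # placeholder path (assumption)
dataset = ds.UDPOSDataset(dataset_dir=udpos_dataset_dir, usage="valid", shuffle=False)

# Per the docstring above, the columns are [word, universal, stanford], all strings.
print(dataset.get_col_names())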
@@ -1595,38 +1740,43 @@ class UDPOSDataset(SourceDataset, TextBaseDataset):

class WikiTextDataset(SourceDataset, TextBaseDataset):
    """
-
+    WikiText2 and WikiText103 datasets.

-    The generated dataset has one column :py:obj:`[text]
-
+    The generated dataset has one column :py:obj:`[text]` , and
+    the tensor of column `text` is of the string type.

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
-        usage (str, optional): Acceptable usages include 'train', 'test', 'valid' and 'all'
-        num_samples (int, optional): Number of samples (rows) to read
-        num_parallel_workers (int, optional): Number of
-
-
-
-
-            If shuffle is
-
+        usage (str, optional): Acceptable usages include 'train', 'test', 'valid' and 'all'. Default: None, all samples.
+        num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+            Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+            If shuffle is False, no shuffling will be performed.
+            If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+            Set the mode of data shuffling by passing in enumeration variables:

            - Shuffle.GLOBAL: Shuffle both the files and samples.

            - Shuffle.FILES: Shuffle files only.

-        num_shards (int, optional): Number of shards that the dataset will be divided into
-            When this argument is specified,
-        shard_id (int, optional): The shard ID within `num_shards`
+        num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
+            When this argument is specified, `num_samples` reflects the max sample number of per shard.
+        shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
            argument can only be specified when `num_shards` is also specified.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

-
-
-
+    Raises:
+        RuntimeError: If `dataset_dir` does not contain data files or invalid.
+        RuntimeError: If `num_shards` is specified but `shard_id` is None.
+        RuntimeError: If `shard_id` is specified but `num_shards` is None.
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+        ValueError: If `num_samples` is invalid (< 0).
+        ValueError: If `num_parallel_workers` exceeds the max thread numbers.

    About WikiTextDataset dataset:

@@ -1657,6 +1807,10 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):
        journal={arXiv preprint arXiv:1609.07843},
        year={2016}
        }
+
+    Examples:
+        >>> wiki_text_dataset_dir = "/path/to/wiki_text_dataset_directory"
+        >>> dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage='all')
    """

    @check_wiki_text_dataset
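WikiTextDataset gains a documented `num_samples` default and an Examples block in this release. Below is a hedged sketch that reads a capped slice of the 'valid' split; the placeholder path and the dict-style iteration over the single `text` column are assumptions, not taken from the package.

# Sketch: take 100 unshuffled rows from the WikiText 'valid' split.
import mindspore.dataset as ds

wiki_text_dataset_dir = "/path/to/wiki_text_dataset_directory"  # placeholder path (assumption)

dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage="valid",
                             num_samples=100, shuffle=False)

for row in dataset.create_dict_iterator(output_numpy=True):
    line = row["text"]  # single string column, per the docstring above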
@@ -1674,47 +1828,44 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):

class YahooAnswersDataset(SourceDataset, TextBaseDataset):
    """
-
+    YahooAnswers dataset.

-    The generated dataset has
-    The tensor of column :py:obj:`class` is of the string type.
-    The tensor of column :py:obj:`title` is of the string type.
-    The tensor of column :py:obj:`content` is of the string type.
-    The tensor of column :py:obj:`answer` is of the string type.
+    The generated dataset has four columns :py:obj:`[class, title, content, answer]` , whose data type is string.

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
        usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read
            from 1,400,000 train samples, 'test' will read from 60,000 test samples, 'all' will read from
-            all 1,460,000 samples
-        num_samples (int, optional): The number of samples to be included in the dataset
-
-        num_parallel_workers (int, optional): Number of
-
-
-
-
-            If shuffle is
-
+            all 1,460,000 samples. Default: None, all samples.
+        num_samples (int, optional): The number of samples to be included in the dataset.
+            Default: None, will include all text.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+            Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+            If shuffle is False, no shuffling will be performed.
+            If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+            Set the mode of data shuffling by passing in enumeration variables:

            - Shuffle.GLOBAL: Shuffle both the files and samples.

            - Shuffle.FILES: Shuffle files only.

-        num_shards (int, optional): Number of shards that the dataset will be divided into
+        num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
            When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
-        shard_id (int, optional): The shard ID within `num_shards`
+        shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
            argument can only be specified when `num_shards` is also specified.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

    Raises:
        RuntimeError: If `dataset_dir` does not contain data files.
-        ValueError: If `num_parallel_workers` exceeds the max thread numbers.
        RuntimeError: If `num_shards` is specified but `shard_id` is None.
        RuntimeError: If `shard_id` is specified but `num_shards` is None.
-        ValueError: If `shard_id` is
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+        ValueError: If `num_parallel_workers` exceeds the max thread numbers.

    Examples:
        >>> yahoo_answers_dataset_dir = "/path/to/yahoo_answers_dataset_directory"
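Per the rewritten docstring above, YahooAnswersDataset yields four string columns `[class, title, content, answer]`. A small sketch pulling a few rows from the test split under that description; the path is a placeholder and the dict-iterator access is an assumption.

# Sketch: inspect a handful of YahooAnswers test samples.
import mindspore.dataset as ds

yahoo_answers_dataset_dir = "/path/to/yahoo_answers_dataset_directory"  # placeholder (assumption)

dataset = ds.YahooAnswersDataset(dataset_dir=yahoo_answers_dataset_dir,
                                 usage="test", num_samples=5, shuffle=False)

for row in dataset.create_dict_iterator(output_numpy=True):
    # Row keys follow the documented columns: class, title, content, answer.
    print(row["class"], row["title"])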
@@ -1772,11 +1923,9 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):

class YelpReviewDataset(SourceDataset, TextBaseDataset):
    """
-
+    Yelp Review Polarity and Yelp Review Full datasets.

-    The generated dataset has two columns: :py:obj:`[label, text]
-    The tensor of column :py:obj:`label` is of the string type.
-    The tensor of column :py:obj:`text` is of the string type.
+    The generated dataset has two columns: :py:obj:`[label, text]` , and the data type of two columns is string.

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
@@ -1784,32 +1933,33 @@ class YelpReviewDataset(SourceDataset, TextBaseDataset):
            For Polarity, 'train' will read from 560,000 train samples, 'test' will read from 38,000 test samples,
            'all' will read from all 598,000 samples.
            For Full, 'train' will read from 650,000 train samples, 'test' will read from 50,000 test samples,
-            'all' will read from all 700,000 samples
-        num_samples (int, optional): Number of samples (rows) to read
-        shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
-
-            If shuffle is False, no shuffling will be performed
-            If shuffle is True,
-
+            'all' will read from all 700,000 samples. Default: None, all samples.
+        num_samples (int, optional): Number of samples (rows) to read. Default: None, reads all samples.
+        shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+            Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+            If shuffle is False, no shuffling will be performed.
+            If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+            Set the mode of data shuffling by passing in enumeration variables:

            - Shuffle.GLOBAL: Shuffle both the files and samples.

            - Shuffle.FILES: Shuffle files only.
-        num_shards (int, optional): Number of shards that the dataset will be divided into
+        num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
            When this argument is specified, `num_samples` reflects the max sample number of per shard.
-        shard_id (int, optional): The shard ID within `num_shards`
+        shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
            argument can only be specified when `num_shards` is also specified.
-        num_parallel_workers (int, optional): Number of
-
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

    Raises:
        RuntimeError: If `dataset_dir` does not contain data files.
-        ValueError: If `num_parallel_workers` exceeds the max thread numbers.
        RuntimeError: If `num_shards` is specified but `shard_id` is None.
        RuntimeError: If `shard_id` is specified but `num_shards` is None.
+        ValueError: If `num_parallel_workers` exceeds the max thread numbers.

    Examples:
        >>> yelp_review_dataset_dir = "/path/to/yelp_review_dataset_dir"