mindspore 2.0.0rc1__cp38-none-any.whl → 2.2.0__cp38-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +633 -804
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0

Expanded hunks below are from mindspore/dataset/text/transforms.py (listed above with +334 -165); removed lines whose text was cut off by the diff viewer are shown as bare "-" markers.

@@ -24,9 +24,13 @@ and use Lookup to find the index of tokens in Vocab.
  class attributes (self.xxx) to support save() and load().

  Examples:
- >>>
+ >>> import mindspore.dataset as ds
+ >>> import mindspore.dataset.text as text
+ >>>
  >>> # Create a dataset for text sentences saved as line data in a file
- >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list, shuffle=False)
+ >>>
  >>> # Tokenize sentences to unicode characters
  >>> tokenizer = text.UnicodeCharTokenizer()
  >>> # Load vocabulary from list
@@ -99,7 +103,7 @@ class AddToken(TextTensorOperation):
  token (str): The token to be added.
  begin (bool, optional): Choose the position where the token is inserted. If True,
  the token will be inserted at the beginning of the sequence. Otherwise, it will
- be inserted at the end of the sequence. Default: True
+ be inserted at the end of the sequence. Default: ``True``.

  Raises:
  TypeError: If `token` is not of type string.
@@ -109,6 +113,9 @@ class AddToken(TextTensorOperation):
  ``CPU``

  Examples:
+ >>> import mindspore.dataset as ds
+ >>> import mindspore.dataset.text as text
+ >>>
  >>> dataset = ds.NumpySlicesDataset(data={"text": [['a', 'b', 'c', 'd', 'e']]})
  >>> # Data before
  >>> # | text |
@@ -122,6 +129,10 @@ class AddToken(TextTensorOperation):
  >>> # +---------------------------+
  >>> # | ['TOKEN', 'a', 'b', 'c', 'd', 'e'] |
  >>> # +---------------------------+
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
  """

  @check_add_token
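For orientation, a minimal sketch of how the AddToken operation documented above is applied; the dataset and column name follow the docstring example, and this sketch against the 2.2.0 API is not part of the diff:

import mindspore.dataset as ds
import mindspore.dataset.text as text

# Build the same tiny dataset as in the docstring example.
dataset = ds.NumpySlicesDataset(data={"text": [['a', 'b', 'c', 'd', 'e']]})
# begin=True inserts the token at the start of each sequence, as described above.
add_token_op = text.AddToken(token='TOKEN', begin=True)
dataset = dataset.map(operations=add_token_op, input_columns=["text"])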
@@ -136,49 +147,53 @@ class AddToken(TextTensorOperation):

  class JiebaTokenizer(TextTensorOperation):
  """
-
+ Use Jieba tokenizer to tokenize Chinese strings.

  Note:
- The
+ The dictionary files used by Hidden Markov Model segment and Max Probability segment can be
+ obtained through the `cppjieba GitHub <https://github.com/yanyiwu/cppjieba/tree/master/dict>`_ .
+ Please ensure the validity and integrity of these files.

  Args:
- hmm_path (str):
-
-
-
-
-
-
- - JiebaMode.MP, tokenize with MPSegment algorithm.
-
- - JiebaMode.HMM, tokenize with Hidden Markov Model Segment algorithm.
-
- - JiebaMode.MIX, tokenize with a mix of MPSegment and HMMSegment algorithm.
-
- with_offsets (bool, optional): Whether or not output offsets of tokens. Default: False.
+ hmm_path (str): Path to the dictionary file used by Hidden Markov Model segment.
+ mp_path (str): Path to the dictionary file used by Max Probability segment.
+ mode (JiebaMode, optional): The desired segment algorithms. See :class:`~.text.JiebaMode`
+ for details on optional values. Default: ``JiebaMode.MIX`` .
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

  Raises:
-
-
- TypeError: If `
+ TypeError: If `hmm_path` is not of type str.
+ TypeError: If `mp_path` is not of type str.
+ TypeError: If `mode` is not of type :class:`~.text.JiebaMode` .
  TypeError: If `with_offsets` is not of type bool.

  Supported Platforms:
  ``CPU``

  Examples:
+ >>> import mindspore.dataset as ds
  >>> import mindspore.dataset.text as text
  >>> from mindspore.dataset.text import JiebaMode
- >>>
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
+ >>> # 1) If with_offsets=False, return one data column {["text", dtype=str]}
  >>> jieba_hmm_file = "/path/to/jieba/hmm/file"
  >>> jieba_mp_file = "/path/to/jieba/mp/file"
  >>> tokenizer_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP, with_offsets=False)
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
- >>>
- >>> #
+ >>>
+ >>> # 2) If with_offsets=True, return three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
+ >>> # ["offsets_limit", dtype=uint32]}
  >>> tokenizer_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP, with_offsets=True)
- >>>
- ...
+ >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
+ ...                                           output_columns=["token", "offsets_start", "offsets_limit"])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
  """

  @check_jieba_init
@@ -213,17 +228,19 @@ class JiebaTokenizer(TextTensorOperation):
  @check_jieba_add_word
  def add_word(self, word, freq=None):
  """
- Add a
+ Add a specified word mapping to the Vocab of the tokenizer.

  Args:
- word (str): The word to be added to the
-
-
-
+ word (str): The word to be added to the Vocab.
+ freq (int, optional): The frequency of the word to be added. The higher the word frequency,
+ the greater the chance that the word will be tokenized. Default: ``None``, using the
+ default word frequency.

  Examples:
+ >>> import mindspore.dataset as ds
  >>> import mindspore.dataset.text as text
  >>> from mindspore.dataset.text import JiebaMode
+ >>>
  >>> jieba_hmm_file = "/path/to/jieba/hmm/file"
  >>> jieba_mp_file = "/path/to/jieba/mp/file"
  >>> jieba_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP)
@@ -232,6 +249,9 @@ class JiebaTokenizer(TextTensorOperation):
  ...     for line in f:
  ...         word = line.split(',')[0]
  ...         jieba_op.add_word(word)
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
  >>> text_file_dataset = text_file_dataset.map(operations=jieba_op, input_columns=["text"])
  """

@@ -244,30 +264,30 @@ class JiebaTokenizer(TextTensorOperation):
@check_jieba_add_dict
def add_dict(self, user_dict):
"""
- Add
+ Add the specified word mappings to the Vocab of the tokenizer.

Args:
- user_dict (Union[str, dict]):
-
-
-
-
-
-
- word1 freq1
- word2 None
- word3 freq3
-
- Only valid word-freq pairs in user provided file will be added into the dictionary.
- Rows containing invalid input will be ignored. No error nor warning Status is returned.
+ user_dict (Union[str, dict[str, int]]): The word mappings to be added to the Vocab.
+ If the input type is str, it means the path of the file storing the word mappings to be added.
+ Each line of the file should contain two fields separated by a space, where the first field
+ indicates the word itself and the second field should be a number indicating the word frequency.
+ Invalid lines will be ignored and no error or warning will be returned.
+ If the input type is dict[str, int], it means the dictionary storing the word mappings to be added,
+ where the key name is the word itself and the key value is the word frequency.

Examples:
+ >>> import mindspore.dataset as ds
+ >>> import mindspore.dataset.text as text
>>> from mindspore.dataset.text import JiebaMode
+ >>>
>>> jieba_hmm_file = "/path/to/jieba/hmm/file"
>>> jieba_mp_file = "/path/to/jieba/mp/file"
>>> user_dict = {"男默女泪": 10}
>>> jieba_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP)
>>> jieba_op.add_dict(user_dict)
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=jieba_op, input_columns=["text"])
"""

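The new `add_dict` docstring describes a space-separated word/frequency file format, with the frequency optional. A minimal sketch of that format, assuming a temporary file and placeholder Jieba model paths:

```python
import tempfile

import mindspore.dataset.text as text
from mindspore.dataset.text import JiebaMode

# Write a tiny user dictionary in the documented layout: word [frequency].
with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False, encoding="utf-8") as f:
    f.write("深度学习 10\n")  # word with an explicit frequency
    f.write("昇思\n")         # frequency omitted: the default frequency is used
    user_dict_path = f.name

jieba_op = text.JiebaTokenizer("/path/to/jieba/hmm/file", "/path/to/jieba/mp/file",
                               mode=JiebaMode.MP)
jieba_op.add_dict(user_dict_path)       # load mappings from the file
jieba_op.add_dict({"男默女泪": 10})      # or pass a dict[str, int] directly
```

Lines that do not match the word/frequency layout are silently skipped, as stated in the added documentation.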
@@ -303,7 +323,7 @@ class JiebaTokenizer(TextTensorOperation):
raise ValueError(
"user dict file {} is not exist.".format(file_path))
real_file_path = os.path.realpath(file_path)
- file_dict = open(real_file_path)
+ file_dict = open(real_file_path, "r")
data_re = re.compile('^\\s*([^\\s*]+?)\\s*([0-9]+)?\\s*$', re.U)
words_list = []
for item in file_dict:
@@ -327,9 +347,9 @@ class Lookup(TextTensorOperation):
vocab (Vocab): A vocabulary object.
unknown_token (str, optional): Word is used for lookup. In case of the word is out of vocabulary (OOV),
the result of lookup will be replaced with unknown_token. If the unknown_token is not specified or
- it is OOV, runtime error will be thrown. Default: None
+ it is OOV, runtime error will be thrown. Default: ``None``, means no unknown_token is specified.
data_type (mindspore.dtype, optional): The data type that lookup operation maps
- string to. Default:
+ string to. Default: ``mstype.int32``.

Raises:
TypeError: If `vocab` is not of type text.Vocab.
@@ -340,12 +360,20 @@ class Lookup(TextTensorOperation):
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> # Load vocabulary from list
>>> vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您'])
>>> # Use Lookup operation to map tokens to ids
>>> lookup = text.Lookup(vocab)
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=[lookup])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_lookup
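The updated `Lookup` documentation now spells out the `unknown_token` and `data_type` defaults. A minimal sketch of both parameters, using a made-up vocabulary and in-memory data rather than the placeholder text files:

```python
import mindspore.dataset as ds
import mindspore.dataset.text as text
from mindspore import dtype as mstype

# "<unk>" is part of the vocabulary, so OOV tokens map to its id instead of
# raising a runtime error.
vocab = text.Vocab.from_list(["<unk>", "深", "圳"])
lookup = text.Lookup(vocab, unknown_token="<unk>", data_type=mstype.int32)

# Tokens already split into a 1-D string column; "欢" is OOV here.
dataset = ds.NumpySlicesDataset(data=[["深", "圳", "欢"]], column_names=["text"])
dataset = dataset.map(operations=lookup, input_columns=["text"])
```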
@@ -373,13 +401,13 @@ class Ngram(TextTensorOperation):
an empty string produced.
left_pad (tuple, optional): Padding performed on left side of the sequence shaped like ("pad_token", pad_width).
`pad_width` will be capped at n-1. For example, specifying left_pad=("_", 2) would pad left side of the
- sequence with "__". Default: ('', 0)
+ sequence with "__". Default: ``('', 0)``.
right_pad (tuple, optional): Padding performed on right side of the sequence shaped like
("pad_token", pad_width). `pad_width` will be capped at n-1. For example, specifying right_pad=("_", 2)
- would pad right side of the sequence with "__". Default: ('', 0)
+ would pad right side of the sequence with "__". Default: ``('', 0)``.
separator (str, optional): Symbol used to join strings together. For example, if 2-gram is
- ["mindspore", "amazing"] with separator
- Default: ' '
+ ["mindspore", "amazing"] with separator is ``"-"``, the result would be ["mindspore-amazing"].
+ Default: ``' '``, which will use whitespace as separator.

Raises:
TypeError: If values of `n` not positive is not of type int.
@@ -392,13 +420,21 @@ class Ngram(TextTensorOperation):
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> ngram_op = text.Ngram(3, separator="-")
>>> output = ngram_op(["WildRose Country", "Canada's Ocean Playground", "Land of Living Skies"])
>>> # output
>>> # ["WildRose Country-Canada's Ocean Playground-Land of Living Skies"]
+ >>>
>>> # same ngram_op called through map
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=ngram_op)
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_ngram
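The clarified `Ngram` parameters (`left_pad`, `right_pad`, `separator`) can be combined; a minimal eager-mode sketch with arbitrary tokens, relying only on the behavior stated in the docstring:

```python
import mindspore.dataset.text as text

# pad_width is capped at n-1, so for 2-grams each padded side contributes at
# most one pad token even though 2 is requested.
ngram_op = text.Ngram(2, left_pad=("_", 2), right_pad=("_", 2), separator="-")
output = ngram_op(["mindspore", "amazing"])
# Expected: 2-grams over the padded sequence ["_", "mindspore", "amazing", "_"],
# e.g. "_-mindspore", "mindspore-amazing", "amazing-_".
print(output)
```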
@@ -427,9 +463,19 @@ class PythonTokenizer:
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
+ >>> import mindspore.dataset.text as text
+ >>>
>>> def my_tokenizer(line):
... return line.split()
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=text.PythonTokenizer(my_tokenizer))
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_python_tokenizer
@@ -464,11 +510,11 @@ class SentencePieceTokenizer(TextTensorOperation):
mode (Union[str, SentencePieceVocab]): SentencePiece model.
If the input parameter is a file, it represents the path of SentencePiece mode to be loaded.
If the input parameter is a SentencePieceVocab object, it should be constructed in advanced.
- out_type (SPieceTokenizerOutType): The type of output, it can be
- SPieceTokenizerOutType.INT
+ out_type (SPieceTokenizerOutType): The type of output, it can be ``SPieceTokenizerOutType.STRING``,
+ ``SPieceTokenizerOutType.INT``.

- - SPieceTokenizerOutType.STRING
- - SPieceTokenizerOutType.INT
+ - ``SPieceTokenizerOutType.STRING``, means output type of SentencePiece Tokenizer is string.
+ - ``SPieceTokenizerOutType.INT``, means output type of SentencePiece Tokenizer is int.

Raises:
TypeError: If `mode` is not of type string or SentencePieceVocab.
@@ -478,13 +524,22 @@ class SentencePieceTokenizer(TextTensorOperation):
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> from mindspore.dataset.text import SentencePieceModel, SPieceTokenizerOutType
+ >>>
>>> sentence_piece_vocab_file = "/path/to/sentence/piece/vocab/file"
>>> vocab = text.SentencePieceVocab.from_file([sentence_piece_vocab_file], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
>>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING)
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer)
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_sentence_piece_tokenizer
@@ -505,7 +560,7 @@ class SlidingWindow(TextTensorOperation):

Args:
width (int): The width of the window. It must be an integer and greater than zero.
- axis (int, optional): The axis along which the sliding window is computed. Default: 0
+ axis (int, optional): The axis along which the sliding window is computed. Default: ``0``.

Raises:
TypeError: If `width` is not of type int.
@@ -517,6 +572,8 @@ class SlidingWindow(TextTensorOperation):

Examples:
>>> import mindspore.dataset as ds
+ >>> import mindspore.dataset.text as text
+ >>>
>>> dataset = ds.NumpySlicesDataset(data=[[1, 2, 3, 4, 5]], column_names="col1")
>>> # Data before
>>> # | col1 |
@@ -531,6 +588,10 @@ class SlidingWindow(TextTensorOperation):
>>> # | [2, 3, 4], |
>>> # | [3, 4, 5]] |
>>> # +--------------+
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_slidingwindow
@@ -566,10 +627,15 @@ class ToNumber(TextTensorOperation):
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> from mindspore import dtype as mstype
+ >>>
>>> data = [["1", "2", "3"]]
>>> dataset = ds.NumpySlicesDataset(data)
>>> to_number_op = text.ToNumber(mstype.int8)
>>> dataset = dataset.map(operations=to_number_op)
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_to_number
@@ -589,10 +655,11 @@ class ToVectors(TextTensorOperation):
Args:
vectors (Vectors): A vectors object.
unk_init (sequence, optional): Sequence used to initialize out-of-vectors (OOV) token.
- Default: None
- lower_case_backup (bool, optional): Whether to look up the token in the lower case. If False
- original case will be looked up; if True
- found in the keys of the property stoi, the token in the
+ Default: ``None``, initialize with zero vectors.
+ lower_case_backup (bool, optional): Whether to look up the token in the lower case. If ``False``,
+ each token in the original case will be looked up; if ``True``, each token in the original
+ case will be looked up first, if not found in the keys of the property stoi, the token in the
+ lower case will be looked up. Default: ``False``.

Raises:
TypeError: If `unk_init` is not of type sequence.
@@ -603,12 +670,21 @@ class ToVectors(TextTensorOperation):
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
+ >>>
>>> # Load vectors from file
>>> vectors = text.Vectors.from_file("/path/to/vectors/file")
>>> # Use ToVectors operation to map tokens to vectors
>>> to_vectors = text.ToVectors(vectors)
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=[to_vectors])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_to_vectors
@@ -638,6 +714,9 @@ class Truncate(TextTensorOperation):
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
+ >>> import mindspore.dataset.text as text
+ >>>
>>> dataset = ds.NumpySlicesDataset(data=[['a', 'b', 'c', 'd', 'e']], column_names=["text"], shuffle=False)
>>> # Data before
>>> # | col1 |
@@ -651,6 +730,10 @@ class Truncate(TextTensorOperation):
>>> # +------------------------+
>>> # | ['a', 'b', 'c', 'd'] |
>>> # +------------------------+
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_truncate
@@ -664,12 +747,12 @@ class Truncate(TextTensorOperation):

class TruncateSequencePair(TextTensorOperation):
"""
- Truncate a pair of
-
- This operation takes two input tensors and returns two output Tensors.
+ Truncate a pair of 1-D string input so that their total length is less than the specified length.

Args:
- max_length (int):
+ max_length (int): The maximum total length of the output strings. If it is no less than the
+ total length of the original pair of strings, no truncation is performed; otherwise, the
+ longer of the two input strings is truncated until its total length equals this value.

Raises:
TypeError: If `max_length` is not of type int.
@@ -678,7 +761,9 @@ class TruncateSequencePair(TextTensorOperation):
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
+ >>>
>>> dataset = ds.NumpySlicesDataset(data={"col1": [[1, 2, 3]], "col2": [[4, 5]]})
>>> # Data before
>>> # | col1 | col2 |
@@ -692,6 +777,10 @@ class TruncateSequencePair(TextTensorOperation):
>>> # +-----------+-----------+
>>> # | [1, 2] | [4, 5] |
>>> # +-----------+-----------+
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_pair_truncate
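A minimal sketch of the clarified `max_length` behavior of `TruncateSequencePair`, mirroring the before/after tables quoted in the docstring above; the column data is the same toy example:

```python
import mindspore.dataset as ds
import mindspore.dataset.text as text

dataset = ds.NumpySlicesDataset(data={"col1": [[1, 2, 3]], "col2": [[4, 5]]})

# With a maximum total length of 4, the longer column is trimmed first:
# col1=[1, 2, 3], col2=[4, 5]  ->  col1=[1, 2], col2=[4, 5]
truncate_pair = text.TruncateSequencePair(4)
dataset = dataset.map(operations=truncate_pair, input_columns=["col1", "col2"])
```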
@@ -705,10 +794,11 @@ class TruncateSequencePair(TextTensorOperation):

class UnicodeCharTokenizer(TextTensorOperation):
"""
-
+ Unpack the Unicode characters in the input strings.

Args:
- with_offsets (bool, optional): Whether
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

Raises:
TypeError: If `with_offsets` is not of type bool.
@@ -717,15 +807,25 @@ class UnicodeCharTokenizer(TextTensorOperation):
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
>>> # If with_offsets=False, default output one column {["text", dtype=str]}
>>> tokenizer_op = text.UnicodeCharTokenizer(with_offsets=False)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
+ >>>
>>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeCharTokenizer(with_offsets=True)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_with_offsets
@@ -743,13 +843,14 @@ class WordpieceTokenizer(TextTensorOperation):

Args:
vocab (Vocab): Vocabulary used to look up words.
- suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: '##'
+ suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: ``'##'``.
max_bytes_per_token (int, optional): The maximum length of tokenization, words exceeding this length will
- not be split. Default: 100
+ not be split. Default: ``100``.
unknown_token (str, optional): The output for unknown words. When set to an empty string, the corresponding
unknown word will be directly returned as the output. Otherwise, the set string will be returned as the
- output. Default: '[UNK]'
- with_offsets (bool, optional): Whether to
+ output. Default: ``'[UNK]'``.
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

Raises:
TypeError: If `vocab` is not of type :class:`mindspore.dataset.text.Vocab` .
@@ -763,19 +864,31 @@ class WordpieceTokenizer(TextTensorOperation):
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
>>> vocab_list = ["book", "cholera", "era", "favor", "##ite", "my", "is", "love", "dur", "##ing", "the"]
>>> vocab = text.Vocab.from_list(vocab_list)
+ >>>
>>> # If with_offsets=False, default output one column {["text", dtype=str]}
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
... max_bytes_per_token=100, with_offsets=False)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
+ >>>
>>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
... max_bytes_per_token=100, with_offsets=True)
+ >>>
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_wordpiece_tokenizer
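A minimal sketch of how the documented `suffix_indicator` drives subword splitting in `WordpieceTokenizer`; the vocabulary and sentence are made up, and in-memory data is used instead of the placeholder text files:

```python
import mindspore.dataset as ds
import mindspore.dataset.text as text

# "##ite" carries the suffix indicator, so "favorite" is expected to split into
# ["favor", "##ite"]; words the vocabulary cannot cover fall back to unknown_token.
vocab = text.Vocab.from_list(["my", "favor", "##ite", "book", "is"])
tokenizer_op = text.WordpieceTokenizer(vocab=vocab, suffix_indicator="##",
                                       unknown_token="[UNK]")

dataset = ds.NumpySlicesDataset(data=[["my", "favorite", "book"]], column_names=["text"])
dataset = dataset.map(operations=tokenizer_op, input_columns=["text"])
```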
@@ -813,27 +926,20 @@ if platform.system().lower() != 'windows':
Args:
lower_case (bool, optional): Whether to perform lowercase processing on the text. If True, will fold the
text to lower case and strip accented characters. If False, will only perform normalization on the
- text, with mode specified by `normalization_form` . Default: False
- keep_whitespace (bool, optional): If True, the whitespace will be kept in the output. Default: False
- normalization_form (NormalizeForm, optional):
-
-
- NormalizeForm.NFKD. Default: NormalizeForm.NONE.
-
- - NormalizeForm.NONE, no normalization.
- - NormalizeForm.NFC, Canonical Decomposition, followed by Canonical Composition.
- - NormalizeForm.NFKC, Compatibility Decomposition, followed by Canonical Composition.
- - NormalizeForm.NFD, Canonical Decomposition.
- - NormalizeForm.NFKD, Compatibility Decomposition.
-
+ text, with mode specified by `normalization_form` . Default: ``False``.
+ keep_whitespace (bool, optional): If True, the whitespace will be kept in the output. Default: ``False``.
+ normalization_form (NormalizeForm, optional): The desired normalization form.
+ See :class:`~.text.NormalizeForm` for details on optional values.
+ Default: ``NormalizeForm.NFKC`` .
preserve_unused_token (bool, optional): Whether to preserve special tokens. If True, will not split special
- tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'. Default: True
- with_offsets (bool, optional): Whether to
+ tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'. Default: ``True``.
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

Raises:
TypeError: If `lower_case` is not of type bool.
TypeError: If `keep_whitespace` is not of type bool.
- TypeError: If `normalization_form` is not of type :class
+ TypeError: If `normalization_form` is not of type :class:`~.text.NormalizeForm` .
TypeError: If `preserve_unused_token` is not of type bool.
TypeError: If `with_offsets` is not of type bool.
RuntimeError: If dtype of input Tensor is not str.
@@ -842,27 +948,34 @@ if platform.system().lower() != 'windows':
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> from mindspore.dataset.text import NormalizeForm
>>>
- >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
+ >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
>>> tokenizer_op = text.BasicTokenizer(lower_case=False,
... keep_whitespace=False,
... normalization_form=NormalizeForm.NONE,
... preserve_unused_token=True,
... with_offsets=False)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
- >>> # If with_offsets=True, then output three columns {["token", dtype=str],
- >>> #
- >>> #
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
+ >>> # ["offsets_start", dtype=uint32],
+ >>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.BasicTokenizer(lower_case=False,
... keep_whitespace=False,
... normalization_form=NormalizeForm.NONE,
... preserve_unused_token=True,
... with_offsets=True)
- >>>
- ...
-
+ >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
+ ... output_columns=["token", "offsets_start", "offsets_limit"])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_basic_tokenizer
@@ -892,30 +1005,25 @@ if platform.system().lower() != 'windows':

Args:
vocab (Vocab): Vocabulary used to look up words.
- suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: '##'
+ suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: ``'##'``.
max_bytes_per_token (int, optional): The maximum length of tokenization, words exceeding this length will
- not be split. Default: 100
+ not be split. Default: ``100``.
unknown_token (str, optional): The output for unknown words. When set to an empty string, the corresponding
unknown word will be directly returned as the output. Otherwise, the set string will be returned as the
- output. Default: '[UNK]'
- lower_case (bool, optional): Whether to perform lowercase processing on the text. If True
- text to lower case and strip accented characters. If False
- text, with mode specified by `normalization_form` . Default: False
- keep_whitespace (bool, optional): If True
-
-
-
-
-
-
-
-
-
- - NormalizeForm.NFKD, Compatibility Decomposition.
-
- preserve_unused_token (bool, optional): Whether to preserve special tokens. If True, will not split special
- tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'. Default: True.
- with_offsets (bool, optional): Whether to return the offsets of tokens. Default: False.
+ output. Default: ``'[UNK]'``.
+ lower_case (bool, optional): Whether to perform lowercase processing on the text. If ``True``, will fold the
+ text to lower case and strip accented characters. If ``False``, will only perform normalization on the
+ text, with mode specified by `normalization_form` . Default: ``False``.
+ keep_whitespace (bool, optional): If ``True``, the whitespace will be kept in the output.
+ Default: ``False``.
+ normalization_form (NormalizeForm, optional): The desired normalization form.
+ See :class:`~.text.NormalizeForm` for details on optional values.
+ Default: ``NormalizeForm.NFKC`` .
+ preserve_unused_token (bool, optional): Whether to preserve special tokens. If ``True``,
+ will not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'.
+ Default: ``True``.
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

Raises:
TypeError: If `vocab` is not of type :class:`mindspore.dataset.text.Vocab` .
@@ -925,7 +1033,7 @@ if platform.system().lower() != 'windows':
TypeError: If `unknown_token` is not of type str.
TypeError: If `lower_case` is not of type bool.
TypeError: If `keep_whitespace` is not of type bool.
- TypeError: If `normalization_form` is not of type :class
+ TypeError: If `normalization_form` is not of type :class:`~.text.NormalizeForm` .
TypeError: If `preserve_unused_token` is not of type bool.
TypeError: If `with_offsets` is not of type bool.

@@ -933,10 +1041,14 @@ if platform.system().lower() != 'windows':
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> from mindspore.dataset.text import NormalizeForm
>>>
- >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
+ >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
>>> vocab_list = ["床", "前", "明", "月", "光", "疑", "是", "地", "上", "霜", "举", "头", "望", "低",
... "思", "故", "乡","繁", "體", "字", "嘿", "哈", "大", "笑", "嘻", "i", "am", "mak",
... "make", "small", "mistake", "##s", "during", "work", "##ing", "hour", "😀", "😃",
@@ -948,16 +1060,20 @@ if platform.system().lower() != 'windows':
... normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
... with_offsets=False)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
- >>> # If with_offsets=True, then output three columns {["token", dtype=str],
- >>> #
- >>> #
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
+ >>> # ["offsets_start", dtype=uint32],
+ >>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100,
... unknown_token='[UNK]', lower_case=False, keep_whitespace=False,
... normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
... with_offsets=True)
- >>>
+ >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start",
... "offsets_limit"])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_bert_tokenizer
@@ -997,9 +1113,16 @@ if platform.system().lower() != 'windows':
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> case_op = text.CaseFold()
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=case_op)
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

def parse(self):
@@ -1018,10 +1141,17 @@ if platform.system().lower() != 'windows':
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>>
>>> replace_op = text.FilterWikipediaXML()
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=replace_op)
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

def parse(self):
@@ -1030,34 +1160,35 @@ if platform.system().lower() != 'windows':

class NormalizeUTF8(TextTensorOperation):
"""
-
+ Normalize the input UTF-8 encoded strings.

Note:
NormalizeUTF8 is not supported on Windows platform yet.

Args:
- normalize_form (NormalizeForm, optional):
-
-
- See http://unicode.org/reports/tr15/ for details.
-
- - NormalizeForm.NONE, do nothing for input string tensor.
- - NormalizeForm.NFC, normalize with Normalization Form C.
- - NormalizeForm.NFKC, normalize with Normalization Form KC.
- - NormalizeForm.NFD, normalize with Normalization Form D.
- - NormalizeForm.NFKD, normalize with Normalization Form KD.
+ normalize_form (NormalizeForm, optional): The desired normalization form.
+ See :class:`~.text.NormalizeForm` for details on optional values.
+ Default: ``NormalizeForm.NFKC`` .

Raises:
- TypeError: If `normalize_form` is not of type NormalizeForm
+ TypeError: If `normalize_form` is not of type :class:`~.text.NormalizeForm`.

Supported Platforms:
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> from mindspore.dataset.text import NormalizeForm
+ >>>
>>> normalize_op = text.NormalizeUTF8(normalize_form=NormalizeForm.NFC)
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
>>> text_file_dataset = text_file_dataset.map(operations=normalize_op)
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

def __init__(self, normalize_form=NormalizeForm.NFKC):
@@ -1074,33 +1205,39 @@ if platform.system().lower() != 'windows':

class RegexReplace(TextTensorOperation):
"""
- Replace
-
- See https://unicode-org.github.io/icu/userguide/strings/regexp.html for supported regex pattern.
+ Replace part of the input UTF-8 string with a different text string using regular expressions.

Note:
RegexReplace is not supported on Windows platform yet.

Args:
- pattern (str):
-
-
-
+ pattern (str): The regular expression, used to mean the specific, standard textual syntax for
+ representing patterns for matching text.
+ replace (str): The string used to replace the matched elements.
+ replace_all (bool, optional): Whether to replace all matched elements. If ``False``, only the
+ first matched element will be replaced; otherwise, all matched elements will be replaced.
+ Default: ``True``.

Raises:
- TypeError: If `pattern` is not of type
- TypeError: If `replace` is not of type
+ TypeError: If `pattern` is not of type str.
+ TypeError: If `replace` is not of type str.
TypeError: If `replace_all` is not of type bool.

Supported Platforms:
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
- >>>
- >>>
- >>>
- >>> text_file_dataset =
+ >>>
+ >>> regex_replace = text.RegexReplace('apple', 'orange')
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>> text_file_dataset = text_file_dataset.map(operations=regex_replace)
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_regex_replace
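A minimal sketch of the newly documented `replace_all` flag on `RegexReplace`; the sample sentence is made up and in-memory data is used instead of the placeholder text files:

```python
import mindspore.dataset as ds
import mindspore.dataset.text as text

# replace_all=False rewrites only the first match per sample:
# "apple pie and apple juice" -> "orange pie and apple juice".
replace_first = text.RegexReplace("apple", "orange", replace_all=False)

# The default replace_all=True rewrites every match:
# "apple pie and apple juice" -> "orange pie and orange juice".
replace_every = text.RegexReplace("apple", "orange")

dataset = ds.NumpySlicesDataset(data=[["apple pie and apple juice"]], column_names=["text"])
dataset = dataset.map(operations=replace_first, input_columns=["text"])
```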
@@ -1128,8 +1265,9 @@ if platform.system().lower() != 'windows':
The original string will be split by matched elements.
keep_delim_pattern (str, optional): The string matched by 'delim_pattern' can be kept as a token
if it can be matched by 'keep_delim_pattern'. The default value is an empty str
- which means that delimiters will not be kept as an output token. Default: ''
- with_offsets (bool, optional): Whether
+ which means that delimiters will not be kept as an output token. Default: ``''``.
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

Raises:
TypeError: If `delim_pattern` is not of type string.
@@ -1140,18 +1278,27 @@ if platform.system().lower() != 'windows':
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
- >>>
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
+ >>> # 1) If with_offsets=False, default output is one column {["text", dtype=str]}
>>> delim_pattern = r"[ |,]"
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, with_offsets=False)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
- >>>
- >>> #
- >>> #
+ >>>
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
+ >>> # ["offsets_start", dtype=uint32],
+ >>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.RegexTokenizer(delim_pattern, with_offsets=True)
- >>>
- ...
-
+ >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
+ ... output_columns=["token", "offsets_start", "offsets_limit"])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_regex_tokenizer
@@ -1173,8 +1320,9 @@ if platform.system().lower() != 'windows':
UnicodeScriptTokenizer is not supported on Windows platform yet.

Args:
- keep_whitespace (bool, optional): Whether or not emit whitespace tokens. Default: False
- with_offsets (bool, optional): Whether
+ keep_whitespace (bool, optional): Whether or not emit whitespace tokens. Default: ``False``.
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

Raises:
TypeError: If `keep_whitespace` is not of type bool.
@@ -1184,17 +1332,27 @@ if platform.system().lower() != 'windows':
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
- >>>
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
+ >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
>>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=False)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
- >>>
- >>> #
- >>> #
+ >>>
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
+ >>> # ["offsets_start", dtype=uint32],
+ >>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"])

+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
+
"""

@check_unicode_script_tokenizer
@@ -1217,7 +1375,8 @@ if platform.system().lower() != 'windows':
WhitespaceTokenizer is not supported on Windows platform yet.

Args:
- with_offsets (bool, optional): Whether
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

Raises:
TypeError: If `with_offsets` is not of type bool.
@@ -1226,16 +1385,26 @@ if platform.system().lower() != 'windows':
``CPU``

Examples:
+ >>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
- >>>
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
+ >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
>>> tokenizer_op = text.WhitespaceTokenizer(with_offsets=False)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
- >>>
+ >>>
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
>>> # ["offsets_start", dtype=uint32],
>>> # ["offsets_limit", dtype=uint32]}
>>> tokenizer_op = text.WhitespaceTokenizer(with_offsets=True)
>>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
... output_columns=["token", "offsets_start", "offsets_limit"])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
"""

@check_with_offsets