mindspore 2.0.0rc1__cp38-none-any.whl → 2.2.0__cp38-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +633 -804
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
mindspore/dataset/text/utils.py
CHANGED
|
@@ -29,33 +29,41 @@ from .validators import check_vocab, check_from_file, check_from_list, check_fro
|
|
|
29
29
|
|
|
30
30
|
class CharNGram(cde.CharNGram):
|
|
31
31
|
"""
|
|
32
|
-
CharNGram
|
|
32
|
+
CharNGram pre-trained word embeddings.
|
|
33
|
+
|
|
34
|
+
A word or sentence is represented using a character n-gram count vector, followed by a single
|
|
35
|
+
nonlinear transformation to yield a low-dimensional embedding.
|
|
33
36
|
"""
|
|
34
37
|
|
|
35
38
|
@classmethod
|
|
36
39
|
@check_from_file_vectors
|
|
37
40
|
def from_file(cls, file_path, max_vectors=None):
|
|
38
41
|
"""
|
|
39
|
-
|
|
42
|
+
Load the CharNGram pre-training vector set file.
|
|
40
43
|
|
|
41
44
|
Args:
|
|
42
|
-
file_path (str): Path
|
|
43
|
-
max_vectors (int, optional):
|
|
45
|
+
file_path (str): Path to the CharNGram pre-training vector set file.
|
|
46
|
+
max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
|
|
44
47
|
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
|
|
45
48
|
situations where the entire set doesn't fit in memory, or is not needed for another reason,
|
|
46
|
-
|
|
49
|
+
this value can limit the size of the loaded set. Default: ``None``, no upper limit.
|
|
47
50
|
|
|
48
51
|
Returns:
|
|
49
|
-
CharNGram, CharNGram
|
|
52
|
+
CharNGram, CharNGram pre-training vectors.
|
|
50
53
|
|
|
51
54
|
Raises:
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
TypeError: If `max_vectors` is not type
|
|
55
|
+
TypeError: If `file_path` is not of type str.
|
|
56
|
+
RuntimeError: If `file_path` does not exist or is not accessible.
|
|
57
|
+
TypeError: If `max_vectors` is not of type int.
|
|
58
|
+
ValueError: If `max_vectors` is negative.
|
|
55
59
|
|
|
56
60
|
Examples:
|
|
57
61
|
>>> import mindspore.dataset.text as text
|
|
62
|
+
>>>
|
|
58
63
|
>>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
|
|
64
|
+
>>> to_vectors = text.ToVectors(char_n_gram)
|
|
65
|
+
>>> # Look up a token into vectors according to the CharNGram model.
|
|
66
|
+
>>> word_vector = to_vectors(["word1", "word2"])
|
|
59
67
|
"""
|
|
60
68
|
|
|
61
69
|
max_vectors = max_vectors if max_vectors is not None else 0
|
|
@@ -64,34 +72,40 @@ class CharNGram(cde.CharNGram):
|
|
|
64
72
|
|
|
65
73
|
class FastText(cde.FastText):
|
|
66
74
|
"""
|
|
67
|
-
FastText
|
|
75
|
+
FastText pre-trained word embeddings.
|
|
76
|
+
|
|
77
|
+
FastText allows one to create an unsupervised learning or supervised learning algorithm for obtaining vector
|
|
78
|
+
representations for words.
|
|
68
79
|
"""
|
|
69
80
|
|
|
70
81
|
@classmethod
|
|
71
82
|
@check_from_file_vectors
|
|
72
83
|
def from_file(cls, file_path, max_vectors=None):
|
|
73
84
|
"""
|
|
74
|
-
|
|
85
|
+
Load the FastText pre-training vector set file.
|
|
75
86
|
|
|
76
87
|
Args:
|
|
77
|
-
file_path (str): Path
|
|
78
|
-
|
|
79
|
-
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
|
|
88
|
+
file_path (str): Path to the FastText pre-trained vector set file. File suffix should be `*.vec`.
|
|
89
|
+
max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
|
|
80
90
|
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
|
|
81
91
|
situations where the entire set doesn't fit in memory, or is not needed for another reason,
|
|
82
|
-
|
|
92
|
+
this value can limit the size of the loaded set. Default: ``None``, no upper limit.
|
|
83
93
|
|
|
84
94
|
Returns:
|
|
85
|
-
FastText, FastText
|
|
95
|
+
FastText, FastText pre-training vectors.
|
|
86
96
|
|
|
87
97
|
Raises:
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
TypeError: If `max_vectors` is not type
|
|
98
|
+
TypeError: If `file_path` is not of type str.
|
|
99
|
+
RuntimeError: If `file_path` does not exist or is not accessible.
|
|
100
|
+
TypeError: If `max_vectors` is not of type int.
|
|
101
|
+
ValueError: If `max_vectors` is negative.
|
|
91
102
|
|
|
92
103
|
Examples:
|
|
93
104
|
>>> import mindspore.dataset.text as text
|
|
94
105
|
>>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
|
|
106
|
+
>>> to_vectors = text.ToVectors(fast_text)
|
|
107
|
+
>>> # Look up a token into vectors according to the FastText model.
|
|
108
|
+
>>> word_vector = to_vectors(["word1", "word2"])
|
|
95
109
|
"""
|
|
96
110
|
|
|
97
111
|
max_vectors = max_vectors if max_vectors is not None else 0
|
|
@@ -100,34 +114,39 @@ class FastText(cde.FastText):
|
|
|
100
114
|
|
|
101
115
|
class GloVe(cde.GloVe):
|
|
102
116
|
"""
|
|
103
|
-
|
|
117
|
+
Global Vectors (GloVe) pre-trained word embeddings.
|
|
118
|
+
|
|
119
|
+
GloVe is an unsupervised learning algorithm for obtaining vector representations for words.
|
|
104
120
|
"""
|
|
105
121
|
|
|
106
122
|
@classmethod
|
|
107
123
|
@check_from_file_vectors
|
|
108
124
|
def from_file(cls, file_path, max_vectors=None):
|
|
109
125
|
"""
|
|
110
|
-
|
|
126
|
+
Load the GloVe pre-training vector set file.
|
|
111
127
|
|
|
112
128
|
Args:
|
|
113
|
-
file_path (str): Path
|
|
114
|
-
|
|
115
|
-
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
|
|
129
|
+
file_path (str): Path to the GloVe pre-training vector set file. File name is similar to `glove.*.txt`.
|
|
130
|
+
max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
|
|
116
131
|
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
|
|
117
132
|
situations where the entire set doesn't fit in memory, or is not needed for another reason,
|
|
118
|
-
|
|
133
|
+
this value can limit the size of the loaded set. Default: ``None``, no upper limit.
|
|
119
134
|
|
|
120
135
|
Returns:
|
|
121
|
-
GloVe, GloVe
|
|
136
|
+
GloVe, GloVe pre-training vectors.
|
|
122
137
|
|
|
123
138
|
Raises:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
TypeError: If `max_vectors` is not type
|
|
139
|
+
TypeError: If `file_path` is not of type str.
|
|
140
|
+
RuntimeError: If `file_path` does not exist or is not accessible.
|
|
141
|
+
TypeError: If `max_vectors` is not of type int.
|
|
142
|
+
ValueError: If `max_vectors` is negative.
|
|
127
143
|
|
|
128
144
|
Examples:
|
|
129
145
|
>>> import mindspore.dataset.text as text
|
|
130
146
|
>>> glove = text.GloVe.from_file("/path/to/glove/file", max_vectors=None)
|
|
147
|
+
>>> to_vectors = text.ToVectors(glove)
|
|
148
|
+
>>> # Look up a token into vectors according to the GloVe model.
|
|
149
|
+
>>> word_vector = to_vectors(["word1", "word2"])
|
|
131
150
|
"""
|
|
132
151
|
|
|
133
152
|
max_vectors = max_vectors if max_vectors is not None else 0
|
|
@@ -138,7 +157,7 @@ class JiebaMode(IntEnum):
|
|
|
138
157
|
"""
|
|
139
158
|
An enumeration for :class:`mindspore.dataset.text.JiebaTokenizer` .
|
|
140
159
|
|
|
141
|
-
Possible enumeration values are: JiebaMode.MIX
|
|
160
|
+
Possible enumeration values are: ``JiebaMode.MIX``, ``JiebaMode.MP``, ``JiebaMode.HMM``.
|
|
142
161
|
|
|
143
162
|
- JiebaMode.MIX: tokenize with a mix of MPSegment and HMMSegment algorithm.
|
|
144
163
|
- JiebaMode.MP: tokenize with MPSegment algorithm.
|
|
@@ -152,12 +171,11 @@ class JiebaMode(IntEnum):
|
|
|
152
171
|
|
|
153
172
|
class NormalizeForm(IntEnum):
|
|
154
173
|
"""
|
|
155
|
-
|
|
174
|
+
`Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
|
|
156
175
|
|
|
157
|
-
|
|
158
|
-
and NormalizeForm.NFKD.
|
|
176
|
+
Available values are as follows:
|
|
159
177
|
|
|
160
|
-
- NormalizeForm.NONE:
|
|
178
|
+
- NormalizeForm.NONE: No normalization.
|
|
161
179
|
- NormalizeForm.NFC: Canonical Decomposition, followed by Canonical Composition.
|
|
162
180
|
- NormalizeForm.NFKC: Compatibility Decomposition, followed by Canonical Composition.
|
|
163
181
|
- NormalizeForm.NFD: Canonical Decomposition.
|
|
@@ -173,17 +191,14 @@ class NormalizeForm(IntEnum):
|
|
|
173
191
|
|
|
174
192
|
class SentencePieceModel(IntEnum):
|
|
175
193
|
"""
|
|
176
|
-
|
|
194
|
+
Subword algorithms for SentencePiece.
|
|
177
195
|
|
|
178
|
-
|
|
179
|
-
SentencePieceModel.WORD.
|
|
196
|
+
Available values are as follows:
|
|
180
197
|
|
|
181
|
-
- SentencePieceModel.UNIGRAM: Unigram Language Model
|
|
182
|
-
|
|
183
|
-
- SentencePieceModel.
|
|
184
|
-
|
|
185
|
-
- SentencePieceModel.CHAR: refers to char based sentencePiece Model type.
|
|
186
|
-
- SentencePieceModel.WORD: refers to word based sentencePiece Model type.
|
|
198
|
+
- SentencePieceModel.UNIGRAM: `Unigram Language Model <https://arxiv.org/abs/1804.10959>`_ subword algorithm.
|
|
199
|
+
- SentencePieceModel.BPE: `Byte-Pair-Encoding <https://arxiv.org/abs/1508.07909>`_ subword algorithm.
|
|
200
|
+
- SentencePieceModel.CHAR: Character-based subword algorithm.
|
|
201
|
+
- SentencePieceModel.WORD: Word-based subword algorithm.
|
|
187
202
|
"""
|
|
188
203
|
|
|
189
204
|
UNIGRAM = 0
|
|
@@ -218,20 +233,11 @@ class SentencePieceVocab:
|
|
|
218
233
|
dataset (Dataset): Dataset to build SentencePiece.
|
|
219
234
|
col_names (list): The list of the col name.
|
|
220
235
|
vocab_size (int): Vocabulary size.
|
|
221
|
-
character_coverage (float): Amount of characters covered by the model
|
|
222
|
-
languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
|
|
236
|
+
character_coverage (float): Amount of characters covered by the model. Recommend ``0.9995`` for
|
|
237
|
+
languages with rich character set like Japanese or Chinese and ``1.0`` for other languages with small
|
|
223
238
|
character set.
|
|
224
|
-
model_type (SentencePieceModel):
|
|
225
|
-
|
|
226
|
-
sentence must be pre-tokenized when using SentencePieceModel.WORD type.
|
|
227
|
-
|
|
228
|
-
- SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
|
|
229
|
-
be independent of the previous words generated by the model.
|
|
230
|
-
- SentencePieceModel.BPE, refers to byte pair encoding algorithm, which replaces the most frequent pair
|
|
231
|
-
of bytes in a sentence with a single, unused byte.
|
|
232
|
-
- SentencePieceModel.CHAR, refers to char based sentencePiece Model type.
|
|
233
|
-
- SentencePieceModel.WORD, refers to word based sentencePiece Model type.
|
|
234
|
-
|
|
239
|
+
model_type (SentencePieceModel): The desired subword algorithm. See :class:`~.text.SentencePieceModel`
|
|
240
|
+
for details on optional values.
|
|
235
241
|
params (dict): A dictionary with no incoming parameters.
|
|
236
242
|
|
|
237
243
|
Returns:
|
|
@@ -239,10 +245,16 @@ class SentencePieceVocab:
|
|
|
239
245
|
|
|
240
246
|
Examples:
|
|
241
247
|
>>> import mindspore.dataset as ds
|
|
248
|
+
>>> import mindspore.dataset.text as text
|
|
249
|
+
>>>
|
|
242
250
|
>>> from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel
|
|
243
251
|
>>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
|
|
244
252
|
>>> vocab = SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
|
|
245
253
|
... SentencePieceModel.UNIGRAM, {})
|
|
254
|
+
>>> # Build tokenizer based on vocab
|
|
255
|
+
>>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=text.SPieceTokenizerOutType.STRING)
|
|
256
|
+
>>> txt = "Today is Tuesday."
|
|
257
|
+
>>> token = tokenizer(txt)
|
|
246
258
|
"""
|
|
247
259
|
|
|
248
260
|
sentence_piece_vocab = cls()
|
|
@@ -261,20 +273,11 @@ class SentencePieceVocab:
|
|
|
261
273
|
Args:
|
|
262
274
|
file_path (list): Path to the file which contains the SentencePiece list.
|
|
263
275
|
vocab_size (int): Vocabulary size.
|
|
264
|
-
character_coverage (float): Amount of characters covered by the model
|
|
265
|
-
languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
|
|
276
|
+
character_coverage (float): Amount of characters covered by the model. Recommend ``0.9995`` for
|
|
277
|
+
languages with rich character set like Japanese or Chinese and ``1.0`` for other languages with small
|
|
266
278
|
character set.
|
|
267
|
-
model_type (SentencePieceModel):
|
|
268
|
-
|
|
269
|
-
sentence must be pre-tokenized when using SentencePieceModel.WORD type.
|
|
270
|
-
|
|
271
|
-
- SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
|
|
272
|
-
be independent of the previous words generated by the model.
|
|
273
|
-
- SentencePieceModel.BPE, refers to byte pair encoding algorithm, which replaces the most frequent pair
|
|
274
|
-
of bytes in a sentence with a single, unused byte.
|
|
275
|
-
- SentencePieceModel.CHAR, refers to char based sentencePiece Model type.
|
|
276
|
-
- SentencePieceModel.WORD, refers to word based sentencePiece Model type.
|
|
277
|
-
|
|
279
|
+
model_type (SentencePieceModel): The desired subword algorithm. See :class:`~.text.SentencePieceModel`
|
|
280
|
+
for details on optional values.
|
|
278
281
|
params (dict): A dictionary with no incoming parameters (The parameters are derived from SentencePiece
|
|
279
282
|
library).
|
|
280
283
|
|
|
@@ -285,6 +288,10 @@ class SentencePieceVocab:
|
|
|
285
288
|
>>> from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel
|
|
286
289
|
>>> vocab = SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
|
|
287
290
|
... SentencePieceModel.UNIGRAM, {})
|
|
291
|
+
>>> # Build tokenizer based on vocab model
|
|
292
|
+
>>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=text.SPieceTokenizerOutType.STRING)
|
|
293
|
+
>>> txt = "Today is Friday."
|
|
294
|
+
>>> token = tokenizer(txt)
|
|
288
295
|
"""
|
|
289
296
|
|
|
290
297
|
sentence_piece_vocab = cls()
|
|
@@ -315,12 +322,12 @@ class SentencePieceVocab:
|
|
|
315
322
|
|
|
316
323
|
class SPieceTokenizerLoadType(IntEnum):
|
|
317
324
|
"""
|
|
318
|
-
|
|
325
|
+
Model input type for the SentencePiece tokenizer.
|
|
319
326
|
|
|
320
|
-
|
|
327
|
+
Available values are as follows:
|
|
321
328
|
|
|
322
|
-
- SPieceTokenizerLoadType.FILE: Load
|
|
323
|
-
- SPieceTokenizerLoadType.MODEL: Load
|
|
329
|
+
- SPieceTokenizerLoadType.FILE: Load model from specified file path.
|
|
330
|
+
- SPieceTokenizerLoadType.MODEL: Load model from specified vocab object.
|
|
324
331
|
"""
|
|
325
332
|
|
|
326
333
|
FILE = 0
|
|
@@ -331,7 +338,7 @@ class SPieceTokenizerOutType(IntEnum):
|
|
|
331
338
|
"""
|
|
332
339
|
An enumeration for :class:`mindspore.dataset.text.SentencePieceTokenizer` .
|
|
333
340
|
|
|
334
|
-
Possible enumeration values are: SPieceTokenizerOutType.STRING
|
|
341
|
+
Possible enumeration values are: ``SPieceTokenizerOutType.STRING``, ``SPieceTokenizerOutType.INT``.
|
|
335
342
|
|
|
336
343
|
- SPieceTokenizerOutType.STRING: means output type of SentencePiece Tokenizer is string.
|
|
337
344
|
- SPieceTokenizerOutType.INT: means output type of SentencePiece Tokenizer is int.
|
|
@@ -343,33 +350,37 @@ class SPieceTokenizerOutType(IntEnum):
|
|
|
343
350
|
|
|
344
351
|
class Vectors(cde.Vectors):
|
|
345
352
|
"""
|
|
346
|
-
|
|
353
|
+
Pre-trained word embeddings.
|
|
347
354
|
"""
|
|
348
355
|
|
|
349
356
|
@classmethod
|
|
350
357
|
@check_from_file_vectors
|
|
351
358
|
def from_file(cls, file_path, max_vectors=None):
|
|
352
359
|
"""
|
|
353
|
-
|
|
360
|
+
Load a pre-training vector set file.
|
|
354
361
|
|
|
355
362
|
Args:
|
|
356
|
-
file_path (str): Path
|
|
357
|
-
max_vectors (int, optional):
|
|
363
|
+
file_path (str): Path to the pre-training vector set file.
|
|
364
|
+
max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
|
|
358
365
|
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
|
|
359
366
|
situations where the entire set doesn't fit in memory, or is not needed for another reason,
|
|
360
|
-
|
|
367
|
+
this value can limit the size of the loaded set. Default: ``None``, no upper limit.
|
|
361
368
|
|
|
362
369
|
Returns:
|
|
363
|
-
Vectors,
|
|
370
|
+
Vectors, pre-training vectors.
|
|
364
371
|
|
|
365
372
|
Raises:
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
TypeError: If `max_vectors` is not type
|
|
373
|
+
TypeError: If `file_path` is not of type str.
|
|
374
|
+
RuntimeError: If `file_path` does not exist or is not accessible.
|
|
375
|
+
TypeError: If `max_vectors` is not of type int.
|
|
376
|
+
ValueError: If `max_vectors` is negative.
|
|
369
377
|
|
|
370
378
|
Examples:
|
|
371
379
|
>>> import mindspore.dataset.text as text
|
|
372
380
|
>>> vector = text.Vectors.from_file("/path/to/vectors/file", max_vectors=None)
|
|
381
|
+
>>> to_vectors = text.ToVectors(vector)
|
|
382
|
+
>>> # Look up a token into vectors according to the Vectors model.
|
|
383
|
+
>>> word_vector = to_vectors(["word1", "word2"])
|
|
373
384
|
"""
|
|
374
385
|
|
|
375
386
|
max_vectors = max_vectors if max_vectors is not None else 0
|
|
@@ -378,9 +389,9 @@ class Vectors(cde.Vectors):
|
|
|
378
389
|
|
|
379
390
|
class Vocab:
|
|
380
391
|
"""
|
|
381
|
-
Vocab
|
|
392
|
+
Create Vocab for training NLP models.
|
|
382
393
|
|
|
383
|
-
|
|
394
|
+
Vocab is a collection of all possible Tokens in the data, preserving the mapping between each Token and its ID.
|
|
384
395
|
"""
|
|
385
396
|
|
|
386
397
|
def __init__(self):
|
|
@@ -390,42 +401,52 @@ class Vocab:
|
|
|
390
401
|
@check_from_dataset
|
|
391
402
|
def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, special_first=True):
|
|
392
403
|
"""
|
|
393
|
-
Build a Vocab from a dataset.
|
|
404
|
+
Build a Vocab from a given dataset.
|
|
394
405
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
Words in vocab are ordered from the highest frequency to the lowest frequency. Words with the same frequency
|
|
398
|
-
would be ordered lexicographically.
|
|
406
|
+
The samples in the dataset are used as a corpus to create Vocab, in which the Tokens are arranged in descending
|
|
407
|
+
order of Token frequency, and Tokens with the same frequency are arranged in alphabetical order.
|
|
399
408
|
|
|
400
409
|
Args:
|
|
401
|
-
dataset (Dataset): dataset to build
|
|
402
|
-
columns (list[str], optional):
|
|
403
|
-
Default: None.
|
|
404
|
-
freq_range (tuple, optional):
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
special_tokens (list, optional): A list of
|
|
413
|
-
|
|
414
|
-
special_first (bool, optional): Whether
|
|
415
|
-
|
|
416
|
-
Default: True.
|
|
410
|
+
dataset (Dataset): The dataset to build the Vocab from.
|
|
411
|
+
columns (list[str], optional): The name of the data columns used to create the Vocab.
|
|
412
|
+
Default: ``None`` , use all columns.
|
|
413
|
+
freq_range (tuple[int, int], optional): The Token frequency range used to create the Vocab. Must contain
|
|
414
|
+
two elements representing the minimum and maximum frequencies, within which the Token will be retained.
|
|
415
|
+
When the minimum or maximum frequency is None, it means there is no minimum or maximum frequency limit.
|
|
416
|
+
Default: ``None`` , no Token frequency range restriction.
|
|
417
|
+
top_k (int, optional): Only the first specified number of Tokens with the highest Token frequency are
|
|
418
|
+
selected to build the Vocab. This operation will be performed after Token frequency filtering. If
|
|
419
|
+
the value is greater than the total number of Tokens, all Tokens will be retained. Default: ``None`` ,
|
|
420
|
+
there is no limit to the number of Tokens.
|
|
421
|
+
special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
|
|
422
|
+
no special Token is appended.
|
|
423
|
+
special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
|
|
424
|
+
the bottom of the Vocab. Default: ``True``.
|
|
417
425
|
|
|
418
426
|
Returns:
|
|
419
|
-
Vocab, Vocab
|
|
427
|
+
Vocab, Vocab built from the dataset.
|
|
428
|
+
|
|
429
|
+
Raises:
|
|
430
|
+
TypeError: If `columns` is not of type list[str].
|
|
431
|
+
TypeError: If `freq_range` is not of type tuple[int, int].
|
|
432
|
+
ValueError: If element of `freq_range` is negative.
|
|
433
|
+
TypeError: If `top_k` is not of type int.
|
|
434
|
+
ValueError: If `top_k` is not positive.
|
|
435
|
+
TypeError: If `special_tokens` is not of type list[str].
|
|
436
|
+
ValueError: If there are duplicate elements in `special_tokens`.
|
|
437
|
+
TypeError: If `special_first` is not of type bool.
|
|
420
438
|
|
|
421
439
|
Examples:
|
|
422
440
|
>>> import mindspore.dataset as ds
|
|
423
441
|
>>> import mindspore.dataset.text as text
|
|
442
|
+
>>>
|
|
424
443
|
>>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
|
|
425
444
|
>>> vocab = text.Vocab.from_dataset(dataset, "text", freq_range=None, top_k=None,
|
|
426
445
|
... special_tokens=["<pad>", "<unk>"],
|
|
427
446
|
... special_first=True)
|
|
428
|
-
>>>
|
|
447
|
+
>>> # Use the vocab to look up string to id
|
|
448
|
+
>>> lookup = text.Lookup(vocab, "<unk>")
|
|
449
|
+
>>> id = lookup("text1")
|
|
429
450
|
"""
|
|
430
451
|
|
|
431
452
|
vocab = cls()
|
|
@@ -437,21 +458,30 @@ class Vocab:
|
|
|
437
458
|
@check_from_list
|
|
438
459
|
def from_list(cls, word_list, special_tokens=None, special_first=True):
|
|
439
460
|
"""
|
|
440
|
-
Build a
|
|
461
|
+
Build a Vocab from a given Token list.
|
|
441
462
|
|
|
442
463
|
Args:
|
|
443
|
-
word_list (list):
|
|
444
|
-
special_tokens (list, optional): A list of
|
|
445
|
-
|
|
446
|
-
special_first (bool, optional): Whether
|
|
447
|
-
|
|
464
|
+
word_list (list[str]): The Token list to build the Vocab from.
|
|
465
|
+
special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
|
|
466
|
+
no special Token is appended.
|
|
467
|
+
special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
|
|
468
|
+
the bottom of the Vocab. Default: ``True``.
|
|
448
469
|
|
|
449
470
|
Returns:
|
|
450
|
-
Vocab, Vocab
|
|
471
|
+
Vocab, Vocab built from the list.
|
|
472
|
+
|
|
473
|
+
Raises:
|
|
474
|
+
TypeError: If `word_list` is not of type list[str].
|
|
475
|
+
ValueError: If there are duplicate elements in `word_list`.
|
|
476
|
+
TypeError: If `special_tokens` is not of type list[str].
|
|
477
|
+
ValueError: If there are duplicate elements in `special_tokens`.
|
|
478
|
+
TypeError: If `special_first` is not of type bool.
|
|
451
479
|
|
|
452
480
|
Examples:
|
|
453
481
|
>>> import mindspore.dataset.text as text
|
|
454
482
|
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
|
|
483
|
+
>>> # look up strings to ids
|
|
484
|
+
>>> ids = vocab.tokens_to_ids(["w1", "w3"])
|
|
455
485
|
"""
|
|
456
486
|
|
|
457
487
|
if special_tokens is None:
|
|
@@ -464,21 +494,29 @@ class Vocab:
|
|
|
464
494
|
@check_from_file
|
|
465
495
|
def from_file(cls, file_path, delimiter="", vocab_size=None, special_tokens=None, special_first=True):
|
|
466
496
|
"""
|
|
467
|
-
Build a
|
|
497
|
+
Build a Vocab from a file.
|
|
468
498
|
|
|
469
499
|
Args:
|
|
470
|
-
file_path (str):
|
|
471
|
-
delimiter (str, optional):
|
|
472
|
-
|
|
473
|
-
vocab_size (int, optional):
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
500
|
+
file_path (str): The path of the file to build the Vocab from.
|
|
501
|
+
delimiter (str, optional): The separator for the Token in the file line. The string before the separator
|
|
502
|
+
will be treated as a Token. Default: ``''``, the whole line will be treated as a Token.
|
|
503
|
+
vocab_size (int, optional): The upper limit on the number of Tokens that Vocab can contain.
|
|
504
|
+
Default: ``None`` , no upper limit on the number of Tokens.
|
|
505
|
+
special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
|
|
506
|
+
no special Token is appended.
|
|
507
|
+
special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
|
|
508
|
+
the bottom of the Vocab. Default: ``True``.
|
|
479
509
|
|
|
480
510
|
Returns:
|
|
481
|
-
Vocab, Vocab
|
|
511
|
+
Vocab, Vocab built from the file.
|
|
512
|
+
|
|
513
|
+
Raises:
|
|
514
|
+
TypeError: If `file_path` is not of type str.
|
|
515
|
+
TypeError: If `delimiter` is not of type str.
|
|
516
|
+
ValueError: If `vocab_size` is not positive.
|
|
517
|
+
TypeError: If `special_tokens` is not of type list[str].
|
|
518
|
+
ValueError: If there are duplicate elements in `special_tokens`.
|
|
519
|
+
TypeError: If `special_first` is not of type bool.
|
|
482
520
|
|
|
483
521
|
Examples:
|
|
484
522
|
>>> import mindspore.dataset.text as text
|
|
@@ -495,6 +533,9 @@ class Vocab:
|
|
|
495
533
|
>>>
|
|
496
534
|
>>> # Finally, there are 5 words in the vocab: "<pad>", "<unk>", "apple", "banana", "cat".
|
|
497
535
|
>>> vocabulary = vocab.vocab()
|
|
536
|
+
>>>
|
|
537
|
+
>>> # look up strings to ids
|
|
538
|
+
>>> ids = vocab.tokens_to_ids(["apple", "banana"])
|
|
498
539
|
"""
|
|
499
540
|
|
|
500
541
|
if vocab_size is None:
|
|
@@ -509,18 +550,26 @@ class Vocab:
|
|
|
509
550
|
@check_from_dict
|
|
510
551
|
def from_dict(cls, word_dict):
|
|
511
552
|
"""
|
|
512
|
-
Build a
|
|
553
|
+
Build a Vocab from a given dictionary.
|
|
513
554
|
|
|
514
555
|
Args:
|
|
515
|
-
word_dict (dict):
|
|
516
|
-
to start from 0 and be continuous. ValueError will be raised if id is negative.
|
|
556
|
+
word_dict (dict[str, int]): A dictionary storing the mappings between each Token and its ID.
|
|
517
557
|
|
|
518
558
|
Returns:
|
|
519
|
-
Vocab, Vocab
|
|
559
|
+
Vocab, Vocab built from the dictionary.
|
|
560
|
+
|
|
561
|
+
Raises:
|
|
562
|
+
TypeError: If `word_dict` is not of type dict[str, int].
|
|
563
|
+
ValueError: If any ID (value) in `word_dict` is negative.
|
|
520
564
|
|
|
521
565
|
Examples:
|
|
522
566
|
>>> import mindspore.dataset.text as text
|
|
523
567
|
>>> vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "<unk>": 6})
|
|
568
|
+
>>>
|
|
569
|
+
>>> # look up ids to string
|
|
570
|
+
>>> tokens = vocab.ids_to_tokens([3, 4, 5])
|
|
571
|
+
>>> print(tokens)
|
|
572
|
+
['home', 'the', 'world']
|
|
524
573
|
"""
|
|
525
574
|
|
|
526
575
|
vocab = cls()
|
|
@@ -529,15 +578,17 @@ class Vocab:
|
|
|
529
578
|
|
|
530
579
|
def vocab(self):
|
|
531
580
|
"""
|
|
532
|
-
Get the
|
|
581
|
+
Get the dictionary of the mappings between Tokens and their IDs.
|
|
533
582
|
|
|
534
583
|
Returns:
|
|
535
|
-
|
|
584
|
+
dict[str, int], the dictionary of mappings between Tokens and IDs.
|
|
536
585
|
|
|
537
586
|
Examples:
|
|
538
587
|
>>> import mindspore.dataset.text as text
|
|
539
588
|
>>> vocab = text.Vocab.from_list(["word_1", "word_2", "word_3", "word_4"])
|
|
540
589
|
>>> vocabory_dict = vocab.vocab()
|
|
590
|
+
>>> print(sorted(vocabory_dict.items()))
|
|
591
|
+
[('word_1', 0), ('word_2', 1), ('word_3', 2), ('word_4', 3)]
|
|
541
592
|
"""
|
|
542
593
|
check_vocab(self.c_vocab)
|
|
543
594
|
return self.c_vocab.vocab()
|
|
@@ -545,19 +596,24 @@ class Vocab:
|
|
|
545
596
|
@check_tokens_to_ids
|
|
546
597
|
def tokens_to_ids(self, tokens):
|
|
547
598
|
"""
|
|
548
|
-
|
|
549
|
-
If token does not exist, return id with value -1.
|
|
599
|
+
Look up the ID corresponding to the specified Token.
|
|
550
600
|
|
|
551
601
|
Args:
|
|
552
|
-
tokens (Union[str, list[str]]):
|
|
602
|
+
tokens (Union[str, list[str], numpy.ndarray]): The Token or list of Tokens to be looked up.
|
|
603
|
+
If the Token does not exist, -1 is returned.
|
|
553
604
|
|
|
554
605
|
Returns:
|
|
555
|
-
|
|
606
|
+
Union[int, list[int]], the ID(s) corresponding to the Token(s).
|
|
607
|
+
|
|
608
|
+
Raises:
|
|
609
|
+
TypeError: If `tokens` is not of type Union[str, list[str], numpy.ndarray].
|
|
556
610
|
|
|
557
611
|
Examples:
|
|
558
612
|
>>> import mindspore.dataset.text as text
|
|
559
613
|
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
|
|
560
614
|
>>> ids = vocab.tokens_to_ids(["w1", "w3"])
|
|
615
|
+
>>> print(ids)
|
|
616
|
+
[1, 3]
|
|
561
617
|
"""
|
|
562
618
|
check_vocab(self.c_vocab)
|
|
563
619
|
if isinstance(tokens, np.ndarray):
|
|
@@ -569,19 +625,25 @@ class Vocab:
|
|
|
569
625
|
@check_ids_to_tokens
|
|
570
626
|
def ids_to_tokens(self, ids):
|
|
571
627
|
"""
|
|
572
|
-
|
|
573
|
-
If id does not exist, return empty string.
|
|
628
|
+
Look up the Token corresponding to the specified ID.
|
|
574
629
|
|
|
575
630
|
Args:
|
|
576
|
-
ids (Union[int, list[int]]): The
|
|
631
|
+
ids (Union[int, list[int], numpy.ndarray]): The ID or list of IDs to be looked up.
|
|
632
|
+
If the ID does not exist, an empty string is returned.
|
|
577
633
|
|
|
578
634
|
Returns:
|
|
579
|
-
|
|
635
|
+
Union[str, list[str]], the Token(s) corresponding to the ID(s).
|
|
636
|
+
|
|
637
|
+
Raises:
|
|
638
|
+
TypeError: If `ids` is not of type Union[int, list[int], numpy.ndarray].
|
|
639
|
+
ValueError: If element of `ids` is negative.
|
|
580
640
|
|
|
581
641
|
Examples:
|
|
582
642
|
>>> import mindspore.dataset.text as text
|
|
583
643
|
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
|
|
584
|
-
>>> token = vocab.ids_to_tokens(
|
|
644
|
+
>>> token = vocab.ids_to_tokens(1)
|
|
645
|
+
>>> print(token)
|
|
646
|
+
w1
|
|
585
647
|
"""
|
|
586
648
|
check_vocab(self.c_vocab)
|
|
587
649
|
if isinstance(ids, np.ndarray):
|
|
@@ -597,7 +659,7 @@ def to_bytes(array, encoding='utf8'):
|
|
|
597
659
|
|
|
598
660
|
Args:
|
|
599
661
|
array (numpy.ndarray): Array of `str` type representing strings.
|
|
600
|
-
encoding (str): Indicating the charset for encoding. Default: 'utf8'
|
|
662
|
+
encoding (str): Indicating the charset for encoding. Default: ``'utf8'``.
|
|
601
663
|
|
|
602
664
|
Returns:
|
|
603
665
|
numpy.ndarray, NumPy array of `bytes` .
|
|
@@ -605,11 +667,15 @@ def to_bytes(array, encoding='utf8'):
|
|
|
605
667
|
Examples:
|
|
606
668
|
>>> import numpy as np
|
|
607
669
|
>>> import mindspore.dataset as ds
|
|
670
|
+
>>> import mindspore.dataset.text as text
|
|
608
671
|
>>>
|
|
609
672
|
>>> data = np.array([["1", "2", "3"]], dtype=np.str_)
|
|
610
673
|
>>> dataset = ds.NumpySlicesDataset(data, column_names=["text"])
|
|
674
|
+
>>> result = []
|
|
611
675
|
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
612
|
-
...
|
|
676
|
+
... result.append(text.to_bytes(item["text"]))
|
|
677
|
+
>>> print(result)
|
|
678
|
+
[array([b'1', b'2', b'3'], dtype='|S1')]
|
|
613
679
|
"""
|
|
614
680
|
|
|
615
681
|
if not isinstance(array, np.ndarray):
|
|
@@ -624,7 +690,7 @@ def to_str(array, encoding='utf8'):
|
|
|
624
690
|
|
|
625
691
|
Args:
|
|
626
692
|
array (numpy.ndarray): Array of `bytes` type representing strings.
|
|
627
|
-
encoding (str): Indicating the charset for decoding. Default: 'utf8'
|
|
693
|
+
encoding (str): Indicating the charset for decoding. Default: ``'utf8'``.
|
|
628
694
|
|
|
629
695
|
Returns:
|
|
630
696
|
numpy.ndarray, NumPy array of `str` .
|
|
@@ -632,11 +698,15 @@ def to_str(array, encoding='utf8'):
|
|
|
632
698
|
Examples:
|
|
633
699
|
>>> import numpy as np
|
|
634
700
|
>>> import mindspore.dataset as ds
|
|
701
|
+
>>> import mindspore.dataset.text as text
|
|
635
702
|
>>>
|
|
636
703
|
>>> data = np.array([["1", "2", "3"]], dtype=np.bytes_)
|
|
637
704
|
>>> dataset = ds.NumpySlicesDataset(data, column_names=["text"])
|
|
705
|
+
>>> result = []
|
|
638
706
|
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
639
|
-
...
|
|
707
|
+
... result.append(text.to_str(item["text"]))
|
|
708
|
+
>>> print(result)
|
|
709
|
+
[array(['1', '2', '3'], dtype='<U1')]
|
|
640
710
|
"""
|
|
641
711
|
|
|
642
712
|
if not isinstance(array, np.ndarray):
|