PyPI - mindspore - Versions diffs - 2.1.0__cp37-cp37m-win_amd64.whl → 2.2.11__cp37-cp37m-win_amd64.whl - Mend

mindspore 2.1.0__cp37-cp37m-win_amd64.whl → 2.2.11__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic. Click here for more details.

Files changed (511) hide show

mindspore/.commit_id +1 -1
mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
mindspore/Newtonsoft.Json.dll +0 -0
mindspore/__init__.py +4 -1
mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
mindspore/_check_jit_forbidden_api.py +3 -1
mindspore/_checkparam.py +23 -29
mindspore/_extends/graph_kernel/__init__.py +0 -1
mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
mindspore/_extends/graph_kernel/splitter.py +4 -11
mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
mindspore/_extends/parse/__init__.py +13 -15
mindspore/_extends/parse/namespace.py +7 -33
mindspore/_extends/parse/parser.py +67 -72
mindspore/_extends/parse/resources.py +1 -1
mindspore/_extends/parse/standard_method.py +86 -106
mindspore/_extends/parse/trope.py +1 -1
mindspore/_extends/remote/kernel_build_server.py +25 -7
mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
mindspore/_install_custom.py +43 -0
mindspore/amp.py +47 -11
mindspore/atlprov.dll +0 -0
mindspore/boost/boost.py +1 -8
mindspore/boost/boost_cell_wrapper.py +3 -2
mindspore/boost/grad_accumulation.py +1 -1
mindspore/boost/group_loss_scale_manager.py +8 -7
mindspore/c1.dll +0 -0
mindspore/c1xx.dll +0 -0
mindspore/c2.dll +0 -0
mindspore/common/__init__.py +5 -3
mindspore/common/_jit_fallback_utils.py +6 -0
mindspore/common/_register_for_adapter.py +2 -0
mindspore/common/_register_for_tensor.py +2 -2
mindspore/common/_stub_tensor.py +13 -0
mindspore/common/_utils.py +29 -0
mindspore/common/api.py +174 -259
mindspore/common/auto_dynamic_shape.py +494 -0
mindspore/common/dtype.py +18 -11
mindspore/common/dump.py +6 -4
mindspore/common/initializer.py +14 -14
mindspore/common/jit_config.py +33 -15
mindspore/common/lazy_inline.py +126 -7
mindspore/common/mindir_util.py +101 -0
mindspore/common/parameter.py +51 -41
mindspore/common/seed.py +4 -4
mindspore/common/sparse_tensor.py +13 -14
mindspore/common/tensor.py +243 -165
mindspore/communication/__init__.py +7 -4
mindspore/communication/_comm_helper.py +83 -4
mindspore/communication/management.py +152 -84
mindspore/config/op_info.config +14 -3
mindspore/context.py +152 -61
mindspore/dataset/__init__.py +5 -5
mindspore/dataset/audio/__init__.py +2 -2
mindspore/dataset/audio/transforms.py +52 -52
mindspore/dataset/callback/ds_callback.py +16 -2
mindspore/dataset/core/config.py +68 -51
mindspore/dataset/engine/cache_client.py +33 -7
mindspore/dataset/engine/datasets.py +250 -112
mindspore/dataset/engine/datasets_audio.py +43 -211
mindspore/dataset/engine/datasets_standard_format.py +16 -35
mindspore/dataset/engine/datasets_text.py +43 -67
mindspore/dataset/engine/datasets_user_defined.py +86 -100
mindspore/dataset/engine/datasets_vision.py +219 -1029
mindspore/dataset/engine/iterators.py +11 -4
mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
mindspore/dataset/engine/obs/util.py +3 -0
mindspore/dataset/engine/samplers.py +1 -1
mindspore/dataset/engine/validators.py +19 -5
mindspore/dataset/text/__init__.py +3 -3
mindspore/dataset/text/transforms.py +101 -127
mindspore/dataset/text/utils.py +205 -138
mindspore/dataset/transforms/__init__.py +1 -1
mindspore/dataset/transforms/py_transforms_util.py +40 -12
mindspore/dataset/transforms/transforms.py +95 -40
mindspore/dataset/utils/browse_dataset.py +8 -2
mindspore/dataset/utils/line_reader.py +17 -19
mindspore/dataset/vision/__init__.py +3 -3
mindspore/dataset/vision/c_transforms.py +6 -3
mindspore/dataset/vision/transforms.py +409 -287
mindspore/dataset/vision/utils.py +13 -14
mindspore/dataset/vision/validators.py +11 -1
mindspore/dnnl.dll +0 -0
mindspore/dpcmi.dll +0 -0
mindspore/experimental/map_parameter.py +14 -0
mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
mindspore/experimental/optim/lr_scheduler.py +1427 -0
mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
mindspore/gen_ops.py +273 -0
mindspore/include/OWNERS +0 -1
mindspore/include/api/data_type.h +2 -1
mindspore/include/api/graph.h +0 -15
mindspore/include/api/kernel.h +2 -0
mindspore/include/api/kernel_api.h +37 -12
mindspore/include/api/model.h +17 -14
mindspore/include/api/status.h +8 -3
mindspore/include/api/types.h +37 -4
mindspore/include/c_api/ms/abstract.h +67 -0
mindspore/include/c_api/ms/attribute.h +197 -0
mindspore/include/c_api/ms/base/handle_types.h +43 -0
mindspore/include/c_api/ms/base/macros.h +32 -0
mindspore/include/c_api/ms/base/status.h +33 -0
mindspore/include/c_api/ms/base/types.h +282 -0
mindspore/include/c_api/ms/context.h +102 -0
mindspore/include/c_api/ms/graph.h +160 -0
mindspore/include/c_api/ms/node.h +606 -0
mindspore/include/c_api/ms/tensor.h +161 -0
mindspore/include/c_api/ms/value.h +84 -0
mindspore/include/dataset/constants.h +6 -5
mindspore/include/dataset/execute.h +23 -13
mindspore/include/dataset/text.h +26 -26
mindspore/include/dataset/transforms.h +13 -13
mindspore/include/dataset/vision.h +60 -60
mindspore/include/dataset/vision_ascend.h +5 -6
mindspore/include/dataset/vision_lite.h +17 -17
mindspore/jpeg62.dll +0 -0
mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
mindspore/mindspore_backend.dll +0 -0
mindspore/mindspore_common.dll +0 -0
mindspore/mindspore_core.dll +0 -0
mindspore/mindspore_glog.dll +0 -0
mindspore/mindspore_shared_lib.dll +0 -0
mindspore/msobj140.dll +0 -0
mindspore/mspdb140.dll +0 -0
mindspore/mspdbcore.dll +0 -0
mindspore/mspdbst.dll +0 -0
mindspore/mspft140.dll +0 -0
mindspore/msvcdis140.dll +0 -0
mindspore/msvcp140_1.dll +0 -0
mindspore/msvcp140_2.dll +0 -0
mindspore/msvcp140_atomic_wait.dll +0 -0
mindspore/msvcp140_codecvt_ids.dll +0 -0
mindspore/nn/__init__.py +0 -2
mindspore/nn/cell.py +313 -74
mindspore/nn/dynamic_lr.py +21 -21
mindspore/nn/layer/activation.py +22 -30
mindspore/nn/layer/basic.py +15 -13
mindspore/nn/layer/channel_shuffle.py +1 -1
mindspore/nn/layer/container.py +271 -9
mindspore/nn/layer/conv.py +323 -204
mindspore/nn/layer/dense.py +8 -5
mindspore/nn/layer/embedding.py +33 -27
mindspore/nn/layer/flash_attention.py +61 -95
mindspore/nn/layer/image.py +8 -6
mindspore/nn/layer/math.py +16 -25
mindspore/nn/layer/normalization.py +107 -66
mindspore/nn/layer/padding.py +1 -1
mindspore/nn/layer/pooling.py +131 -109
mindspore/nn/layer/rnn_cells.py +27 -22
mindspore/nn/layer/rnns.py +13 -16
mindspore/nn/layer/thor_layer.py +1 -1
mindspore/nn/layer/transformer.py +221 -154
mindspore/nn/learning_rate_schedule.py +9 -1
mindspore/nn/loss/loss.py +235 -174
mindspore/nn/optim/ada_grad.py +2 -1
mindspore/nn/optim/adadelta.py +1 -0
mindspore/nn/optim/adafactor.py +2 -1
mindspore/nn/optim/adam.py +7 -4
mindspore/nn/optim/adamax.py +3 -2
mindspore/nn/optim/adasum.py +2 -2
mindspore/nn/optim/asgd.py +2 -3
mindspore/nn/optim/ftrl.py +6 -5
mindspore/nn/optim/lamb.py +7 -4
mindspore/nn/optim/lars.py +1 -1
mindspore/nn/optim/lazyadam.py +5 -3
mindspore/nn/optim/momentum.py +2 -1
mindspore/nn/optim/optimizer.py +53 -4
mindspore/nn/optim/proximal_ada_grad.py +3 -4
mindspore/nn/optim/rmsprop.py +4 -3
mindspore/nn/optim/rprop.py +23 -12
mindspore/nn/optim/sgd.py +26 -11
mindspore/nn/optim/thor.py +9 -7
mindspore/nn/probability/bijector/bijector.py +5 -5
mindspore/nn/probability/bijector/power_transform.py +27 -27
mindspore/nn/probability/bijector/softplus.py +3 -3
mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
mindspore/nn/probability/distribution/bernoulli.py +5 -5
mindspore/nn/probability/distribution/beta.py +3 -3
mindspore/nn/probability/distribution/categorical.py +7 -7
mindspore/nn/probability/distribution/cauchy.py +0 -1
mindspore/nn/probability/distribution/distribution.py +3 -3
mindspore/nn/probability/distribution/gamma.py +3 -3
mindspore/nn/probability/distribution/geometric.py +4 -4
mindspore/nn/probability/distribution/gumbel.py +4 -4
mindspore/nn/probability/distribution/log_normal.py +2 -2
mindspore/nn/probability/distribution/logistic.py +2 -2
mindspore/nn/probability/distribution/poisson.py +4 -4
mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
mindspore/nn/probability/distribution/uniform.py +6 -6
mindspore/nn/wrap/__init__.py +4 -2
mindspore/nn/wrap/cell_wrapper.py +87 -34
mindspore/nn/wrap/grad_reducer.py +8 -5
mindspore/nn/wrap/loss_scale.py +105 -42
mindspore/numpy/array_creations.py +1 -2
mindspore/numpy/array_ops.py +3 -2
mindspore/numpy/utils_const.py +5 -5
mindspore/opencv_core452.dll +0 -0
mindspore/opencv_imgcodecs452.dll +0 -0
mindspore/opencv_imgproc452.dll +0 -0
mindspore/ops/_grad_experimental/__init__.py +0 -5
mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
mindspore/ops/_op_impl/aicpu/add.py +3 -3
mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
mindspore/ops/_op_impl/tbe/__init__.py +4 -4
mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
mindspore/ops/_primitive_cache.py +1 -1
mindspore/ops/_tracefunc.py +45 -13
mindspore/ops/_utils/utils.py +6 -1
mindspore/ops/_vmap/vmap_array_ops.py +3 -3
mindspore/ops/_vmap/vmap_base.py +3 -3
mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
mindspore/ops/_vmap/vmap_math_ops.py +5 -2
mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
mindspore/ops/arg_dtype_cast.py +54 -0
mindspore/ops/composite/base.py +37 -10
mindspore/ops/composite/math_ops.py +5 -4
mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
mindspore/ops/deprecated.py +304 -0
mindspore/ops/function/__init__.py +4 -1
mindspore/ops/function/array_func.py +174 -193
mindspore/ops/function/clip_func.py +81 -13
mindspore/ops/function/debug_func.py +1 -1
mindspore/ops/function/grad/grad_func.py +18 -9
mindspore/ops/function/image_func.py +10 -4
mindspore/ops/function/linalg_func.py +5 -5
mindspore/ops/function/math_func.py +575 -386
mindspore/ops/function/nn_func.py +568 -260
mindspore/ops/function/random_func.py +88 -57
mindspore/ops/function/sparse_func.py +1 -1
mindspore/ops/function/sparse_unary_func.py +14 -12
mindspore/ops/function/vmap_func.py +6 -5
mindspore/ops/functional.py +15 -10
mindspore/ops/op_info_register.py +244 -25
mindspore/ops/operations/__init__.py +31 -19
mindspore/ops/operations/_grad_ops.py +71 -7
mindspore/ops/operations/_inner_ops.py +350 -17
mindspore/ops/operations/_quant_ops.py +4 -8
mindspore/ops/operations/_sequence_ops.py +42 -0
mindspore/ops/operations/array_ops.py +68 -282
mindspore/ops/operations/comm_ops.py +107 -59
mindspore/ops/operations/custom_ops.py +94 -70
mindspore/ops/operations/debug_ops.py +8 -4
mindspore/ops/operations/image_ops.py +18 -12
mindspore/ops/operations/inner_ops.py +26 -3
mindspore/ops/operations/math_ops.py +192 -144
mindspore/ops/operations/nn_ops.py +857 -489
mindspore/ops/operations/other_ops.py +0 -22
mindspore/ops/operations/random_ops.py +53 -111
mindspore/ops/operations/sparse_ops.py +3 -1
mindspore/ops/primitive.py +24 -18
mindspore/parallel/_auto_parallel_context.py +68 -8
mindspore/parallel/_cost_model_context.py +2 -2
mindspore/parallel/_offload_context.py +17 -3
mindspore/parallel/_parallel_serialization.py +12 -5
mindspore/parallel/_ps_context.py +12 -0
mindspore/parallel/_tensor.py +18 -13
mindspore/parallel/_transformer/layers.py +5 -3
mindspore/parallel/_transformer/loss.py +1 -0
mindspore/parallel/_transformer/moe.py +2 -2
mindspore/parallel/_transformer/op_parallel_config.py +12 -1
mindspore/parallel/_transformer/transformer.py +23 -3
mindspore/parallel/_utils.py +11 -7
mindspore/parallel/algo_parameter_config.py +85 -5
mindspore/parallel/checkpoint_transform.py +19 -12
mindspore/parallel/shard.py +21 -14
mindspore/pgodb140.dll +0 -0
mindspore/pgort140.dll +0 -0
mindspore/profiler/common/struct_type.py +3 -3
mindspore/profiler/common/util.py +4 -2
mindspore/profiler/envprofiling.py +1 -1
mindspore/profiler/parser/aicpu_data_parser.py +5 -3
mindspore/profiler/parser/ascend_flops_generator.py +2 -2
mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
mindspore/profiler/parser/ascend_op_generator.py +6 -6
mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
mindspore/profiler/parser/base_timeline_generator.py +10 -8
mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
mindspore/profiler/parser/flops_parser.py +15 -11
mindspore/profiler/parser/framework_parser.py +38 -22
mindspore/profiler/parser/hccl_parser.py +16 -12
mindspore/profiler/parser/integrator.py +22 -11
mindspore/profiler/parser/memory_usage_parser.py +2 -2
mindspore/profiler/parser/minddata_analyzer.py +12 -14
mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
mindspore/profiler/parser/msadvisor_parser.py +8 -4
mindspore/profiler/parser/op_intermediate_parser.py +5 -2
mindspore/profiler/parser/optime_parser.py +1 -1
mindspore/profiler/parser/profiler_info.py +21 -2
mindspore/profiler/parser/step_trace_parser.py +11 -14
mindspore/profiler/profiling.py +179 -89
mindspore/rewrite/api/node.py +102 -19
mindspore/rewrite/api/node_type.py +5 -1
mindspore/rewrite/api/pattern_engine.py +1 -1
mindspore/rewrite/api/scoped_value.py +9 -17
mindspore/rewrite/api/symbol_tree.py +131 -47
mindspore/rewrite/ast_helpers/__init__.py +2 -1
mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
mindspore/rewrite/common/rewrite_elog.py +5 -1
mindspore/rewrite/namer.py +33 -24
mindspore/rewrite/namespace.py +14 -5
mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
mindspore/rewrite/node/call_function.py +79 -0
mindspore/rewrite/node/cell_container.py +135 -0
mindspore/rewrite/node/control_flow.py +88 -0
mindspore/rewrite/{node.py → node/node.py} +273 -234
mindspore/rewrite/node/node_manager.py +254 -0
mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
mindspore/rewrite/parsers/arguments_parser.py +22 -21
mindspore/rewrite/parsers/assign_parser.py +216 -221
mindspore/rewrite/parsers/attribute_parser.py +9 -7
mindspore/rewrite/parsers/class_def_parser.py +174 -113
mindspore/rewrite/parsers/constant_parser.py +9 -6
mindspore/rewrite/parsers/container_parser.py +9 -7
mindspore/rewrite/parsers/for_parser.py +42 -21
mindspore/rewrite/parsers/function_def_parser.py +24 -16
mindspore/rewrite/parsers/if_parser.py +28 -24
mindspore/rewrite/parsers/module_parser.py +196 -25
mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
mindspore/rewrite/parsers/return_parser.py +6 -6
mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
mindspore/rewrite/sparsify/utils.py +1 -1
mindspore/rewrite/symbol_tree.py +523 -578
mindspore/rewrite/symbol_tree_builder.py +9 -193
mindspore/rewrite/symbol_tree_dumper.py +2 -2
mindspore/run_check/_check_version.py +6 -4
mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
mindspore/safeguard/rewrite_obfuscation.py +541 -0
mindspore/tbbmalloc.dll +0 -0
mindspore/tinyxml2.dll +0 -0
mindspore/train/_utils.py +7 -3
mindspore/train/amp.py +323 -123
mindspore/train/anf_ir_pb2.py +14 -2
mindspore/train/callback/_backup_and_restore.py +2 -12
mindspore/train/callback/_callback.py +29 -4
mindspore/train/callback/_checkpoint.py +23 -8
mindspore/train/callback/_early_stop.py +2 -2
mindspore/train/callback/_landscape.py +4 -4
mindspore/train/callback/_loss_monitor.py +2 -2
mindspore/train/callback/_on_request_exit.py +2 -2
mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
mindspore/train/callback/_summary_collector.py +15 -8
mindspore/train/callback/_time_monitor.py +58 -5
mindspore/train/data_sink.py +5 -11
mindspore/train/dataset_helper.py +84 -57
mindspore/train/loss_scale_manager.py +2 -2
mindspore/train/metrics/__init__.py +3 -3
mindspore/train/metrics/cosine_similarity.py +1 -1
mindspore/train/metrics/hausdorff_distance.py +3 -2
mindspore/train/metrics/mean_surface_distance.py +3 -2
mindspore/train/metrics/metric.py +39 -19
mindspore/train/metrics/roc.py +2 -2
mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
mindspore/train/mind_ir_pb2.py +85 -36
mindspore/train/model.py +187 -47
mindspore/train/serialization.py +487 -161
mindspore/train/summary/_summary_adapter.py +1 -1
mindspore/train/summary/_writer_pool.py +3 -2
mindspore/train/summary/summary_record.py +37 -17
mindspore/train/train_thor/convert_utils.py +3 -3
mindspore/train/train_thor/dataset_helper.py +1 -1
mindspore/turbojpeg.dll +0 -0
mindspore/vcmeta.dll +0 -0
mindspore/vcruntime140.dll +0 -0
mindspore/vcruntime140_1.dll +0 -0
mindspore/version.py +1 -1
{mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +7 -4
{mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +429 -486
mindspore/_extends/graph_kernel/expander.py +0 -80
mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
mindspore/dataset/datapreprocess/__init__.py +0 -20
mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
mindspore/include/api/net.h +0 -142
mindspore/nn/lr_scheduler.py +0 -262
mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
mindspore/rewrite/node_visitor.py +0 -44
{mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
{mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
{mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0

mindspore/dataset/text/utils.py CHANGED Viewed

@@ -29,33 +29,41 @@ from .validators import check_vocab, check_from_file, check_from_list, check_fro
 class CharNGram(cde.CharNGram):
     """
-    CharNGram object that is used to map tokens into pre-trained vectors.
+    CharNGram pre-trained word embeddings.
+    A word or sentence is represented using a character n-gram count vector, followed by a single
+    nonlinear transformation to yield a low-dimensional embedding.
     """
     @classmethod
     @check_from_file_vectors
     def from_file(cls, file_path, max_vectors=None):
         """
-        Build a `CharNGram` vector from a file.
+        Load the CharNGram pre-training vector set file.
         Args:
-            file_path (str): Path of the file that contains the `CharNGram` vectors.
-            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
+            file_path (str): Path to the CharNGram pre-training vector set file.
+            max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
                 Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
                 situations where the entire set doesn't fit in memory, or is not needed for another reason,
-                passing `max_vectors` can limit the size of the loaded set. Default: ``None``, no limit.
+                this value can limit the size of the loaded set. Default: ``None``, no upper limit.
         Returns:
-            CharNGram, CharNGram vector build from a file.
+            CharNGram, CharNGram pre-training vectors.
         Raises:
-            RuntimeError: If `file_path` contains invalid data.
-            ValueError: If `max_vectors` is invalid.
-            TypeError: If `max_vectors` is not type of integer.
+            TypeError: If `file_path` is not of type str.
+            RuntimeError: If `file_path` does not exist or is not accessible.
+            TypeError: If `max_vectors` is not of type int.
+            ValueError: If `max_vectors` is negative.
         Examples:
             >>> import mindspore.dataset.text as text
+            >>>
             >>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
+            >>> to_vectors = text.ToVectors(char_n_gram)
+            >>> # Look up tokens as vectors according to the CharNGram model.
+            >>> word_vector = to_vectors(["word1", "word2"])
         """
         max_vectors = max_vectors if max_vectors is not None else 0
@@ -64,34 +72,40 @@ class CharNGram(cde.CharNGram):
 class FastText(cde.FastText):
     """
-    FastText object that is used to map tokens into vectors.
+    FastText pre-trained word embeddings.
+    FastText allows one to create an unsupervised learning or supervised learning algorithm for obtaining
+    vector representations for words.
     """
     @classmethod
     @check_from_file_vectors
     def from_file(cls, file_path, max_vectors=None):
         """
-        Build a FastText vector from a file.
+        Load the FastText pre-training vector set file.
         Args:
-            file_path (str): Path of the file that contains the vectors. The shuffix of pre-trained vector sets
-                must be `*.vec` .
-            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
+            file_path (str): Path to the FastText pre-trained vector set file. File suffix should be `*.vec`.
+            max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
                 Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
                 situations where the entire set doesn't fit in memory, or is not needed for another reason,
-                passing `max_vectors` can limit the size of the loaded set. Default: ``None``, no limit.
+                this value can limit the size of the loaded set. Default: ``None``, no upper limit.
         Returns:
-            FastText, FastText vector build from a file.
+            FastText, FastText pre-training vectors.
         Raises:
-            RuntimeError: If `file_path` contains invalid data.
-            ValueError: If `max_vectors` is invalid.
-            TypeError: If `max_vectors` is not type of integer.
+            TypeError: If `file_path` is not of type str.
+            RuntimeError: If `file_path` does not exist or is not accessible.
+            TypeError: If `max_vectors` is not of type int.
+            ValueError: If `max_vectors` is negative.
         Examples:
             >>> import mindspore.dataset.text as text
             >>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
+            >>> to_vectors = text.ToVectors(fast_text)
+            >>> # Look up tokens as vectors according to the FastText model.
+            >>> word_vector = to_vectors(["word1", "word2"])
         """
         max_vectors = max_vectors if max_vectors is not None else 0
@@ -100,34 +114,39 @@ class FastText(cde.FastText):
 class GloVe(cde.GloVe):
     """
-    GloVe object that is used to map tokens into vectors.
+    Global Vectors (GloVe) pre-trained word embeddings.
+    GloVe is an unsupervised learning algorithm for obtaining vector representations for words.
     """
     @classmethod
     @check_from_file_vectors
     def from_file(cls, file_path, max_vectors=None):
         """
-        Build a GloVe vector from a file.
+        Load the GloVe pre-training vector set file.
         Args:
-            file_path (str): Path of the file that contains the vectors. The format of pre-trained vector sets
-                must be `glove.6B.*.txt` .
-            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
+            file_path (str): Path to the GloVe pre-training vector set file. File name is similar to `glove.*.txt`.
+            max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
                 Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
                 situations where the entire set doesn't fit in memory, or is not needed for another reason,
-                passing `max_vectors` can limit the size of the loaded set. Default: ``None``, no limit.
+                this value can limit the size of the loaded set. Default: ``None``, no upper limit.
         Returns:
-            GloVe, GloVe vector build from a file.
+            GloVe, GloVe pre-training vectors.
         Raises:
-            RuntimeError: If `file_path` contains invalid data.
-            ValueError: If `max_vectors` is invalid.
-            TypeError: If `max_vectors` is not type of integer.
+            TypeError: If `file_path` is not of type str.
+            RuntimeError: If `file_path` does not exist or is not accessible.
+            TypeError: If `max_vectors` is not of type int.
+            ValueError: If `max_vectors` is negative.
         Examples:
             >>> import mindspore.dataset.text as text
             >>> glove = text.GloVe.from_file("/path/to/glove/file", max_vectors=None)
+            >>> to_vectors = text.ToVectors(glove)
+            >>> # Look up tokens as vectors according to the GloVe model.
+            >>> word_vector = to_vectors(["word1", "word2"])
         """
         max_vectors = max_vectors if max_vectors is not None else 0
@@ -152,12 +171,11 @@ class JiebaMode(IntEnum):
 class NormalizeForm(IntEnum):
     """
-    Enumeration class for `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
+    `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
-    Possible enumeration values are: ``NormalizeForm.NONE``, ``NormalizeForm.NFC``, ``NormalizeForm.NFKC``,
-    ``NormalizeForm.NFD`` and ``NormalizeForm.NFKD``.
+    Available values are as follows:
-    - NormalizeForm.NONE: no normalization.
+    - NormalizeForm.NONE: No normalization.
     - NormalizeForm.NFC: Canonical Decomposition, followed by Canonical Composition.
     - NormalizeForm.NFKC: Compatibility Decomposition, followed by Canonical Composition.
     - NormalizeForm.NFD: Canonical Decomposition.
@@ -173,17 +191,14 @@ class NormalizeForm(IntEnum):
 class SentencePieceModel(IntEnum):
     """
-    An enumeration for SentencePieceModel.
+    Subword algorithms for SentencePiece.
-    Possible enumeration values are: ``SentencePieceModel.UNIGRAM``, ``SentencePieceModel.BPE``,
-    ``SentencePieceModel.CHAR``, ``SentencePieceModel.WORD``.
+    Available values are as follows:
-    - SentencePieceModel.UNIGRAM: Unigram Language Model means the next word in the sentence is assumed to be
-      independent of the previous words generated by the model.
-    - SentencePieceModel.BPE: refers to byte pair encoding algorithm, which replaces the most frequent pair of bytes in
-      a sentence with a single, unused byte.
-    - SentencePieceModel.CHAR: refers to char based sentencePiece Model type.
-    - SentencePieceModel.WORD: refers to word based sentencePiece Model type.
+    - SentencePieceModel.UNIGRAM: `Unigram Language Model <https://arxiv.org/abs/1804.10959>`_ subword algorithm.
+    - SentencePieceModel.BPE: `Byte-Pair-Encoding <https://arxiv.org/abs/1508.07909>`_ subword algorithm.
+    - SentencePieceModel.CHAR: Character-based subword algorithm.
+    - SentencePieceModel.WORD: Word-based subword algorithm.
     """
     UNIGRAM = 0
@@ -221,17 +236,8 @@ class SentencePieceVocab:
             character_coverage (float): Amount of characters covered by the model. Recommend ``0.9995`` for
                 languages with rich character set like Japanese or Chinese and ``1.0`` for other languages with small
                 character set.
-            model_type (SentencePieceModel): It can be ``SentencePieceModel.UNIGRAM``, ``SentencePieceModel.BPE``,
-                ``SentencePieceModel.CHAR``, ``SentencePieceModel.WORD``.
-                The input sentence must be pre-tokenized when using ``SentencePieceModel.WORD type``.
-                - ``SentencePieceModel.UNIGRAM``, Unigram Language Model means the next word in the sentence
-                  is assumed to be independent of the previous words generated by the model.
-                - ``SentencePieceModel.BPE``, refers to byte pair encoding algorithm, which replaces the most
-                  frequent pair of bytes in a sentence with a single, unused byte.
-                - ``SentencePieceModel.CHAR``, refers to char based sentencePiece Model type.
-                - ``SentencePieceModel.WORD``, refers to word based sentencePiece Model type.
+            model_type (SentencePieceModel): The desired subword algorithm. See :class:`~.text.SentencePieceModel`
+                for details on optional values.
             params (dict): A dictionary with no incoming parameters.
         Returns:
@@ -239,10 +245,16 @@ class SentencePieceVocab:
         Examples:
             >>> import mindspore.dataset as ds
+            >>> import mindspore.dataset.text as text
+            >>>
             >>> from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel
             >>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
             >>> vocab = SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
             ...                                         SentencePieceModel.UNIGRAM, {})
+            >>> # Build tokenizer based on vocab
+            >>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=text.SPieceTokenizerOutType.STRING)
+            >>> txt = "Today is Tuesday."
+            >>> token = tokenizer(txt)
         """
         sentence_piece_vocab = cls()
@@ -264,17 +276,8 @@ class SentencePieceVocab:
             character_coverage (float): Amount of characters covered by the model. Recommend ``0.9995`` for
                 languages with rich character set like Japanese or Chinese and ``1.0`` for other languages with small
                 character set.
-            model_type (SentencePieceModel): It can be ``SentencePieceModel.UNIGRAM``, ``SentencePieceModel.BPE``,
-                ``SentencePieceModel.CHAR``, ``SentencePieceModel.WORD``.
-                The input sentence must be pre-tokenized when using ``SentencePieceModel.WORD`` type.
-                - ``SentencePieceModel.UNIGRAM``, Unigram Language Model means the next word in the sentence
-                  is assumed to be independent of the previous words generated by the model.
-                - ``SentencePieceModel.BPE``, refers to byte pair encoding algorithm, which replaces the most
-                  frequent pair of bytes in a sentence with a single, unused byte.
-                - ``SentencePieceModel.CHAR``, refers to char based sentencePiece Model type.
-                - ``SentencePieceModel.WORD``, refers to word based sentencePiece Model type.
+            model_type (SentencePieceModel): The desired subword algorithm. See :class:`~.text.SentencePieceModel`
+                for details on optional values.
             params (dict): A dictionary with no incoming parameters(The parameters are derived from SentencePiece
                 library).
@@ -285,6 +288,10 @@ class SentencePieceVocab:
             >>> from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel
             >>> vocab = SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
             ...                                      SentencePieceModel.UNIGRAM, {})
+            >>> # Build tokenizer based on vocab model
+            >>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=text.SPieceTokenizerOutType.STRING)
+            >>> txt = "Today is Friday."
+            >>> token = tokenizer(txt)
         """
         sentence_piece_vocab = cls()
@@ -315,12 +322,12 @@ class SentencePieceVocab:
 class SPieceTokenizerLoadType(IntEnum):
     """
-    An enumeration for loading type of :class:`mindspore.dataset.text.SentencePieceTokenizer` .
+    Model input type for the SentencePiece tokenizer.
-    Possible enumeration values are: ``SPieceTokenizerLoadType.FILE``, ``SPieceTokenizerLoadType.MODEL``.
+    Available values are as follows:
-    - SPieceTokenizerLoadType.FILE: Load SentencePiece tokenizer from a Vocab file.
-    - SPieceTokenizerLoadType.MODEL: Load SentencePiece tokenizer from a SentencePieceVocab object.
+    - SPieceTokenizerLoadType.FILE: Load model from specified file path.
+    - SPieceTokenizerLoadType.MODEL: Load model from specified vocab object.
     """
     FILE = 0
@@ -343,33 +350,37 @@ class SPieceTokenizerOutType(IntEnum):
 class Vectors(cde.Vectors):
     """
-    Vectors object that is used to map tokens into vectors.
+    Pre-trained word embeddings.
     """
     @classmethod
     @check_from_file_vectors
     def from_file(cls, file_path, max_vectors=None):
         """
-        Build a vector from a file.
+        Load a pre-training vector set file.
         Args:
-            file_path (str): Path of the file that contains the vectors.
-            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
+            file_path (str): Path to the pre-training vector set file.
+            max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
                 Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
                 situations where the entire set doesn't fit in memory, or is not needed for another reason,
-                passing `max_vectors` can limit the size of the loaded set. Default: ``None``, no limit.
+                this value can limit the size of the loaded set. Default: ``None``, no upper limit.
         Returns:
-            Vectors, Vectors build from a file.
+            Vectors, pre-training vectors.
         Raises:
-            RuntimeError: If `file_path` contains invalid data.
-            ValueError: If `max_vectors` is invalid.
-            TypeError: If `max_vectors` is not type of integer.
+            TypeError: If `file_path` is not of type str.
+            RuntimeError: If `file_path` does not exist or is not accessible.
+            TypeError: If `max_vectors` is not of type int.
+            ValueError: If `max_vectors` is negative.
         Examples:
             >>> import mindspore.dataset.text as text
             >>> vector = text.Vectors.from_file("/path/to/vectors/file", max_vectors=None)
+            >>> to_vectors = text.ToVectors(vector)
+            >>> # Look up tokens as vectors according to the Vectors model.
+            >>> word_vector = to_vectors(["word1", "word2"])
         """
         max_vectors = max_vectors if max_vectors is not None else 0
@@ -378,9 +389,9 @@ class Vectors(cde.Vectors):
 class Vocab:
     """
-    Vocab object that is used to save pairs of words and ids.
+    Create Vocab for training NLP models.
-    It contains a map that maps each word(str) to an id(int) or reverse.
+    Vocab is a collection of all possible Tokens in the data, preserving the mapping between each Token and its ID.
     """
     def __init__(self):
@@ -390,42 +401,52 @@ class Vocab:
     @check_from_dataset
     def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, special_first=True):
         """
-        Build a Vocab from a dataset.
+        Build a Vocab from a given dataset.
-        This would collect all unique words in a dataset and return a vocab within
-        the frequency range specified by user in freq_range. User would be warned if no words fall into the frequency.
-        Words in vocab are ordered from the highest frequency to the lowest frequency. Words with the same frequency
-        would be ordered lexicographically.
+        The samples in the dataset are used as a corpus to create Vocab, in which Tokens are arranged in descending
+        order of Token frequency, and Tokens with the same frequency are arranged in alphabetical order.
         Args:
-            dataset (Dataset): dataset to build vocab from.
-            columns (list[str], optional): column names to get words from. It can be a list of column names.
-                Default: ``None``.
-            freq_range (tuple, optional): A tuple of integers (min_frequency, max_frequency). Words within the frequency
-                range would be kept. 0 <= min_frequency <= max_frequency <= total_words. min_frequency=0 is the same as
-                min_frequency=1. max_frequency > total_words is the same as max_frequency = total_words.
-                min_frequency/max_frequency can be ``None``, which corresponds to 0/total_words separately.
-                Default: ``None``, all words are included.
-            top_k (int, optional): top_k is greater than 0. Number of words to be built into vocab. top_k means most
-                frequent words are taken. top_k is taken after freq_range. If not enough top_k, all words will be taken.
-                Default: ``None``, all words are included.
-            special_tokens (list, optional):  A list of strings, each one is a special token. For example
-                special_tokens=["<pad>","<unk>"]. Default: ``None``, no special tokens will be added.
-            special_first (bool, optional): Whether `special_tokens` will be prepended/appended to vocab. If
-                `special_tokens` is specified and `special_first` is set to ``True``, special_tokens will be prepended.
-                Default: ``True``.
+            dataset (Dataset): The dataset to build the Vocab from.
+            columns (list[str], optional): The name of the data columns used to create the Vocab.
+                Default: ``None`` , use all columns.
+            freq_range (tuple[int, int], optional): The Token frequency range used to create the Vocab. Must contain
+                two elements representing the minimum and maximum frequencies, within which the Token will be retained.
+                When the minimum or maximum frequency is None, it means there is no minimum or maximum frequency limit.
+                Default: ``None`` , no Token frequency range restriction.
+            top_k (int, optional): Only the first specified number of Tokens with the highest Token frequency are
+                selected to build the Vocab. This operation will be performed after Token frequency filtering. If
+                the value is greater than the total number of Tokens, all Tokens will be retained. Default: ``None`` ,
+                there is no limit to the number of Tokens.
+            special_tokens (list[str], optional):  A list of special Token to append to the Vocab. Default: ``None`` ,
+                no special Token is appended.
+            special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
+                the bottom of the Vocab. Default: ``True``.
         Returns:
-            Vocab, Vocab object built from the dataset.
+            Vocab, Vocab built from the dataset.
+        Raises:
+            TypeError: If `columns` is not of type list[str].
+            TypeError: If `freq_range` is not of type tuple[int, int].
+            ValueError: If element of `freq_range` is negative.
+            TypeError: If `top_k` is not of type int.
+            ValueError: If `top_k` is not positive.
+            TypeError: If `special_tokens` is not of type list[str].
+            ValueError: If there are duplicate elements in `special_tokens`.
+            TypeError: If `special_first` is not of type bool.
         Examples:
             >>> import mindspore.dataset as ds
             >>> import mindspore.dataset.text as text
+            >>>
             >>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
             >>> vocab = text.Vocab.from_dataset(dataset, "text", freq_range=None, top_k=None,
             ...                                 special_tokens=["<pad>", "<unk>"],
             ...                                 special_first=True)
-            >>> dataset = dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
+            >>> # Use the vocab to look up string to id
+            >>> lookup = text.Lookup(vocab, "<unk>")
+            >>> id = lookup("text1")
         """
         vocab = cls()
@@ -437,22 +458,30 @@ class Vocab:
     @check_from_list
     def from_list(cls, word_list, special_tokens=None, special_first=True):
         """
-        Build a vocab object from a list of word.
+        Build a Vocab from a given Token list.
         Args:
-            word_list (list): A list of string where each element is a word of type string.
-            special_tokens (list, optional):  A list of strings, each one is a special token. For example,
-                special_tokens is ``"<pad>"``, ``"<unk>"``. Default: ``None``, no special tokens will be added.
-            special_first (bool, optional): Whether `special_tokens` is prepended or appended to vocab.
-                If `special_tokens` is specified and special_first is set to ``True``,
-                `special_tokens` will be prepended. Default: ``True``.
+            word_list (list[str]): The Token list to build the Vocab from.
+            special_tokens (list[str], optional):  A list of special Token to append to the Vocab. Default: ``None`` ,
+                no special Token is appended.
+            special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
+                the bottom of the Vocab. Default: ``True``.
         Returns:
-            Vocab, Vocab object built from the list.
+            Vocab, Vocab built from the list.
+        Raises:
+            TypeError: If `word_list` is not of type list[str].
+            ValueError: If there are duplicate elements in `word_list`.
+            TypeError: If `special_tokens` is not of type list[str].
+            ValueError: If there are duplicate elements in `special_tokens`.
+            TypeError: If `special_first` is not of type bool.
         Examples:
             >>> import mindspore.dataset.text as text
             >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
+            >>> # look up strings to ids
+            >>> ids = vocab.tokens_to_ids(["w1", "w3"])
         """
         if special_tokens is None:
@@ -465,21 +494,29 @@ class Vocab:
     @check_from_file
     def from_file(cls, file_path, delimiter="", vocab_size=None, special_tokens=None, special_first=True):
         """
-        Build a vocab object from a file.
+        Build a Vocab from a file.
         Args:
-            file_path (str): Path to the file which contains the vocab list.
-            delimiter (str, optional): A delimiter to break up each line in file, the first element is taken to be
-                the word. Default: ``''``, the whole line will be treated as a word.
-            vocab_size (int, optional): Number of words to read from file_path. Default: ``None``, all words are taken.
-            special_tokens (list, optional):  A list of strings, each one is a special token. For example
-                special_tokens=["<pad>","<unk>"]. Default: ``None``, no special tokens will be added.
-            special_first (bool, optional): Whether `special_tokens` will be prepended/appended to vocab,
-                If special_tokens is specified and `special_first` is set to ``True``,
-                special_tokens will be prepended. Default: ``True``.
+            file_path (str): The path of the file to build the Vocab from.
+            delimiter (str, optional): The separator for the Token in the file line. The string before the separator
+                will be treated as a Token. Default: ``''``, the whole line will be treated as a Token.
+            vocab_size (int, optional): The upper limit on the number of Tokens that Vocab can contain.
+                Default: ``None`` , no upper limit on the number of Tokens.
+            special_tokens (list[str], optional):  A list of special Token to append to the Vocab. Default: ``None`` ,
+                no special Token is appended.
+            special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
+                the bottom of the Vocab. Default: ``True``.
         Returns:
-            Vocab, Vocab object built from the file.
+            Vocab, Vocab built from the file.
+        Raises:
+            TypeError: If `file_path` is not of type str.
+            TypeError: If `delimiter` is not of type str.
+            ValueError: If `vocab_size` is not positive.
+            TypeError: If `special_tokens` is not of type list[str].
+            ValueError: If there are duplicate elements in `special_tokens`.
+            TypeError: If `special_first` is not of type bool.
         Examples:
             >>> import mindspore.dataset.text as text
@@ -496,6 +533,9 @@ class Vocab:
             >>>
             >>> # Finally, there are 5 words in the vocab: "<pad>", "<unk>", "apple", "banana", "cat".
             >>> vocabulary = vocab.vocab()
+            >>>
+            >>> # look up strings to ids
+            >>> ids = vocab.tokens_to_ids(["apple", "banana"])
         """
         if vocab_size is None:
@@ -510,18 +550,26 @@ class Vocab:
     @check_from_dict
     def from_dict(cls, word_dict):
         """
-        Build a vocab object from a dict.
+        Build a Vocab from a given dictionary.
         Args:
-            word_dict (dict): Dict contains word and id pairs, where word should be str and id be int. id is recommended
-                to start from 0 and be continuous. ValueError will be raised if id is negative.
+            word_dict (dict[str, int]): A dictionary storing the mappings between each Token and its ID.
         Returns:
-            Vocab, Vocab object built from the dict.
+            Vocab, Vocab built from the dictionary.
+        Raises:
+            TypeError: If `word_dict` is not of type dict[str, int].
+            ValueError: If any ID (value) in `word_dict` is negative.
         Examples:
             >>> import mindspore.dataset.text as text
             >>> vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "<unk>": 6})
+            >>>
+            >>> # look up ids to string
+            >>> tokens = vocab.ids_to_tokens([3, 4, 5])
+            >>> print(tokens)
+            ['home', 'the', 'world']
         """
         vocab = cls()
@@ -530,15 +578,17 @@ class Vocab:
     def vocab(self):
         """
-        Get the vocabory table in dict type.
+        Get the dictionary of the mappings between Tokens and their IDs.
         Returns:
-            A vocabulary consisting of word and id pairs.
+            dict[str, int], the dictionary of mappings between Tokens and IDs.
         Examples:
             >>> import mindspore.dataset.text as text
             >>> vocab = text.Vocab.from_list(["word_1", "word_2", "word_3", "word_4"])
             >>> vocabory_dict = vocab.vocab()
+            >>> print(sorted(vocabory_dict.items()))
+            [('word_1', 0), ('word_2', 1), ('word_3', 2), ('word_4', 3)]
         """
         check_vocab(self.c_vocab)
         return self.c_vocab.vocab()
@@ -546,19 +596,24 @@ class Vocab:
     @check_tokens_to_ids
     def tokens_to_ids(self, tokens):
         """
-        Converts a token string or a sequence of tokens in a single integer id or a sequence of ids.
-        If token does not exist, return id with value -1.
+        Look up the ID corresponding to the specified Token.
         Args:
-            tokens (Union[str, list[str]]): One or several token(s) to convert to token id(s).
+            tokens (Union[str, list[str], numpy.ndarray]): The Token or list of Tokens to be looked up.
+                If the Token does not exist, -1 is returned.
         Returns:
-            The token id or list of token ids.
+            Union[int, list[int]], the ID(s) corresponding to the Token(s).
+        Raises:
+            TypeError: If `tokens` is not of type Union[str, list[str], numpy.ndarray].
         Examples:
             >>> import mindspore.dataset.text as text
             >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
             >>> ids = vocab.tokens_to_ids(["w1", "w3"])
+            >>> print(ids)
+            [1, 3]
         """
         check_vocab(self.c_vocab)
         if isinstance(tokens, np.ndarray):
@@ -570,19 +625,25 @@ class Vocab:
     @check_ids_to_tokens
     def ids_to_tokens(self, ids):
         """
-        Converts a single index or a sequence of indices in a token or a sequence of tokens.
-        If id does not exist, return empty string.
+        Look up the Token corresponding to the specified ID.
         Args:
-            ids (Union[int, list[int]]): The token id (or token ids) to convert to tokens.
+            ids (Union[int, list[int], numpy.ndarray]): The ID or list of IDs to be looked up.
+                If the ID does not exist, an empty string is returned.
         Returns:
-            The decoded token(s).
+            Union[str, list[str]], the Token(s) corresponding to the ID(s).
+        Raises:
+            TypeError: If `ids` is not of type Union[int, list[int], numpy.ndarray].
+            ValueError: If element of `ids` is negative.
         Examples:
             >>> import mindspore.dataset.text as text
             >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
-            >>> token = vocab.ids_to_tokens(0)
+            >>> token = vocab.ids_to_tokens(1)
+            >>> print(token)
+            w1
         """
         check_vocab(self.c_vocab)
         if isinstance(ids, np.ndarray):
@@ -610,8 +671,11 @@ def to_bytes(array, encoding='utf8'):
         >>>
         >>> data = np.array([["1", "2", "3"]], dtype=np.str_)
         >>> dataset = ds.NumpySlicesDataset(data, column_names=["text"])
+        >>> result = []
         >>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
-        ...     bytes_data = text.to_bytes(item["text"])
+        ...     result.append(text.to_bytes(item["text"]))
+        >>> print(result)
+        [array([b'1', b'2', b'3'], dtype='|S1')]
     """
     if not isinstance(array, np.ndarray):
@@ -638,8 +702,11 @@ def to_str(array, encoding='utf8'):
         >>>
         >>> data = np.array([["1", "2", "3"]], dtype=np.bytes_)
         >>> dataset = ds.NumpySlicesDataset(data, column_names=["text"])
+        >>> result = []
         >>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
-        ...     str_data = text.to_str(item["text"])
+        ...     result.append(text.to_str(item["text"]))
+        >>> print(result)
+        [array(['1', '2', '3'], dtype='<U1')]
     """
     if not isinstance(array, np.ndarray):