mindspore 2.1.0__cp37-cp37m-win_amd64.whl → 2.2.11__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +4 -1
- mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +23 -29
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +4 -11
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +13 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +67 -72
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +86 -106
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +25 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/amp.py +47 -11
- mindspore/atlprov.dll +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +29 -0
- mindspore/common/api.py +174 -259
- mindspore/common/auto_dynamic_shape.py +494 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +243 -165
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +14 -3
- mindspore/context.py +152 -61
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +33 -7
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +16 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +17 -14
- mindspore/include/api/status.h +8 -3
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +313 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +22 -30
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +323 -204
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +61 -95
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +16 -25
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +27 -22
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +87 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/numpy/utils_const.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +6 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +174 -193
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -9
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +568 -260
- mindspore/ops/function/random_func.py +88 -57
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +244 -25
- mindspore/ops/operations/__init__.py +31 -19
- mindspore/ops/operations/_grad_ops.py +71 -7
- mindspore/ops/operations/_inner_ops.py +350 -17
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +68 -282
- mindspore/ops/operations/comm_ops.py +107 -59
- mindspore/ops/operations/custom_ops.py +94 -70
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +192 -144
- mindspore/ops/operations/nn_ops.py +857 -489
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +12 -5
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +18 -13
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +19 -12
- mindspore/parallel/shard.py +21 -14
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +4 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
- mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
- mindspore/profiler/parser/ascend_op_generator.py +6 -6
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
- mindspore/profiler/parser/base_timeline_generator.py +10 -8
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +38 -22
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +21 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +179 -89
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +42 -21
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +523 -578
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +6 -4
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +541 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +15 -8
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +84 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +187 -47
- mindspore/train/serialization.py +487 -161
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +37 -17
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +7 -4
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +429 -486
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/dataset/text/utils.py
CHANGED
|
@@ -29,33 +29,41 @@ from .validators import check_vocab, check_from_file, check_from_list, check_fro
|
|
|
29
29
|
|
|
30
30
|
class CharNGram(cde.CharNGram):
|
|
31
31
|
"""
|
|
32
|
-
CharNGram
|
|
32
|
+
CharNGram pre-trained word embeddings.
|
|
33
|
+
|
|
34
|
+
A word or sentence is represented using a character n-gram count vector, followed by a single
|
|
35
|
+
nonlinear transformation to yield a low-dimensional embedding.
|
|
33
36
|
"""
|
|
34
37
|
|
|
35
38
|
@classmethod
|
|
36
39
|
@check_from_file_vectors
|
|
37
40
|
def from_file(cls, file_path, max_vectors=None):
|
|
38
41
|
"""
|
|
39
|
-
|
|
42
|
+
Load the CharNGram pre-training vector set file.
|
|
40
43
|
|
|
41
44
|
Args:
|
|
42
|
-
file_path (str): Path
|
|
43
|
-
max_vectors (int, optional):
|
|
45
|
+
file_path (str): Path to the CharNGram pre-training vector set file.
|
|
46
|
+
max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
|
|
44
47
|
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
|
|
45
48
|
situations where the entire set doesn't fit in memory, or is not needed for another reason,
|
|
46
|
-
|
|
49
|
+
this value can limit the size of the loaded set. Default: ``None``, no upper limit.
|
|
47
50
|
|
|
48
51
|
Returns:
|
|
49
|
-
CharNGram, CharNGram
|
|
52
|
+
CharNGram, CharNGram pre-training vectors.
|
|
50
53
|
|
|
51
54
|
Raises:
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
TypeError: If `max_vectors` is not type
|
|
55
|
+
TypeError: If `file_path` is not of type str.
|
|
56
|
+
RuntimeError: If `file_path` does not exist or is not accessible.
|
|
57
|
+
TypeError: If `max_vectors` is not of type int.
|
|
58
|
+
ValueError: If `max_vectors` is negative.
|
|
55
59
|
|
|
56
60
|
Examples:
|
|
57
61
|
>>> import mindspore.dataset.text as text
|
|
62
|
+
>>>
|
|
58
63
|
>>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
|
|
64
|
+
>>> to_vectors = text.ToVectors(char_n_gram)
|
|
65
|
+
>>> # Look up tokens as vectors according to the CharNGram model.
|
|
66
|
+
>>> word_vector = to_vectors(["word1", "word2"])
|
|
59
67
|
"""
|
|
60
68
|
|
|
61
69
|
max_vectors = max_vectors if max_vectors is not None else 0
|
|
@@ -64,34 +72,40 @@ class CharNGram(cde.CharNGram):
|
|
|
64
72
|
|
|
65
73
|
class FastText(cde.FastText):
|
|
66
74
|
"""
|
|
67
|
-
FastText
|
|
75
|
+
FastText pre-trained word embeddings.
|
|
76
|
+
|
|
77
|
+
FastText allows one to create an unsupervised or supervised learning algorithm for obtaining vector
|
|
78
|
+
representations for words.
|
|
68
79
|
"""
|
|
69
80
|
|
|
70
81
|
@classmethod
|
|
71
82
|
@check_from_file_vectors
|
|
72
83
|
def from_file(cls, file_path, max_vectors=None):
|
|
73
84
|
"""
|
|
74
|
-
|
|
85
|
+
Load the FastText pre-training vector set file.
|
|
75
86
|
|
|
76
87
|
Args:
|
|
77
|
-
file_path (str): Path
|
|
78
|
-
|
|
79
|
-
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
|
|
88
|
+
file_path (str): Path to the FastText pre-trained vector set file. File suffix should be `*.vec`.
|
|
89
|
+
max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
|
|
80
90
|
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
|
|
81
91
|
situations where the entire set doesn't fit in memory, or is not needed for another reason,
|
|
82
|
-
|
|
92
|
+
this value can limit the size of the loaded set. Default: ``None``, no upper limit.
|
|
83
93
|
|
|
84
94
|
Returns:
|
|
85
|
-
FastText, FastText
|
|
95
|
+
FastText, FastText pre-training vectors.
|
|
86
96
|
|
|
87
97
|
Raises:
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
TypeError: If `max_vectors` is not type
|
|
98
|
+
TypeError: If `file_path` is not of type str.
|
|
99
|
+
RuntimeError: If `file_path` does not exist or is not accessible.
|
|
100
|
+
TypeError: If `max_vectors` is not of type int.
|
|
101
|
+
ValueError: If `max_vectors` is negative.
|
|
91
102
|
|
|
92
103
|
Examples:
|
|
93
104
|
>>> import mindspore.dataset.text as text
|
|
94
105
|
>>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
|
|
106
|
+
>>> to_vectors = text.ToVectors(fast_text)
|
|
107
|
+
>>> # Look up tokens as vectors according to the FastText model.
|
|
108
|
+
>>> word_vector = to_vectors(["word1", "word2"])
|
|
95
109
|
"""
|
|
96
110
|
|
|
97
111
|
max_vectors = max_vectors if max_vectors is not None else 0
|
|
@@ -100,34 +114,39 @@ class FastText(cde.FastText):
|
|
|
100
114
|
|
|
101
115
|
class GloVe(cde.GloVe):
|
|
102
116
|
"""
|
|
103
|
-
|
|
117
|
+
Global Vectors (GloVe) pre-trained word embeddings.
|
|
118
|
+
|
|
119
|
+
GloVe is an unsupervised learning algorithm for obtaining vector representations for words.
|
|
104
120
|
"""
|
|
105
121
|
|
|
106
122
|
@classmethod
|
|
107
123
|
@check_from_file_vectors
|
|
108
124
|
def from_file(cls, file_path, max_vectors=None):
|
|
109
125
|
"""
|
|
110
|
-
|
|
126
|
+
Load the GloVe pre-training vector set file.
|
|
111
127
|
|
|
112
128
|
Args:
|
|
113
|
-
file_path (str): Path
|
|
114
|
-
|
|
115
|
-
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
|
|
129
|
+
file_path (str): Path to the GloVe pre-training vector set file. File name is similar to `glove.*.txt`.
|
|
130
|
+
max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
|
|
116
131
|
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
|
|
117
132
|
situations where the entire set doesn't fit in memory, or is not needed for another reason,
|
|
118
|
-
|
|
133
|
+
this value can limit the size of the loaded set. Default: ``None``, no upper limit.
|
|
119
134
|
|
|
120
135
|
Returns:
|
|
121
|
-
GloVe, GloVe
|
|
136
|
+
GloVe, GloVe pre-training vectors.
|
|
122
137
|
|
|
123
138
|
Raises:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
TypeError: If `max_vectors` is not type
|
|
139
|
+
TypeError: If `file_path` is not of type str.
|
|
140
|
+
RuntimeError: If `file_path` does not exist or is not accessible.
|
|
141
|
+
TypeError: If `max_vectors` is not of type int.
|
|
142
|
+
ValueError: If `max_vectors` is negative.
|
|
127
143
|
|
|
128
144
|
Examples:
|
|
129
145
|
>>> import mindspore.dataset.text as text
|
|
130
146
|
>>> glove = text.GloVe.from_file("/path/to/glove/file", max_vectors=None)
|
|
147
|
+
>>> to_vectors = text.ToVectors(glove)
|
|
148
|
+
>>> # Look up the vectors of the tokens according to the GloVe model.
|
|
149
|
+
>>> word_vector = to_vectors(["word1", "word2"])
|
|
131
150
|
"""
|
|
132
151
|
|
|
133
152
|
max_vectors = max_vectors if max_vectors is not None else 0
|
|
@@ -152,12 +171,11 @@ class JiebaMode(IntEnum):
|
|
|
152
171
|
|
|
153
172
|
class NormalizeForm(IntEnum):
|
|
154
173
|
"""
|
|
155
|
-
|
|
174
|
+
`Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
|
|
156
175
|
|
|
157
|
-
|
|
158
|
-
``NormalizeForm.NFD`` and ``NormalizeForm.NFKD``.
|
|
176
|
+
Available values are as follows:
|
|
159
177
|
|
|
160
|
-
- NormalizeForm.NONE:
|
|
178
|
+
- NormalizeForm.NONE: No normalization.
|
|
161
179
|
- NormalizeForm.NFC: Canonical Decomposition, followed by Canonical Composition.
|
|
162
180
|
- NormalizeForm.NFKC: Compatibility Decomposition, followed by Canonical Composition.
|
|
163
181
|
- NormalizeForm.NFD: Canonical Decomposition.
|
|
@@ -173,17 +191,14 @@ class NormalizeForm(IntEnum):
|
|
|
173
191
|
|
|
174
192
|
class SentencePieceModel(IntEnum):
|
|
175
193
|
"""
|
|
176
|
-
|
|
194
|
+
Subword algorithms for SentencePiece.
|
|
177
195
|
|
|
178
|
-
|
|
179
|
-
``SentencePieceModel.CHAR``, ``SentencePieceModel.WORD``.
|
|
196
|
+
Available values are as follows:
|
|
180
197
|
|
|
181
|
-
- SentencePieceModel.UNIGRAM: Unigram Language Model
|
|
182
|
-
|
|
183
|
-
- SentencePieceModel.
|
|
184
|
-
|
|
185
|
-
- SentencePieceModel.CHAR: refers to char based sentencePiece Model type.
|
|
186
|
-
- SentencePieceModel.WORD: refers to word based sentencePiece Model type.
|
|
198
|
+
- SentencePieceModel.UNIGRAM: `Unigram Language Model <https://arxiv.org/abs/1804.10959>`_ subword algorithm.
|
|
199
|
+
- SentencePieceModel.BPE: `Byte-Pair-Encoding <https://arxiv.org/abs/1508.07909>`_ subword algorithm.
|
|
200
|
+
- SentencePieceModel.CHAR: Character-based subword algorithm.
|
|
201
|
+
- SentencePieceModel.WORD: Word-based subword algorithm.
|
|
187
202
|
"""
|
|
188
203
|
|
|
189
204
|
UNIGRAM = 0
|
|
@@ -221,17 +236,8 @@ class SentencePieceVocab:
|
|
|
221
236
|
character_coverage (float): Amount of characters covered by the model. Recommend ``0.9995`` for
|
|
222
237
|
languages with rich character set like Japanese or Chinese and ``1.0`` for other languages with small
|
|
223
238
|
character set.
|
|
224
|
-
model_type (SentencePieceModel):
|
|
225
|
-
|
|
226
|
-
The input sentence must be pre-tokenized when using ``SentencePieceModel.WORD type``.
|
|
227
|
-
|
|
228
|
-
- ``SentencePieceModel.UNIGRAM``, Unigram Language Model means the next word in the sentence
|
|
229
|
-
is assumed to be independent of the previous words generated by the model.
|
|
230
|
-
- ``SentencePieceModel.BPE``, refers to byte pair encoding algorithm, which replaces the most
|
|
231
|
-
frequent pair of bytes in a sentence with a single, unused byte.
|
|
232
|
-
- ``SentencePieceModel.CHAR``, refers to char based sentencePiece Model type.
|
|
233
|
-
- ``SentencePieceModel.WORD``, refers to word based sentencePiece Model type.
|
|
234
|
-
|
|
239
|
+
model_type (SentencePieceModel): The desired subword algorithm. See :class:`~.text.SentencePieceModel`
|
|
240
|
+
for details on optional values.
|
|
235
241
|
params (dict): A dictionary with no incoming parameters.
|
|
236
242
|
|
|
237
243
|
Returns:
|
|
@@ -239,10 +245,16 @@ class SentencePieceVocab:
|
|
|
239
245
|
|
|
240
246
|
Examples:
|
|
241
247
|
>>> import mindspore.dataset as ds
|
|
248
|
+
>>> import mindspore.dataset.text as text
|
|
249
|
+
>>>
|
|
242
250
|
>>> from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel
|
|
243
251
|
>>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
|
|
244
252
|
>>> vocab = SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
|
|
245
253
|
... SentencePieceModel.UNIGRAM, {})
|
|
254
|
+
>>> # Build tokenizer based on vocab
|
|
255
|
+
>>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=text.SPieceTokenizerOutType.STRING)
|
|
256
|
+
>>> txt = "Today is Tuesday."
|
|
257
|
+
>>> token = tokenizer(txt)
|
|
246
258
|
"""
|
|
247
259
|
|
|
248
260
|
sentence_piece_vocab = cls()
|
|
@@ -264,17 +276,8 @@ class SentencePieceVocab:
|
|
|
264
276
|
character_coverage (float): Amount of characters covered by the model. Recommend ``0.9995`` for
|
|
265
277
|
languages with rich character set like Japanese or Chinese and ``1.0`` for other languages with small
|
|
266
278
|
character set.
|
|
267
|
-
model_type (SentencePieceModel):
|
|
268
|
-
|
|
269
|
-
The input sentence must be pre-tokenized when using ``SentencePieceModel.WORD`` type.
|
|
270
|
-
|
|
271
|
-
- ``SentencePieceModel.UNIGRAM``, Unigram Language Model means the next word in the sentence
|
|
272
|
-
is assumed to be independent of the previous words generated by the model.
|
|
273
|
-
- ``SentencePieceModel.BPE``, refers to byte pair encoding algorithm, which replaces the most
|
|
274
|
-
frequent pair of bytes in a sentence with a single, unused byte.
|
|
275
|
-
- ``SentencePieceModel.CHAR``, refers to char based sentencePiece Model type.
|
|
276
|
-
- ``SentencePieceModel.WORD``, refers to word based sentencePiece Model type.
|
|
277
|
-
|
|
279
|
+
model_type (SentencePieceModel): The desired subword algorithm. See :class:`~.text.SentencePieceModel`
|
|
280
|
+
for details on optional values.
|
|
278
281
|
params (dict): A dictionary with no incoming parameters (The parameters are derived from SentencePiece
|
|
279
282
|
library).
|
|
280
283
|
|
|
@@ -285,6 +288,10 @@ class SentencePieceVocab:
|
|
|
285
288
|
>>> from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel
|
|
286
289
|
>>> vocab = SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
|
|
287
290
|
... SentencePieceModel.UNIGRAM, {})
|
|
291
|
+
>>> # Build tokenizer based on vocab model
|
|
292
|
+
>>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=text.SPieceTokenizerOutType.STRING)
|
|
293
|
+
>>> txt = "Today is Friday."
|
|
294
|
+
>>> token = tokenizer(txt)
|
|
288
295
|
"""
|
|
289
296
|
|
|
290
297
|
sentence_piece_vocab = cls()
|
|
@@ -315,12 +322,12 @@ class SentencePieceVocab:
|
|
|
315
322
|
|
|
316
323
|
class SPieceTokenizerLoadType(IntEnum):
|
|
317
324
|
"""
|
|
318
|
-
|
|
325
|
+
Model input type for the SentencePiece tokenizer.
|
|
319
326
|
|
|
320
|
-
|
|
327
|
+
Available values are as follows:
|
|
321
328
|
|
|
322
|
-
- SPieceTokenizerLoadType.FILE: Load
|
|
323
|
-
- SPieceTokenizerLoadType.MODEL: Load
|
|
329
|
+
- SPieceTokenizerLoadType.FILE: Load model from specified file path.
|
|
330
|
+
- SPieceTokenizerLoadType.MODEL: Load model from specified vocab object.
|
|
324
331
|
"""
|
|
325
332
|
|
|
326
333
|
FILE = 0
|
|
@@ -343,33 +350,37 @@ class SPieceTokenizerOutType(IntEnum):
|
|
|
343
350
|
|
|
344
351
|
class Vectors(cde.Vectors):
|
|
345
352
|
"""
|
|
346
|
-
|
|
353
|
+
Pre-trained word embeddings.
|
|
347
354
|
"""
|
|
348
355
|
|
|
349
356
|
@classmethod
|
|
350
357
|
@check_from_file_vectors
|
|
351
358
|
def from_file(cls, file_path, max_vectors=None):
|
|
352
359
|
"""
|
|
353
|
-
|
|
360
|
+
Load a pre-training vector set file.
|
|
354
361
|
|
|
355
362
|
Args:
|
|
356
|
-
file_path (str): Path
|
|
357
|
-
max_vectors (int, optional):
|
|
363
|
+
file_path (str): Path to the pre-training vector set file.
|
|
364
|
+
max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
|
|
358
365
|
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
|
|
359
366
|
situations where the entire set doesn't fit in memory, or is not needed for another reason,
|
|
360
|
-
|
|
367
|
+
this value can limit the size of the loaded set. Default: ``None``, no upper limit.
|
|
361
368
|
|
|
362
369
|
Returns:
|
|
363
|
-
Vectors,
|
|
370
|
+
Vectors, pre-training vectors.
|
|
364
371
|
|
|
365
372
|
Raises:
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
TypeError: If `max_vectors` is not type
|
|
373
|
+
TypeError: If `file_path` is not of type str.
|
|
374
|
+
RuntimeError: If `file_path` does not exist or is not accessible.
|
|
375
|
+
TypeError: If `max_vectors` is not of type int.
|
|
376
|
+
ValueError: If `max_vectors` is negative.
|
|
369
377
|
|
|
370
378
|
Examples:
|
|
371
379
|
>>> import mindspore.dataset.text as text
|
|
372
380
|
>>> vector = text.Vectors.from_file("/path/to/vectors/file", max_vectors=None)
|
|
381
|
+
>>> to_vectors = text.ToVectors(vector)
|
|
382
|
+
>>> # Look up the vectors of the tokens according to the Vectors model.
|
|
383
|
+
>>> word_vector = to_vectors(["word1", "word2"])
|
|
373
384
|
"""
|
|
374
385
|
|
|
375
386
|
max_vectors = max_vectors if max_vectors is not None else 0
|
|
@@ -378,9 +389,9 @@ class Vectors(cde.Vectors):
|
|
|
378
389
|
|
|
379
390
|
class Vocab:
|
|
380
391
|
"""
|
|
381
|
-
Vocab
|
|
392
|
+
Create Vocab for training NLP models.
|
|
382
393
|
|
|
383
|
-
|
|
394
|
+
Vocab is a collection of all possible Tokens in the data, preserving the mapping between each Token and its ID.
|
|
384
395
|
"""
|
|
385
396
|
|
|
386
397
|
def __init__(self):
|
|
@@ -390,42 +401,52 @@ class Vocab:
|
|
|
390
401
|
@check_from_dataset
|
|
391
402
|
def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, special_first=True):
|
|
392
403
|
"""
|
|
393
|
-
Build a Vocab from a dataset.
|
|
404
|
+
Build a Vocab from a given dataset.
|
|
394
405
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
Words in vocab are ordered from the highest frequency to the lowest frequency. Words with the same frequency
|
|
398
|
-
would be ordered lexicographically.
|
|
406
|
+
The samples in the dataset are used as a corpus to create Vocab, in which the Token is arranged in ascending
|
|
407
|
+
order of Token frequency, and Tokens with the same frequency are arranged in alphabetical order.
|
|
399
408
|
|
|
400
409
|
Args:
|
|
401
|
-
dataset (Dataset): dataset to build
|
|
402
|
-
columns (list[str], optional):
|
|
403
|
-
Default: ``None
|
|
404
|
-
freq_range (tuple, optional):
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
special_tokens (list, optional): A list of
|
|
413
|
-
|
|
414
|
-
special_first (bool, optional): Whether
|
|
415
|
-
|
|
416
|
-
Default: ``True``.
|
|
410
|
+
dataset (Dataset): The dataset to build the Vocab from.
|
|
411
|
+
columns (list[str], optional): The name of the data columns used to create the Vocab.
|
|
412
|
+
Default: ``None`` , use all columns.
|
|
413
|
+
freq_range (tuple[int, int], optional): The Token frequency range used to create the Vocab. Must contain
|
|
414
|
+
two elements representing the minimum and maximum frequencies, within which the Token will be retained.
|
|
415
|
+
When the minimum or maximum frequency is None, it means there is no minimum or maximum frequency limit.
|
|
416
|
+
Default: ``None`` , no Token frequency range restriction.
|
|
417
|
+
top_k (int, optional): Only the first specified number of Tokens with the highest Token frequency are
|
|
418
|
+
selected to build the Vocab. This operation will be performed after Token frequency filtering. If
|
|
419
|
+
the value is greater than the total number of Tokens, all Tokens will be retained. Default: ``None`` ,
|
|
420
|
+
there is no limit to the number of Tokens.
|
|
421
|
+
special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
|
|
422
|
+
no special Token is appended.
|
|
423
|
+
special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
|
|
424
|
+
the bottom of the Vocab. Default: ``True``.
|
|
417
425
|
|
|
418
426
|
Returns:
|
|
419
|
-
Vocab, Vocab
|
|
427
|
+
Vocab, Vocab built from the dataset.
|
|
428
|
+
|
|
429
|
+
Raises:
|
|
430
|
+
TypeError: If `columns` is not of type list[str].
|
|
431
|
+
TypeError: If `freq_range` is not of type tuple[int, int].
|
|
432
|
+
ValueError: If an element of `freq_range` is negative.
|
|
433
|
+
TypeError: If `top_k` is not of type int.
|
|
434
|
+
ValueError: If `top_k` is not positive.
|
|
435
|
+
TypeError: If `special_tokens` is not of type list[str].
|
|
436
|
+
ValueError: If there are duplicate elements in `special_tokens`.
|
|
437
|
+
TypeError: If `special_first` is not of type bool.
|
|
420
438
|
|
|
421
439
|
Examples:
|
|
422
440
|
>>> import mindspore.dataset as ds
|
|
423
441
|
>>> import mindspore.dataset.text as text
|
|
442
|
+
>>>
|
|
424
443
|
>>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
|
|
425
444
|
>>> vocab = text.Vocab.from_dataset(dataset, "text", freq_range=None, top_k=None,
|
|
426
445
|
... special_tokens=["<pad>", "<unk>"],
|
|
427
446
|
... special_first=True)
|
|
428
|
-
>>>
|
|
447
|
+
>>> # Use the vocab to look up string to id
|
|
448
|
+
>>> lookup = text.Lookup(vocab, "<unk>")
|
|
449
|
+
>>> id = lookup("text1")
|
|
429
450
|
"""
|
|
430
451
|
|
|
431
452
|
vocab = cls()
|
|
@@ -437,22 +458,30 @@ class Vocab:
|
|
|
437
458
|
@check_from_list
|
|
438
459
|
def from_list(cls, word_list, special_tokens=None, special_first=True):
|
|
439
460
|
"""
|
|
440
|
-
Build a
|
|
461
|
+
Build a Vocab from a given Token list.
|
|
441
462
|
|
|
442
463
|
Args:
|
|
443
|
-
word_list (list):
|
|
444
|
-
special_tokens (list, optional): A list of
|
|
445
|
-
|
|
446
|
-
special_first (bool, optional): Whether
|
|
447
|
-
|
|
448
|
-
`special_tokens` will be prepended. Default: ``True``.
|
|
464
|
+
word_list (list[str]): The Token list to build the Vocab from.
|
|
465
|
+
special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
|
|
466
|
+
no special Token is appended.
|
|
467
|
+
special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
|
|
468
|
+
the bottom of the Vocab. Default: ``True``.
|
|
449
469
|
|
|
450
470
|
Returns:
|
|
451
|
-
Vocab, Vocab
|
|
471
|
+
Vocab, Vocab built from the list.
|
|
472
|
+
|
|
473
|
+
Raises:
|
|
474
|
+
TypeError: If `word_list` is not of type list[str].
|
|
475
|
+
ValueError: If there are duplicate elements in `word_list`.
|
|
476
|
+
TypeError: If `special_tokens` is not of type list[str].
|
|
477
|
+
ValueError: If there are duplicate elements in `special_tokens`.
|
|
478
|
+
TypeError: If `special_first` is not of type bool.
|
|
452
479
|
|
|
453
480
|
Examples:
|
|
454
481
|
>>> import mindspore.dataset.text as text
|
|
455
482
|
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
|
|
483
|
+
>>> # look up strings to ids
|
|
484
|
+
>>> ids = vocab.tokens_to_ids(["w1", "w3"])
|
|
456
485
|
"""
|
|
457
486
|
|
|
458
487
|
if special_tokens is None:
|
|
@@ -465,21 +494,29 @@ class Vocab:
|
|
|
465
494
|
@check_from_file
|
|
466
495
|
def from_file(cls, file_path, delimiter="", vocab_size=None, special_tokens=None, special_first=True):
|
|
467
496
|
"""
|
|
468
|
-
Build a
|
|
497
|
+
Build a Vocab from a file.
|
|
469
498
|
|
|
470
499
|
Args:
|
|
471
|
-
file_path (str):
|
|
472
|
-
delimiter (str, optional):
|
|
473
|
-
|
|
474
|
-
vocab_size (int, optional):
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
500
|
+
file_path (str): The path of the file to build the Vocab from.
|
|
501
|
+
delimiter (str, optional): The separator for the Token in the file line. The string before the separator
|
|
502
|
+
will be treated as a Token. Default: ``''``, the whole line will be treated as a Token.
|
|
503
|
+
vocab_size (int, optional): The upper limit on the number of Tokens that Vocab can contain.
|
|
504
|
+
Default: ``None`` , no upper limit on the number of Tokens.
|
|
505
|
+
special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
|
|
506
|
+
no special Token is appended.
|
|
507
|
+
special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
|
|
508
|
+
the bottom of the Vocab. Default: ``True``.
|
|
480
509
|
|
|
481
510
|
Returns:
|
|
482
|
-
Vocab, Vocab
|
|
511
|
+
Vocab, Vocab built from the file.
|
|
512
|
+
|
|
513
|
+
Raises:
|
|
514
|
+
TypeError: If `file_path` is not of type str.
|
|
515
|
+
TypeError: If `delimiter` is not of type str.
|
|
516
|
+
ValueError: If `vocab_size` is not positive.
|
|
517
|
+
TypeError: If `special_tokens` is not of type list[str].
|
|
518
|
+
ValueError: If there are duplicate elements in `special_tokens`.
|
|
519
|
+
TypeError: If `special_first` is not of type bool.
|
|
483
520
|
|
|
484
521
|
Examples:
|
|
485
522
|
>>> import mindspore.dataset.text as text
|
|
@@ -496,6 +533,9 @@ class Vocab:
|
|
|
496
533
|
>>>
|
|
497
534
|
>>> # Finally, there are 5 words in the vocab: "<pad>", "<unk>", "apple", "banana", "cat".
|
|
498
535
|
>>> vocabulary = vocab.vocab()
|
|
536
|
+
>>>
|
|
537
|
+
>>> # look up strings to ids
|
|
538
|
+
>>> ids = vocab.tokens_to_ids(["apple", "banana"])
|
|
499
539
|
"""
|
|
500
540
|
|
|
501
541
|
if vocab_size is None:
|
|
@@ -510,18 +550,26 @@ class Vocab:
|
|
|
510
550
|
@check_from_dict
|
|
511
551
|
def from_dict(cls, word_dict):
|
|
512
552
|
"""
|
|
513
|
-
Build a
|
|
553
|
+
Build a Vocab from a given dictionary.
|
|
514
554
|
|
|
515
555
|
Args:
|
|
516
|
-
word_dict (dict):
|
|
517
|
-
to start from 0 and be continuous. ValueError will be raised if id is negative.
|
|
556
|
+
word_dict (dict[str, int]): A dictionary storing the mappings between each Token and its ID.
|
|
518
557
|
|
|
519
558
|
Returns:
|
|
520
|
-
Vocab, Vocab
|
|
559
|
+
Vocab, Vocab built from the dictionary.
|
|
560
|
+
|
|
561
|
+
Raises:
|
|
562
|
+
TypeError: If `word_dict` is not of type dict[str, int].
|
|
563
|
+
ValueError: If any ID (dictionary value) in `word_dict` is negative.
|
|
521
564
|
|
|
522
565
|
Examples:
|
|
523
566
|
>>> import mindspore.dataset.text as text
|
|
524
567
|
>>> vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "<unk>": 6})
|
|
568
|
+
>>>
|
|
569
|
+
>>> # look up ids to string
|
|
570
|
+
>>> tokens = vocab.ids_to_tokens([3, 4, 5])
|
|
571
|
+
>>> print(tokens)
|
|
572
|
+
['home', 'the', 'world']
|
|
525
573
|
"""
|
|
526
574
|
|
|
527
575
|
vocab = cls()
|
|
@@ -530,15 +578,17 @@ class Vocab:
|
|
|
530
578
|
|
|
531
579
|
def vocab(self):
|
|
532
580
|
"""
|
|
533
|
-
Get the
|
|
581
|
+
Get the dictionary of the mappings between Tokens and their IDs.
|
|
534
582
|
|
|
535
583
|
Returns:
|
|
536
|
-
|
|
584
|
+
dict[str, int], the dictionary of mappings between Tokens and IDs.
|
|
537
585
|
|
|
538
586
|
Examples:
|
|
539
587
|
>>> import mindspore.dataset.text as text
|
|
540
588
|
>>> vocab = text.Vocab.from_list(["word_1", "word_2", "word_3", "word_4"])
|
|
541
589
|
>>> vocabory_dict = vocab.vocab()
|
|
590
|
+
>>> print(sorted(vocabory_dict.items()))
|
|
591
|
+
[('word_1', 0), ('word_2', 1), ('word_3', 2), ('word_4', 3)]
|
|
542
592
|
"""
|
|
543
593
|
check_vocab(self.c_vocab)
|
|
544
594
|
return self.c_vocab.vocab()
|
|
@@ -546,19 +596,24 @@ class Vocab:
|
|
|
546
596
|
@check_tokens_to_ids
|
|
547
597
|
def tokens_to_ids(self, tokens):
|
|
548
598
|
"""
|
|
549
|
-
|
|
550
|
-
If token does not exist, return id with value -1.
|
|
599
|
+
Look up the ID corresponding to the specified Token.
|
|
551
600
|
|
|
552
601
|
Args:
|
|
553
|
-
tokens (Union[str, list[str]]):
|
|
602
|
+
tokens (Union[str, list[str], numpy.ndarray]): The Token or list of Tokens to be looked up.
|
|
603
|
+
If the Token does not exist, -1 is returned.
|
|
554
604
|
|
|
555
605
|
Returns:
|
|
556
|
-
|
|
606
|
+
Union[int, list[int]], the ID(s) corresponding to the Token(s).
|
|
607
|
+
|
|
608
|
+
Raises:
|
|
609
|
+
TypeError: If `tokens` is not of type Union[str, list[str], numpy.ndarray].
|
|
557
610
|
|
|
558
611
|
Examples:
|
|
559
612
|
>>> import mindspore.dataset.text as text
|
|
560
613
|
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
|
|
561
614
|
>>> ids = vocab.tokens_to_ids(["w1", "w3"])
|
|
615
|
+
>>> print(ids)
|
|
616
|
+
[1, 3]
|
|
562
617
|
"""
|
|
563
618
|
check_vocab(self.c_vocab)
|
|
564
619
|
if isinstance(tokens, np.ndarray):
|
|
@@ -570,19 +625,25 @@ class Vocab:
|
|
|
570
625
|
@check_ids_to_tokens
|
|
571
626
|
def ids_to_tokens(self, ids):
|
|
572
627
|
"""
|
|
573
|
-
|
|
574
|
-
If id does not exist, return empty string.
|
|
628
|
+
Look up the Token corresponding to the specified ID.
|
|
575
629
|
|
|
576
630
|
Args:
|
|
577
|
-
ids (Union[int, list[int]]): The
|
|
631
|
+
ids (Union[int, list[int], numpy.ndarray]): The ID or list of IDs to be looked up.
|
|
632
|
+
If the ID does not exist, an empty string is returned.
|
|
578
633
|
|
|
579
634
|
Returns:
|
|
580
|
-
|
|
635
|
+
Union[str, list[str]], the Token(s) corresponding to the ID(s).
|
|
636
|
+
|
|
637
|
+
Raises:
|
|
638
|
+
TypeError: If `ids` is not of type Union[int, list[int], numpy.ndarray].
|
|
639
|
+
ValueError: If an element of `ids` is negative.
|
|
581
640
|
|
|
582
641
|
Examples:
|
|
583
642
|
>>> import mindspore.dataset.text as text
|
|
584
643
|
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
|
|
585
|
-
>>> token = vocab.ids_to_tokens(
|
|
644
|
+
>>> token = vocab.ids_to_tokens(1)
|
|
645
|
+
>>> print(token)
|
|
646
|
+
w1
|
|
586
647
|
"""
|
|
587
648
|
check_vocab(self.c_vocab)
|
|
588
649
|
if isinstance(ids, np.ndarray):
|
|
@@ -610,8 +671,11 @@ def to_bytes(array, encoding='utf8'):
|
|
|
610
671
|
>>>
|
|
611
672
|
>>> data = np.array([["1", "2", "3"]], dtype=np.str_)
|
|
612
673
|
>>> dataset = ds.NumpySlicesDataset(data, column_names=["text"])
|
|
674
|
+
>>> result = []
|
|
613
675
|
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
614
|
-
...
|
|
676
|
+
... result.append(text.to_bytes(item["text"]))
|
|
677
|
+
>>> print(result)
|
|
678
|
+
[array([b'1', b'2', b'3'], dtype='|S1')]
|
|
615
679
|
"""
|
|
616
680
|
|
|
617
681
|
if not isinstance(array, np.ndarray):
|
|
@@ -638,8 +702,11 @@ def to_str(array, encoding='utf8'):
|
|
|
638
702
|
>>>
|
|
639
703
|
>>> data = np.array([["1", "2", "3"]], dtype=np.bytes_)
|
|
640
704
|
>>> dataset = ds.NumpySlicesDataset(data, column_names=["text"])
|
|
705
|
+
>>> result = []
|
|
641
706
|
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
|
|
642
|
-
...
|
|
707
|
+
... result.append(text.to_str(item["text"]))
|
|
708
|
+
>>> print(result)
|
|
709
|
+
[array(['1', '2', '3'], dtype='<U1')]
|
|
643
710
|
"""
|
|
644
711
|
|
|
645
712
|
if not isinstance(array, np.ndarray):
|