mindspore 2.1.0__cp38-cp38-manylinux1_x86_64.whl → 2.2.10__cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. More details are available on the original diff page.

Files changed (580)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_akg/akg/build_module.py +5 -6
  4. mindspore/_akg/akg/composite/build_module.py +46 -19
  5. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  6. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  7. mindspore/_akg/akg/tvm/api.py +4 -3
  8. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  9. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  10. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  11. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  12. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  13. mindspore/_akg/akg/tvm/build_module.py +16 -1
  14. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  15. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  16. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  17. mindspore/_akg/akg/tvm/module.py +1 -2
  18. mindspore/_akg/akg/tvm/stmt.py +2 -2
  19. mindspore/_akg/akg/utils/ascend_profilier/__init__.py +0 -0
  20. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  21. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  22. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  23. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  24. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  25. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  26. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  27. mindspore/_akg/akg/utils/kernel_exec.py +98 -274
  28. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  29. mindspore/_akg/akg/utils/tbe_codegen_utils.py +219 -0
  30. mindspore/_akg/akg/utils/util.py +38 -0
  31. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  32. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  33. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  34. mindspore/_check_jit_forbidden_api.py +3 -1
  35. mindspore/_checkparam.py +23 -29
  36. mindspore/_extends/graph_kernel/__init__.py +0 -1
  37. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  38. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  39. mindspore/_extends/graph_kernel/splitter.py +4 -11
  40. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
  41. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
  42. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  43. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  44. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
  45. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
  46. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  47. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  48. mindspore/_extends/parse/__init__.py +12 -15
  49. mindspore/_extends/parse/namespace.py +7 -33
  50. mindspore/_extends/parse/parser.py +61 -71
  51. mindspore/_extends/parse/resources.py +1 -1
  52. mindspore/_extends/parse/standard_method.py +74 -104
  53. mindspore/_extends/parse/trope.py +1 -1
  54. mindspore/_extends/remote/kernel_build_server.py +25 -7
  55. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  56. mindspore/_install_custom.py +43 -0
  57. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  58. mindspore/amp.py +47 -11
  59. mindspore/bin/cache_admin +0 -0
  60. mindspore/bin/cache_server +0 -0
  61. mindspore/boost/boost.py +1 -8
  62. mindspore/boost/boost_cell_wrapper.py +3 -2
  63. mindspore/boost/grad_accumulation.py +1 -1
  64. mindspore/boost/group_loss_scale_manager.py +8 -7
  65. mindspore/common/__init__.py +5 -3
  66. mindspore/common/_jit_fallback_utils.py +6 -0
  67. mindspore/common/_register_for_adapter.py +2 -0
  68. mindspore/common/_register_for_tensor.py +2 -2
  69. mindspore/common/_stub_tensor.py +13 -0
  70. mindspore/common/_utils.py +13 -0
  71. mindspore/common/api.py +174 -259
  72. mindspore/common/auto_dynamic_shape.py +494 -0
  73. mindspore/common/dtype.py +18 -11
  74. mindspore/common/dump.py +6 -4
  75. mindspore/common/initializer.py +14 -14
  76. mindspore/common/jit_config.py +33 -15
  77. mindspore/common/lazy_inline.py +126 -7
  78. mindspore/common/mindir_util.py +101 -0
  79. mindspore/common/parameter.py +51 -41
  80. mindspore/common/seed.py +4 -4
  81. mindspore/common/sparse_tensor.py +13 -14
  82. mindspore/common/tensor.py +243 -165
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +83 -4
  85. mindspore/communication/management.py +152 -84
  86. mindspore/config/op_info.config +14 -3
  87. mindspore/config/super_bar_config.json +4 -2
  88. mindspore/context.py +152 -61
  89. mindspore/dataset/__init__.py +5 -5
  90. mindspore/dataset/audio/__init__.py +2 -2
  91. mindspore/dataset/audio/transforms.py +52 -52
  92. mindspore/dataset/callback/ds_callback.py +16 -2
  93. mindspore/dataset/core/config.py +68 -51
  94. mindspore/dataset/engine/cache_client.py +28 -5
  95. mindspore/dataset/engine/datasets.py +250 -112
  96. mindspore/dataset/engine/datasets_audio.py +43 -211
  97. mindspore/dataset/engine/datasets_standard_format.py +16 -35
  98. mindspore/dataset/engine/datasets_text.py +43 -67
  99. mindspore/dataset/engine/datasets_user_defined.py +86 -100
  100. mindspore/dataset/engine/datasets_vision.py +219 -1029
  101. mindspore/dataset/engine/iterators.py +11 -4
  102. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
  103. mindspore/dataset/engine/obs/util.py +3 -0
  104. mindspore/dataset/engine/samplers.py +1 -1
  105. mindspore/dataset/engine/validators.py +19 -5
  106. mindspore/dataset/text/__init__.py +3 -3
  107. mindspore/dataset/text/transforms.py +101 -127
  108. mindspore/dataset/text/utils.py +205 -138
  109. mindspore/dataset/transforms/__init__.py +1 -1
  110. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  111. mindspore/dataset/transforms/transforms.py +95 -40
  112. mindspore/dataset/utils/browse_dataset.py +8 -2
  113. mindspore/dataset/utils/line_reader.py +17 -19
  114. mindspore/dataset/vision/__init__.py +3 -3
  115. mindspore/dataset/vision/c_transforms.py +6 -3
  116. mindspore/dataset/vision/transforms.py +409 -287
  117. mindspore/dataset/vision/utils.py +13 -14
  118. mindspore/dataset/vision/validators.py +11 -1
  119. mindspore/experimental/map_parameter.py +14 -0
  120. mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
  121. mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
  122. mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
  123. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  124. mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
  125. mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
  126. mindspore/gen_ops.py +273 -0
  127. mindspore/include/OWNERS +0 -1
  128. mindspore/include/api/data_type.h +2 -1
  129. mindspore/include/api/graph.h +0 -15
  130. mindspore/include/api/kernel.h +2 -0
  131. mindspore/include/api/kernel_api.h +37 -12
  132. mindspore/include/api/model.h +17 -14
  133. mindspore/include/api/status.h +8 -3
  134. mindspore/include/api/types.h +37 -4
  135. mindspore/include/c_api/ms/abstract.h +67 -0
  136. mindspore/include/c_api/ms/attribute.h +197 -0
  137. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  138. mindspore/include/c_api/ms/base/macros.h +32 -0
  139. mindspore/include/c_api/ms/base/status.h +33 -0
  140. mindspore/include/c_api/ms/base/types.h +282 -0
  141. mindspore/include/c_api/ms/context.h +102 -0
  142. mindspore/include/c_api/ms/graph.h +160 -0
  143. mindspore/include/c_api/ms/node.h +606 -0
  144. mindspore/include/c_api/ms/tensor.h +161 -0
  145. mindspore/include/c_api/ms/value.h +84 -0
  146. mindspore/include/dataset/constants.h +6 -5
  147. mindspore/include/dataset/execute.h +23 -13
  148. mindspore/include/dataset/text.h +26 -26
  149. mindspore/include/dataset/transforms.h +13 -13
  150. mindspore/include/dataset/vision.h +60 -60
  151. mindspore/include/dataset/vision_ascend.h +5 -6
  152. mindspore/include/dataset/vision_lite.h +17 -17
  153. mindspore/include/mindapi/base/type_id.h +1 -0
  154. mindspore/include/mindapi/base/types.h +1 -0
  155. mindspore/lib/libdnnl.so.2 +0 -0
  156. mindspore/lib/libjemalloc.so.2 +0 -0
  157. mindspore/lib/libmindspore.so +0 -0
  158. mindspore/lib/libmindspore_backend.so +0 -0
  159. mindspore/lib/libmindspore_common.so +0 -0
  160. mindspore/lib/libmindspore_core.so +0 -0
  161. mindspore/lib/libmindspore_glog.so.0 +0 -0
  162. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  163. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  164. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  165. mindspore/lib/libmindspore_shared_lib.so +0 -0
  166. mindspore/lib/libnnacl.so +0 -0
  167. mindspore/lib/libopencv_core.so.4.5 +0 -0
  168. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  169. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  170. mindspore/lib/libps_cache.so +0 -0
  171. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  172. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  173. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  174. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  175. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  176. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  177. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  178. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  179. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  180. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  181. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  182. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  183. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  184. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  185. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  186. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8928 -0
  187. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  188. mindspore/lib/plugin/ascend/libakg.so +0 -0
  189. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  190. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  191. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  192. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  193. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  194. mindspore/lib/plugin/cpu/libakg.so +0 -0
  195. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  196. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  197. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  198. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  199. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  201. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  202. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  203. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  204. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  205. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  206. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  207. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  208. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  209. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  210. mindspore/nn/__init__.py +0 -2
  211. mindspore/nn/cell.py +313 -74
  212. mindspore/nn/dynamic_lr.py +21 -21
  213. mindspore/nn/layer/activation.py +22 -30
  214. mindspore/nn/layer/basic.py +15 -13
  215. mindspore/nn/layer/channel_shuffle.py +1 -1
  216. mindspore/nn/layer/container.py +271 -9
  217. mindspore/nn/layer/conv.py +323 -204
  218. mindspore/nn/layer/dense.py +8 -5
  219. mindspore/nn/layer/embedding.py +33 -27
  220. mindspore/nn/layer/flash_attention.py +141 -88
  221. mindspore/nn/layer/image.py +8 -6
  222. mindspore/nn/layer/math.py +16 -25
  223. mindspore/nn/layer/normalization.py +107 -66
  224. mindspore/nn/layer/padding.py +1 -1
  225. mindspore/nn/layer/pooling.py +131 -109
  226. mindspore/nn/layer/rnn_cells.py +27 -22
  227. mindspore/nn/layer/rnns.py +13 -16
  228. mindspore/nn/layer/thor_layer.py +1 -1
  229. mindspore/nn/layer/transformer.py +221 -154
  230. mindspore/nn/learning_rate_schedule.py +9 -1
  231. mindspore/nn/loss/loss.py +235 -174
  232. mindspore/nn/optim/ada_grad.py +2 -1
  233. mindspore/nn/optim/adadelta.py +1 -0
  234. mindspore/nn/optim/adafactor.py +2 -1
  235. mindspore/nn/optim/adam.py +7 -4
  236. mindspore/nn/optim/adamax.py +3 -2
  237. mindspore/nn/optim/adasum.py +2 -2
  238. mindspore/nn/optim/asgd.py +2 -3
  239. mindspore/nn/optim/ftrl.py +6 -5
  240. mindspore/nn/optim/lamb.py +7 -4
  241. mindspore/nn/optim/lars.py +1 -1
  242. mindspore/nn/optim/lazyadam.py +5 -3
  243. mindspore/nn/optim/momentum.py +2 -1
  244. mindspore/nn/optim/optimizer.py +53 -4
  245. mindspore/nn/optim/proximal_ada_grad.py +3 -4
  246. mindspore/nn/optim/rmsprop.py +4 -3
  247. mindspore/nn/optim/rprop.py +23 -12
  248. mindspore/nn/optim/sgd.py +26 -11
  249. mindspore/nn/optim/thor.py +9 -7
  250. mindspore/nn/probability/bijector/bijector.py +5 -5
  251. mindspore/nn/probability/bijector/power_transform.py +27 -27
  252. mindspore/nn/probability/bijector/softplus.py +3 -3
  253. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
  254. mindspore/nn/probability/distribution/bernoulli.py +5 -5
  255. mindspore/nn/probability/distribution/beta.py +3 -3
  256. mindspore/nn/probability/distribution/categorical.py +7 -7
  257. mindspore/nn/probability/distribution/cauchy.py +0 -1
  258. mindspore/nn/probability/distribution/distribution.py +3 -3
  259. mindspore/nn/probability/distribution/gamma.py +3 -3
  260. mindspore/nn/probability/distribution/geometric.py +4 -4
  261. mindspore/nn/probability/distribution/gumbel.py +4 -4
  262. mindspore/nn/probability/distribution/log_normal.py +2 -2
  263. mindspore/nn/probability/distribution/logistic.py +2 -2
  264. mindspore/nn/probability/distribution/poisson.py +4 -4
  265. mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
  266. mindspore/nn/probability/distribution/uniform.py +6 -6
  267. mindspore/nn/wrap/cell_wrapper.py +84 -34
  268. mindspore/nn/wrap/grad_reducer.py +8 -5
  269. mindspore/nn/wrap/loss_scale.py +105 -42
  270. mindspore/numpy/array_creations.py +1 -2
  271. mindspore/numpy/array_ops.py +3 -2
  272. mindspore/numpy/utils_const.py +5 -5
  273. mindspore/offline_debug/convert_async.py +2 -2
  274. mindspore/ops/_grad_experimental/__init__.py +0 -5
  275. mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
  276. mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
  277. mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
  278. mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
  279. mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
  280. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
  281. mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
  282. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  283. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  284. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
  285. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
  286. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
  287. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
  288. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
  289. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
  290. mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
  291. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  292. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  293. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  294. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  295. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  296. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  297. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  298. mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
  299. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  300. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  301. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  302. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  303. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  304. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  305. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  306. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
  307. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
  308. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  309. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  310. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  311. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  312. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  313. mindspore/ops/_op_impl/tbe/__init__.py +4 -4
  314. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  315. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  316. mindspore/ops/_primitive_cache.py +1 -1
  317. mindspore/ops/_tracefunc.py +45 -13
  318. mindspore/ops/_utils/utils.py +6 -1
  319. mindspore/ops/_vmap/vmap_array_ops.py +3 -3
  320. mindspore/ops/_vmap/vmap_base.py +3 -3
  321. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  322. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  323. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  324. mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
  325. mindspore/ops/arg_dtype_cast.py +54 -0
  326. mindspore/ops/composite/base.py +37 -10
  327. mindspore/ops/composite/math_ops.py +5 -4
  328. mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
  329. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
  330. mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
  331. mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
  332. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  333. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  334. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  335. mindspore/ops/deprecated.py +304 -0
  336. mindspore/ops/function/__init__.py +4 -1
  337. mindspore/ops/function/array_func.py +174 -193
  338. mindspore/ops/function/clip_func.py +81 -13
  339. mindspore/ops/function/debug_func.py +1 -1
  340. mindspore/ops/function/grad/grad_func.py +18 -9
  341. mindspore/ops/function/image_func.py +10 -4
  342. mindspore/ops/function/linalg_func.py +5 -5
  343. mindspore/ops/function/math_func.py +575 -386
  344. mindspore/ops/function/nn_func.py +568 -260
  345. mindspore/ops/function/random_func.py +88 -57
  346. mindspore/ops/function/sparse_func.py +1 -1
  347. mindspore/ops/function/sparse_unary_func.py +14 -12
  348. mindspore/ops/function/vmap_func.py +6 -5
  349. mindspore/ops/functional.py +15 -10
  350. mindspore/ops/op_info_register.py +244 -25
  351. mindspore/ops/operations/__init__.py +28 -19
  352. mindspore/ops/operations/_grad_ops.py +72 -7
  353. mindspore/ops/operations/_inner_ops.py +350 -17
  354. mindspore/ops/operations/_quant_ops.py +4 -8
  355. mindspore/ops/operations/_sequence_ops.py +42 -0
  356. mindspore/ops/operations/array_ops.py +68 -282
  357. mindspore/ops/operations/comm_ops.py +107 -59
  358. mindspore/ops/operations/custom_ops.py +94 -70
  359. mindspore/ops/operations/debug_ops.py +8 -4
  360. mindspore/ops/operations/image_ops.py +18 -12
  361. mindspore/ops/operations/inner_ops.py +26 -3
  362. mindspore/ops/operations/math_ops.py +189 -141
  363. mindspore/ops/operations/nn_ops.py +794 -489
  364. mindspore/ops/operations/other_ops.py +0 -22
  365. mindspore/ops/operations/random_ops.py +53 -111
  366. mindspore/ops/operations/sparse_ops.py +3 -1
  367. mindspore/ops/primitive.py +24 -18
  368. mindspore/parallel/_auto_parallel_context.py +68 -8
  369. mindspore/parallel/_cost_model_context.py +2 -2
  370. mindspore/parallel/_offload_context.py +17 -3
  371. mindspore/parallel/_parallel_serialization.py +12 -5
  372. mindspore/parallel/_ps_context.py +12 -0
  373. mindspore/parallel/_tensor.py +18 -13
  374. mindspore/parallel/_transformer/layers.py +5 -3
  375. mindspore/parallel/_transformer/loss.py +1 -0
  376. mindspore/parallel/_transformer/moe.py +2 -2
  377. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  378. mindspore/parallel/_transformer/transformer.py +23 -3
  379. mindspore/parallel/_utils.py +11 -7
  380. mindspore/parallel/algo_parameter_config.py +85 -5
  381. mindspore/parallel/checkpoint_transform.py +19 -12
  382. mindspore/parallel/shard.py +21 -14
  383. mindspore/profiler/common/struct_type.py +3 -3
  384. mindspore/profiler/common/util.py +4 -2
  385. mindspore/profiler/envprofiling.py +1 -1
  386. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  387. mindspore/profiler/parser/ascend_flops_generator.py +2 -2
  388. mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
  389. mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
  390. mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
  391. mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
  392. mindspore/profiler/parser/ascend_op_generator.py +6 -6
  393. mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
  394. mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
  395. mindspore/profiler/parser/base_timeline_generator.py +10 -8
  396. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
  397. mindspore/profiler/parser/flops_parser.py +15 -11
  398. mindspore/profiler/parser/framework_parser.py +38 -22
  399. mindspore/profiler/parser/hccl_parser.py +16 -12
  400. mindspore/profiler/parser/integrator.py +22 -11
  401. mindspore/profiler/parser/memory_usage_parser.py +2 -2
  402. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  403. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  404. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  405. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  406. mindspore/profiler/parser/optime_parser.py +1 -1
  407. mindspore/profiler/parser/profiler_info.py +21 -2
  408. mindspore/profiler/parser/step_trace_parser.py +11 -14
  409. mindspore/profiler/profiling.py +179 -89
  410. mindspore/rewrite/api/node.py +102 -19
  411. mindspore/rewrite/api/node_type.py +5 -1
  412. mindspore/rewrite/api/pattern_engine.py +1 -1
  413. mindspore/rewrite/api/scoped_value.py +9 -17
  414. mindspore/rewrite/api/symbol_tree.py +131 -47
  415. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  416. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  417. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  418. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
  419. mindspore/rewrite/common/rewrite_elog.py +5 -1
  420. mindspore/rewrite/namer.py +33 -24
  421. mindspore/rewrite/namespace.py +14 -5
  422. mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
  423. mindspore/rewrite/node/call_function.py +79 -0
  424. mindspore/rewrite/node/cell_container.py +135 -0
  425. mindspore/rewrite/node/control_flow.py +88 -0
  426. mindspore/rewrite/{node.py → node/node.py} +273 -234
  427. mindspore/rewrite/node/node_manager.py +254 -0
  428. mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
  429. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  430. mindspore/rewrite/parsers/assign_parser.py +216 -221
  431. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  432. mindspore/rewrite/parsers/class_def_parser.py +174 -113
  433. mindspore/rewrite/parsers/constant_parser.py +9 -6
  434. mindspore/rewrite/parsers/container_parser.py +9 -7
  435. mindspore/rewrite/parsers/for_parser.py +36 -15
  436. mindspore/rewrite/parsers/function_def_parser.py +24 -16
  437. mindspore/rewrite/parsers/if_parser.py +28 -24
  438. mindspore/rewrite/parsers/module_parser.py +196 -25
  439. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  440. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  441. mindspore/rewrite/parsers/return_parser.py +6 -6
  442. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  443. mindspore/rewrite/sparsify/utils.py +1 -1
  444. mindspore/rewrite/symbol_tree.py +523 -578
  445. mindspore/rewrite/symbol_tree_builder.py +9 -193
  446. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  447. mindspore/run_check/_check_version.py +6 -4
  448. mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
  449. mindspore/safeguard/rewrite_obfuscation.py +541 -0
  450. mindspore/scipy/linalg.py +1 -1
  451. mindspore/scipy/optimize/minimize.py +7 -3
  452. mindspore/train/_utils.py +7 -3
  453. mindspore/train/amp.py +323 -123
  454. mindspore/train/anf_ir_pb2.py +14 -2
  455. mindspore/train/callback/_backup_and_restore.py +2 -12
  456. mindspore/train/callback/_callback.py +29 -4
  457. mindspore/train/callback/_checkpoint.py +23 -8
  458. mindspore/train/callback/_early_stop.py +2 -2
  459. mindspore/train/callback/_landscape.py +4 -4
  460. mindspore/train/callback/_loss_monitor.py +2 -2
  461. mindspore/train/callback/_on_request_exit.py +2 -2
  462. mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
  463. mindspore/train/callback/_summary_collector.py +15 -8
  464. mindspore/train/callback/_time_monitor.py +58 -5
  465. mindspore/train/data_sink.py +5 -11
  466. mindspore/train/dataset_helper.py +84 -57
  467. mindspore/train/loss_scale_manager.py +2 -2
  468. mindspore/train/metrics/__init__.py +3 -3
  469. mindspore/train/metrics/cosine_similarity.py +1 -1
  470. mindspore/train/metrics/hausdorff_distance.py +3 -2
  471. mindspore/train/metrics/mean_surface_distance.py +3 -2
  472. mindspore/train/metrics/metric.py +39 -19
  473. mindspore/train/metrics/roc.py +2 -2
  474. mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
  475. mindspore/train/mind_ir_pb2.py +85 -36
  476. mindspore/train/model.py +187 -47
  477. mindspore/train/serialization.py +487 -161
  478. mindspore/train/summary/_summary_adapter.py +1 -1
  479. mindspore/train/summary/_writer_pool.py +3 -2
  480. mindspore/train/summary/summary_record.py +37 -17
  481. mindspore/train/train_thor/convert_utils.py +3 -3
  482. mindspore/train/train_thor/dataset_helper.py +1 -1
  483. mindspore/version.py +1 -1
  484. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/METADATA +6 -7
  485. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/RECORD +488 -528
  486. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/entry_points.txt +0 -1
  487. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  488. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  489. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  490. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  491. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  492. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  493. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  494. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  495. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  496. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  497. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  498. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  499. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  500. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  501. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  502. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  503. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  504. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  505. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  506. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  507. mindspore/_extends/graph_kernel/expander.py +0 -80
  508. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
  509. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  510. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  511. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  512. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  513. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  514. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  515. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  516. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  517. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  518. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  519. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  520. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  521. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  522. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  523. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  524. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  525. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  526. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  527. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  528. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  529. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  530. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  531. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  532. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  533. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  534. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  535. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  536. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  537. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  538. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  539. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  540. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  541. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  542. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  543. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  544. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  545. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  546. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  547. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  548. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  549. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  550. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  551. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  552. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  553. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  554. mindspore/dataset/datapreprocess/__init__.py +0 -20
  555. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  556. mindspore/include/api/net.h +0 -142
  557. mindspore/nn/lr_scheduler.py +0 -262
  558. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
  559. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
  560. mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
  561. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  562. mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
  563. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
  565. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
  566. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  567. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  568. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
  569. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
  570. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  571. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  572. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
  573. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
  574. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
  576. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
  577. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  578. mindspore/rewrite/node_visitor.py +0 -44
  579. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/WHEEL +0 -0
  580. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/top_level.txt +0 -0
@@ -29,33 +29,41 @@ from .validators import check_vocab, check_from_file, check_from_list, check_fro
29
29
 
30
30
  class CharNGram(cde.CharNGram):
31
31
  """
32
- CharNGram object that is used to map tokens into pre-trained vectors.
32
+ CharNGram pre-trained word embeddings.
33
+
34
+ A word or sentence is represented using a character n-gram count vector, followed by a single
35
+ nonlinear transformation to yield a low-dimensional embedding.
33
36
  """
34
37
 
35
38
  @classmethod
36
39
  @check_from_file_vectors
37
40
  def from_file(cls, file_path, max_vectors=None):
38
41
  """
39
- Build a `CharNGram` vector from a file.
42
+ Load the CharNGram pre-training vector set file.
40
43
 
41
44
  Args:
42
- file_path (str): Path of the file that contains the `CharNGram` vectors.
43
- max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
45
+ file_path (str): Path to the CharNGram pre-training vector set file.
46
+ max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
44
47
  Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
45
48
  situations where the entire set doesn't fit in memory, or is not needed for another reason,
46
- passing `max_vectors` can limit the size of the loaded set. Default: ``None``, no limit.
49
+ this value can limit the size of the loaded set. Default: ``None``, no upper limit.
47
50
 
48
51
  Returns:
49
- CharNGram, CharNGram vector build from a file.
52
+ CharNGram, CharNGram pre-training vectors.
50
53
 
51
54
  Raises:
52
- RuntimeError: If `file_path` contains invalid data.
53
- ValueError: If `max_vectors` is invalid.
54
- TypeError: If `max_vectors` is not type of integer.
55
+ TypeError: If `file_path` is not of type str.
56
+ RuntimeError: If `file_path` does not exist or is not accessible.
57
+ TypeError: If `max_vectors` is not of type int.
58
+ ValueError: If `max_vectors` is negative.
55
59
 
56
60
  Examples:
57
61
  >>> import mindspore.dataset.text as text
62
+ >>>
58
63
  >>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
64
+ >>> to_vectors = text.ToVectors(char_n_gram)
65
+ >>> # Look up a token into vectors according to the CharNGram model.
66
+ >>> word_vector = to_vectors(["word1", "word2"])
59
67
  """
60
68
 
61
69
  max_vectors = max_vectors if max_vectors is not None else 0
@@ -64,34 +72,40 @@ class CharNGram(cde.CharNGram):
64
72
 
65
73
  class FastText(cde.FastText):
66
74
  """
67
- FastText object that is used to map tokens into vectors.
75
+ FastText pre-trained word embeddings.
76
+
77
+ FastText allows one to create an unsupervised learning or supervised learning algorithm for obtaining vector
78
+ representations for words.
68
79
  """
69
80
 
70
81
  @classmethod
71
82
  @check_from_file_vectors
72
83
  def from_file(cls, file_path, max_vectors=None):
73
84
  """
74
- Build a FastText vector from a file.
85
+ Load the FastText pre-training vector set file.
75
86
 
76
87
  Args:
77
- file_path (str): Path of the file that contains the vectors. The shuffix of pre-trained vector sets
78
- must be `*.vec` .
79
- max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
88
+ file_path (str): Path to the FastText pre-trained vector set file. File suffix should be `*.vec`.
89
+ max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
80
90
  Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
81
91
  situations where the entire set doesn't fit in memory, or is not needed for another reason,
82
- passing `max_vectors` can limit the size of the loaded set. Default: ``None``, no limit.
92
+ this value can limit the size of the loaded set. Default: ``None``, no upper limit.
83
93
 
84
94
  Returns:
85
- FastText, FastText vector build from a file.
95
+ FastText, FastText pre-training vectors.
86
96
 
87
97
  Raises:
88
- RuntimeError: If `file_path` contains invalid data.
89
- ValueError: If `max_vectors` is invalid.
90
- TypeError: If `max_vectors` is not type of integer.
98
+ TypeError: If `file_path` is not of type str.
99
+ RuntimeError: If `file_path` does not exist or is not accessible.
100
+ TypeError: If `max_vectors` is not of type int.
101
+ ValueError: If `max_vectors` is negative.
91
102
 
92
103
  Examples:
93
104
  >>> import mindspore.dataset.text as text
94
105
  >>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
106
+ >>> to_vectors = text.ToVectors(fast_text)
107
+ >>> # Look up a token into vectors according to the FastText model.
108
+ >>> word_vector = to_vectors(["word1", "word2"])
95
109
  """
96
110
 
97
111
  max_vectors = max_vectors if max_vectors is not None else 0
@@ -100,34 +114,39 @@ class FastText(cde.FastText):
100
114
 
101
115
  class GloVe(cde.GloVe):
102
116
  """
103
- GloVe object that is used to map tokens into vectors.
117
+ Global Vectors (GloVe) pre-trained word embeddings.
118
+
119
+ GloVe is an unsupervised learning algorithm for obtaining vector representations for words.
104
120
  """
105
121
 
106
122
  @classmethod
107
123
  @check_from_file_vectors
108
124
  def from_file(cls, file_path, max_vectors=None):
109
125
  """
110
- Build a GloVe vector from a file.
126
+ Load the GloVe pre-training vector set file.
111
127
 
112
128
  Args:
113
- file_path (str): Path of the file that contains the vectors. The format of pre-trained vector sets
114
- must be `glove.6B.*.txt` .
115
- max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
129
+ file_path (str): Path to the GloVe pre-training vector set file. File name is similar to `glove.*.txt`.
130
+ max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
116
131
  Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
117
132
  situations where the entire set doesn't fit in memory, or is not needed for another reason,
118
- passing `max_vectors` can limit the size of the loaded set. Default: ``None``, no limit.
133
+ this value can limit the size of the loaded set. Default: ``None``, no upper limit.
119
134
 
120
135
  Returns:
121
- GloVe, GloVe vector build from a file.
136
+ GloVe, GloVe pre-training vectors.
122
137
 
123
138
  Raises:
124
- RuntimeError: If `file_path` contains invalid data.
125
- ValueError: If `max_vectors` is invalid.
126
- TypeError: If `max_vectors` is not type of integer.
139
+ TypeError: If `file_path` is not of type str.
140
+ RuntimeError: If `file_path` does not exist or is not accessible.
141
+ TypeError: If `max_vectors` is not of type int.
142
+ ValueError: If `max_vectors` is negative.
127
143
 
128
144
  Examples:
129
145
  >>> import mindspore.dataset.text as text
130
146
  >>> glove = text.GloVe.from_file("/path/to/glove/file", max_vectors=None)
147
+ >>> to_vectors = text.ToVectors(glove)
148
+ >>> # Look up a token into vectors according to the GloVe model.
149
+ >>> word_vector = to_vectors(["word1", "word2"])
131
150
  """
132
151
 
133
152
  max_vectors = max_vectors if max_vectors is not None else 0
@@ -152,12 +171,11 @@ class JiebaMode(IntEnum):
152
171
 
153
172
  class NormalizeForm(IntEnum):
154
173
  """
155
- Enumeration class for `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
174
+ `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
156
175
 
157
- Possible enumeration values are: ``NormalizeForm.NONE``, ``NormalizeForm.NFC``, ``NormalizeForm.NFKC``,
158
- ``NormalizeForm.NFD`` and ``NormalizeForm.NFKD``.
176
+ Available values are as follows:
159
177
 
160
- - NormalizeForm.NONE: no normalization.
178
+ - NormalizeForm.NONE: No normalization.
161
179
  - NormalizeForm.NFC: Canonical Decomposition, followed by Canonical Composition.
162
180
  - NormalizeForm.NFKC: Compatibility Decomposition, followed by Canonical Composition.
163
181
  - NormalizeForm.NFD: Canonical Decomposition.
@@ -173,17 +191,14 @@ class NormalizeForm(IntEnum):
173
191
 
174
192
  class SentencePieceModel(IntEnum):
175
193
  """
176
- An enumeration for SentencePieceModel.
194
+ Subword algorithms for SentencePiece.
177
195
 
178
- Possible enumeration values are: ``SentencePieceModel.UNIGRAM``, ``SentencePieceModel.BPE``,
179
- ``SentencePieceModel.CHAR``, ``SentencePieceModel.WORD``.
196
+ Available values are as follows:
180
197
 
181
- - SentencePieceModel.UNIGRAM: Unigram Language Model means the next word in the sentence is assumed to be
182
- independent of the previous words generated by the model.
183
- - SentencePieceModel.BPE: refers to byte pair encoding algorithm, which replaces the most frequent pair of bytes in
184
- a sentence with a single, unused byte.
185
- - SentencePieceModel.CHAR: refers to char based sentencePiece Model type.
186
- - SentencePieceModel.WORD: refers to word based sentencePiece Model type.
198
+ - SentencePieceModel.UNIGRAM: `Unigram Language Model <https://arxiv.org/abs/1804.10959>`_ subword algorithm.
199
+ - SentencePieceModel.BPE: `Byte-Pair-Encoding <https://arxiv.org/abs/1508.07909>`_ subword algorithm.
200
+ - SentencePieceModel.CHAR: Character-based subword algorithm.
201
+ - SentencePieceModel.WORD: Word-based subword algorithm.
187
202
  """
188
203
 
189
204
  UNIGRAM = 0
@@ -221,17 +236,8 @@ class SentencePieceVocab:
221
236
  character_coverage (float): Amount of characters covered by the model. Recommend ``0.9995`` for
222
237
  languages with rich character set like Japanese or Chinese and ``1.0`` for other languages with small
223
238
  character set.
224
- model_type (SentencePieceModel): It can be ``SentencePieceModel.UNIGRAM``, ``SentencePieceModel.BPE``,
225
- ``SentencePieceModel.CHAR``, ``SentencePieceModel.WORD``.
226
- The input sentence must be pre-tokenized when using ``SentencePieceModel.WORD type``.
227
-
228
- - ``SentencePieceModel.UNIGRAM``, Unigram Language Model means the next word in the sentence
229
- is assumed to be independent of the previous words generated by the model.
230
- - ``SentencePieceModel.BPE``, refers to byte pair encoding algorithm, which replaces the most
231
- frequent pair of bytes in a sentence with a single, unused byte.
232
- - ``SentencePieceModel.CHAR``, refers to char based sentencePiece Model type.
233
- - ``SentencePieceModel.WORD``, refers to word based sentencePiece Model type.
234
-
239
+ model_type (SentencePieceModel): The desired subword algorithm. See :class:`~.text.SentencePieceModel`
240
+ for details on optional values.
235
241
  params (dict): A dictionary with no incoming parameters.
236
242
 
237
243
  Returns:
@@ -239,10 +245,16 @@ class SentencePieceVocab:
239
245
 
240
246
  Examples:
241
247
  >>> import mindspore.dataset as ds
248
+ >>> import mindspore.dataset.text as text
249
+ >>>
242
250
  >>> from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel
243
251
  >>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
244
252
  >>> vocab = SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
245
253
  ... SentencePieceModel.UNIGRAM, {})
254
+ >>> # Build tokenizer based on vocab
255
+ >>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=text.SPieceTokenizerOutType.STRING)
256
+ >>> txt = "Today is Tuesday."
257
+ >>> token = tokenizer(txt)
246
258
  """
247
259
 
248
260
  sentence_piece_vocab = cls()
@@ -264,17 +276,8 @@ class SentencePieceVocab:
264
276
  character_coverage (float): Amount of characters covered by the model. Recommend ``0.9995`` for
265
277
  languages with rich character set like Japanese or Chinese and ``1.0`` for other languages with small
266
278
  character set.
267
- model_type (SentencePieceModel): It can be ``SentencePieceModel.UNIGRAM``, ``SentencePieceModel.BPE``,
268
- ``SentencePieceModel.CHAR``, ``SentencePieceModel.WORD``.
269
- The input sentence must be pre-tokenized when using ``SentencePieceModel.WORD`` type.
270
-
271
- - ``SentencePieceModel.UNIGRAM``, Unigram Language Model means the next word in the sentence
272
- is assumed to be independent of the previous words generated by the model.
273
- - ``SentencePieceModel.BPE``, refers to byte pair encoding algorithm, which replaces the most
274
- frequent pair of bytes in a sentence with a single, unused byte.
275
- - ``SentencePieceModel.CHAR``, refers to char based sentencePiece Model type.
276
- - ``SentencePieceModel.WORD``, refers to word based sentencePiece Model type.
277
-
279
+ model_type (SentencePieceModel): The desired subword algorithm. See :class:`~.text.SentencePieceModel`
280
+ for details on optional values.
278
281
  params (dict): A dictionary with no incoming parameters(The parameters are derived from SentencePiece
279
282
  library).
280
283
 
@@ -285,6 +288,10 @@ class SentencePieceVocab:
285
288
  >>> from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel
286
289
  >>> vocab = SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
287
290
  ... SentencePieceModel.UNIGRAM, {})
291
+ >>> # Build tokenizer based on vocab model
292
+ >>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=text.SPieceTokenizerOutType.STRING)
293
+ >>> txt = "Today is Friday."
294
+ >>> token = tokenizer(txt)
288
295
  """
289
296
 
290
297
  sentence_piece_vocab = cls()
@@ -315,12 +322,12 @@ class SentencePieceVocab:
315
322
 
316
323
  class SPieceTokenizerLoadType(IntEnum):
317
324
  """
318
- An enumeration for loading type of :class:`mindspore.dataset.text.SentencePieceTokenizer` .
325
+ Model input type for the SentencePiece tokenizer.
319
326
 
320
- Possible enumeration values are: ``SPieceTokenizerLoadType.FILE``, ``SPieceTokenizerLoadType.MODEL``.
327
+ Available values are as follows:
321
328
 
322
- - SPieceTokenizerLoadType.FILE: Load SentencePiece tokenizer from a Vocab file.
323
- - SPieceTokenizerLoadType.MODEL: Load SentencePiece tokenizer from a SentencePieceVocab object.
329
+ - SPieceTokenizerLoadType.FILE: Load model from specified file path.
330
+ - SPieceTokenizerLoadType.MODEL: Load model from specified vocab object.
324
331
  """
325
332
 
326
333
  FILE = 0
@@ -343,33 +350,37 @@ class SPieceTokenizerOutType(IntEnum):
343
350
 
344
351
  class Vectors(cde.Vectors):
345
352
  """
346
- Vectors object that is used to map tokens into vectors.
353
+ Pre-trained word embeddings.
347
354
  """
348
355
 
349
356
  @classmethod
350
357
  @check_from_file_vectors
351
358
  def from_file(cls, file_path, max_vectors=None):
352
359
  """
353
- Build a vector from a file.
360
+ Load a pre-training vector set file.
354
361
 
355
362
  Args:
356
- file_path (str): Path of the file that contains the vectors.
357
- max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
363
+ file_path (str): Path to the pre-training vector set file.
364
+ max_vectors (int, optional): The upper limit on the number of pre-trained vectors to load.
358
365
  Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
359
366
  situations where the entire set doesn't fit in memory, or is not needed for another reason,
360
- passing `max_vectors` can limit the size of the loaded set. Default: ``None``, no limit.
367
+ this value can limit the size of the loaded set. Default: ``None``, no upper limit.
361
368
 
362
369
  Returns:
363
- Vectors, Vectors build from a file.
370
+ Vectors, pre-training vectors.
364
371
 
365
372
  Raises:
366
- RuntimeError: If `file_path` contains invalid data.
367
- ValueError: If `max_vectors` is invalid.
368
- TypeError: If `max_vectors` is not type of integer.
373
+ TypeError: If `file_path` is not of type str.
374
+ RuntimeError: If `file_path` does not exist or is not accessible.
375
+ TypeError: If `max_vectors` is not of type int.
376
+ ValueError: If `max_vectors` is negative.
369
377
 
370
378
  Examples:
371
379
  >>> import mindspore.dataset.text as text
372
380
  >>> vector = text.Vectors.from_file("/path/to/vectors/file", max_vectors=None)
381
+ >>> to_vectors = text.ToVectors(vector)
382
+ >>> # Look up a token into vectors according to the Vectors model.
383
+ >>> word_vector = to_vectors(["word1", "word2"])
373
384
  """
374
385
 
375
386
  max_vectors = max_vectors if max_vectors is not None else 0
@@ -378,9 +389,9 @@ class Vectors(cde.Vectors):
378
389
 
379
390
  class Vocab:
380
391
  """
381
- Vocab object that is used to save pairs of words and ids.
392
+ Create Vocab for training NLP models.
382
393
 
383
- It contains a map that maps each word(str) to an id(int) or reverse.
394
+ Vocab is a collection of all possible Tokens in the data, preserving the mapping between each Token and its ID.
384
395
  """
385
396
 
386
397
  def __init__(self):
@@ -390,42 +401,52 @@ class Vocab:
390
401
  @check_from_dataset
391
402
  def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, special_first=True):
392
403
  """
393
- Build a Vocab from a dataset.
404
+ Build a Vocab from a given dataset.
394
405
 
395
- This would collect all unique words in a dataset and return a vocab within
396
- the frequency range specified by user in freq_range. User would be warned if no words fall into the frequency.
397
- Words in vocab are ordered from the highest frequency to the lowest frequency. Words with the same frequency
398
- would be ordered lexicographically.
406
+ The samples in the dataset are used as a corpus to create Vocab, in which the Token is arranged in ascending
407
+ order of Token frequency, and Tokens with the same frequency are arranged in alphabetical order.
399
408
 
400
409
  Args:
401
- dataset (Dataset): dataset to build vocab from.
402
- columns (list[str], optional): column names to get words from. It can be a list of column names.
403
- Default: ``None``.
404
- freq_range (tuple, optional): A tuple of integers (min_frequency, max_frequency). Words within the frequency
405
- range would be kept. 0 <= min_frequency <= max_frequency <= total_words. min_frequency=0 is the same as
406
- min_frequency=1. max_frequency > total_words is the same as max_frequency = total_words.
407
- min_frequency/max_frequency can be ``None``, which corresponds to 0/total_words separately.
408
- Default: ``None``, all words are included.
409
- top_k (int, optional): top_k is greater than 0. Number of words to be built into vocab. top_k means most
410
- frequent words are taken. top_k is taken after freq_range. If not enough top_k, all words will be taken.
411
- Default: ``None``, all words are included.
412
- special_tokens (list, optional): A list of strings, each one is a special token. For example
413
- special_tokens=["<pad>","<unk>"]. Default: ``None``, no special tokens will be added.
414
- special_first (bool, optional): Whether `special_tokens` will be prepended/appended to vocab. If
415
- `special_tokens` is specified and `special_first` is set to ``True``, special_tokens will be prepended.
416
- Default: ``True``.
410
+ dataset (Dataset): The dataset to build the Vocab from.
411
+ columns (list[str], optional): The name of the data columns used to create the Vocab.
412
+ Default: ``None`` , use all columns.
413
+ freq_range (tuple[int, int], optional): The Token frequency range used to create the Vocab. Must contain
414
+ two elements representing the minimum and maximum frequencies, within which the Token will be retained.
415
+ When the minimum or maximum frequency is None, it means there is no minimum or maximum frequency limit.
416
+ Default: ``None`` , no Token frequency range restriction.
417
+ top_k (int, optional): Only the first specified number of Tokens with the highest Token frequency are
418
+ selected to build the Vocab. This operation will be performed after Token frequency filtering. If
419
+ the value is greater than the total number of Tokens, all Tokens will be retained. Default: ``None`` ,
420
+ there is no limit to the number of Tokens.
421
+ special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
422
+ no special Token is appended.
423
+ special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
424
+ the bottom of the Vocab. Default: ``True``.
417
425
 
418
426
  Returns:
419
- Vocab, Vocab object built from the dataset.
427
+ Vocab, Vocab built from the dataset.
428
+
429
+ Raises:
430
+ TypeError: If `columns` is not of type list[str].
431
+ TypeError: If `freq_range` is not of type tuple[int, int].
432
+ ValueError: If element of `freq_range` is negative.
433
+ TypeError: If `top_k` is not of type int.
434
+ ValueError: If `top_k` is not positive.
435
+ TypeError: If `special_tokens` is not of type list[str].
436
+ ValueError: If there are duplicate elements in `special_tokens`.
437
+ TypeError: If `special_first` is not of type bool.
420
438
 
421
439
  Examples:
422
440
  >>> import mindspore.dataset as ds
423
441
  >>> import mindspore.dataset.text as text
442
+ >>>
424
443
  >>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
425
444
  >>> vocab = text.Vocab.from_dataset(dataset, "text", freq_range=None, top_k=None,
426
445
  ... special_tokens=["<pad>", "<unk>"],
427
446
  ... special_first=True)
428
- >>> dataset = dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
447
+ >>> # Use the vocab to look up string to id
448
+ >>> lookup = text.Lookup(vocab, "<unk>")
449
+ >>> id = lookup("text1")
429
450
  """
430
451
 
431
452
  vocab = cls()
@@ -437,22 +458,30 @@ class Vocab:
437
458
  @check_from_list
438
459
  def from_list(cls, word_list, special_tokens=None, special_first=True):
439
460
  """
440
- Build a vocab object from a list of word.
461
+ Build a Vocab from a given Token list.
441
462
 
442
463
  Args:
443
- word_list (list): A list of string where each element is a word of type string.
444
- special_tokens (list, optional): A list of strings, each one is a special token. For example,
445
- special_tokens is ``"<pad>"``, ``"<unk>"``. Default: ``None``, no special tokens will be added.
446
- special_first (bool, optional): Whether `special_tokens` is prepended or appended to vocab.
447
- If `special_tokens` is specified and special_first is set to ``True``,
448
- `special_tokens` will be prepended. Default: ``True``.
464
+ word_list (list[str]): The Token list to build the Vocab from.
465
+ special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
466
+ no special Token is appended.
467
+ special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
468
+ the bottom of the Vocab. Default: ``True``.
449
469
 
450
470
  Returns:
451
- Vocab, Vocab object built from the list.
471
+ Vocab, Vocab built from the list.
472
+
473
+ Raises:
474
+ TypeError: If `word_list` is not of type list[str].
475
+ ValueError: If there are duplicate elements in `word_list`.
476
+ TypeError: If `special_tokens` is not of type list[str].
477
+ ValueError: If there are duplicate elements in `special_tokens`.
478
+ TypeError: If `special_first` is not of type bool.
452
479
 
453
480
  Examples:
454
481
  >>> import mindspore.dataset.text as text
455
482
  >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
483
+ >>> # look up strings to ids
484
+ >>> ids = vocab.tokens_to_ids(["w1", "w3"])
456
485
  """
457
486
 
458
487
  if special_tokens is None:
@@ -465,21 +494,29 @@ class Vocab:
465
494
  @check_from_file
466
495
  def from_file(cls, file_path, delimiter="", vocab_size=None, special_tokens=None, special_first=True):
467
496
  """
468
- Build a vocab object from a file.
497
+ Build a Vocab from a file.
469
498
 
470
499
  Args:
471
- file_path (str): Path to the file which contains the vocab list.
472
- delimiter (str, optional): A delimiter to break up each line in file, the first element is taken to be
473
- the word. Default: ``''``, the whole line will be treated as a word.
474
- vocab_size (int, optional): Number of words to read from file_path. Default: ``None``, all words are taken.
475
- special_tokens (list, optional): A list of strings, each one is a special token. For example
476
- special_tokens=["<pad>","<unk>"]. Default: ``None``, no special tokens will be added.
477
- special_first (bool, optional): Whether `special_tokens` will be prepended/appended to vocab,
478
- If special_tokens is specified and `special_first` is set to ``True``,
479
- special_tokens will be prepended. Default: ``True``.
500
+ file_path (str): The path of the file to build the Vocab from.
501
+ delimiter (str, optional): The separator for the Token in the file line. The string before the separator
502
+ will be treated as a Token. Default: ``''``, the whole line will be treated as a Token.
503
+ vocab_size (int, optional): The upper limit on the number of Tokens that Vocab can contain.
504
+ Default: ``None`` , no upper limit on the number of Tokens.
505
+ special_tokens (list[str], optional): A list of special Tokens to append to the Vocab. Default: ``None`` ,
506
+ no special Token is appended.
507
+ special_first (bool, optional): Whether to add the special Token to the top of the Vocab, otherwise to
508
+ the bottom of the Vocab. Default: ``True``.
480
509
 
481
510
  Returns:
482
- Vocab, Vocab object built from the file.
511
+ Vocab, Vocab built from the file.
512
+
513
+ Raises:
514
+ TypeError: If `file_path` is not of type str.
515
+ TypeError: If `delimiter` is not of type str.
516
+ ValueError: If `vocab_size` is not positive.
517
+ TypeError: If `special_tokens` is not of type list[str].
518
+ ValueError: If there are duplicate elements in `special_tokens`.
519
+ TypeError: If `special_first` is not of type bool.
483
520
 
484
521
  Examples:
485
522
  >>> import mindspore.dataset.text as text
@@ -496,6 +533,9 @@ class Vocab:
496
533
  >>>
497
534
  >>> # Finally, there are 5 words in the vocab: "<pad>", "<unk>", "apple", "banana", "cat".
498
535
  >>> vocabulary = vocab.vocab()
536
+ >>>
537
+ >>> # look up strings to ids
538
+ >>> ids = vocab.tokens_to_ids(["apple", "banana"])
499
539
  """
500
540
 
501
541
  if vocab_size is None:
@@ -510,18 +550,26 @@ class Vocab:
510
550
  @check_from_dict
511
551
  def from_dict(cls, word_dict):
512
552
  """
513
- Build a vocab object from a dict.
553
+ Build a Vocab from a given dictionary.
514
554
 
515
555
  Args:
516
- word_dict (dict): Dict contains word and id pairs, where word should be str and id be int. id is recommended
517
- to start from 0 and be continuous. ValueError will be raised if id is negative.
556
+ word_dict (dict[str, int]): A dictionary storing the mappings between each Token and its ID.
518
557
 
519
558
  Returns:
520
- Vocab, Vocab object built from the dict.
559
+ Vocab, Vocab built from the dictionary.
560
+
561
+ Raises:
562
+ TypeError: If `word_dict` is not of type dict[str, int].
563
+ ValueError: If any ID value in `word_dict` is negative.
521
564
 
522
565
  Examples:
523
566
  >>> import mindspore.dataset.text as text
524
567
  >>> vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "<unk>": 6})
568
+ >>>
569
+ >>> # look up ids to string
570
+ >>> tokens = vocab.ids_to_tokens([3, 4, 5])
571
+ >>> print(tokens)
572
+ ['home', 'the', 'world']
525
573
  """
526
574
 
527
575
  vocab = cls()
@@ -530,15 +578,17 @@ class Vocab:
530
578
 
531
579
  def vocab(self):
532
580
  """
533
- Get the vocabory table in dict type.
581
+ Get the dictionary of the mappings between Tokens and their IDs.
534
582
 
535
583
  Returns:
536
- A vocabulary consisting of word and id pairs.
584
+ dict[str, int], the dictionary of mappings between Tokens and IDs.
537
585
 
538
586
  Examples:
539
587
  >>> import mindspore.dataset.text as text
540
588
  >>> vocab = text.Vocab.from_list(["word_1", "word_2", "word_3", "word_4"])
541
589
  >>> vocabory_dict = vocab.vocab()
590
+ >>> print(sorted(vocabory_dict.items()))
591
+ [('word_1', 0), ('word_2', 1), ('word_3', 2), ('word_4', 3)]
542
592
  """
543
593
  check_vocab(self.c_vocab)
544
594
  return self.c_vocab.vocab()
@@ -546,19 +596,24 @@ class Vocab:
546
596
  @check_tokens_to_ids
547
597
  def tokens_to_ids(self, tokens):
548
598
  """
549
- Converts a token string or a sequence of tokens in a single integer id or a sequence of ids.
550
- If token does not exist, return id with value -1.
599
+ Look up the ID corresponding to the specified Token.
551
600
 
552
601
  Args:
553
- tokens (Union[str, list[str]]): One or several token(s) to convert to token id(s).
602
+ tokens (Union[str, list[str], numpy.ndarray]): The Token or list of Tokens to be looked up.
603
+ If the Token does not exist, -1 is returned.
554
604
 
555
605
  Returns:
556
- The token id or list of token ids.
606
+ Union[int, list[int]], the ID(s) corresponding to the Token(s).
607
+
608
+ Raises:
609
+ TypeError: If `tokens` is not of type Union[str, list[str], numpy.ndarray].
557
610
 
558
611
  Examples:
559
612
  >>> import mindspore.dataset.text as text
560
613
  >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
561
614
  >>> ids = vocab.tokens_to_ids(["w1", "w3"])
615
+ >>> print(ids)
616
+ [1, 3]
562
617
  """
563
618
  check_vocab(self.c_vocab)
564
619
  if isinstance(tokens, np.ndarray):
@@ -570,19 +625,25 @@ class Vocab:
570
625
  @check_ids_to_tokens
571
626
  def ids_to_tokens(self, ids):
572
627
  """
573
- Converts a single index or a sequence of indices in a token or a sequence of tokens.
574
- If id does not exist, return empty string.
628
+ Look up the Token corresponding to the specified ID.
575
629
 
576
630
  Args:
577
- ids (Union[int, list[int]]): The token id (or token ids) to convert to tokens.
631
+ ids (Union[int, list[int], numpy.ndarray]): The ID or list of IDs to be looked up.
632
+ If the ID does not exist, an empty string is returned.
578
633
 
579
634
  Returns:
580
- The decoded token(s).
635
+ Union[str, list[str]], the Token(s) corresponding to the ID(s).
636
+
637
+ Raises:
638
+ TypeError: If `ids` is not of type Union[int, list[int], numpy.ndarray].
639
+ ValueError: If element of `ids` is negative.
581
640
 
582
641
  Examples:
583
642
  >>> import mindspore.dataset.text as text
584
643
  >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
585
- >>> token = vocab.ids_to_tokens(0)
644
+ >>> token = vocab.ids_to_tokens(1)
645
+ >>> print(token)
646
+ w1
586
647
  """
587
648
  check_vocab(self.c_vocab)
588
649
  if isinstance(ids, np.ndarray):
@@ -610,8 +671,11 @@ def to_bytes(array, encoding='utf8'):
610
671
  >>>
611
672
  >>> data = np.array([["1", "2", "3"]], dtype=np.str_)
612
673
  >>> dataset = ds.NumpySlicesDataset(data, column_names=["text"])
674
+ >>> result = []
613
675
  >>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
614
- ... bytes_data = text.to_bytes(item["text"])
676
+ ... result.append(text.to_bytes(item["text"]))
677
+ >>> print(result)
678
+ [array([b'1', b'2', b'3'], dtype='|S1')]
615
679
  """
616
680
 
617
681
  if not isinstance(array, np.ndarray):
@@ -638,8 +702,11 @@ def to_str(array, encoding='utf8'):
638
702
  >>>
639
703
  >>> data = np.array([["1", "2", "3"]], dtype=np.bytes_)
640
704
  >>> dataset = ds.NumpySlicesDataset(data, column_names=["text"])
705
+ >>> result = []
641
706
  >>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
642
- ... str_data = text.to_str(item["text"])
707
+ ... result.append(text.to_str(item["text"]))
708
+ >>> print(result)
709
+ [array(['1', '2', '3'], dtype='<U1')]
643
710
  """
644
711
 
645
712
  if not isinstance(array, np.ndarray):