mindspore 2.0.0rc1__cp38-none-any.whl → 2.2.0__cp38-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (870)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +2 -2
  3. mindspore/__init__.py +5 -2
  4. mindspore/_akg/akg/build_module.py +5 -6
  5. mindspore/_akg/akg/composite/build_module.py +49 -16
  6. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  7. mindspore/_akg/akg/config/repository.json +195 -0
  8. mindspore/_akg/akg/global_configs.py +5 -1
  9. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  10. mindspore/_akg/akg/tvm/api.py +4 -3
  11. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  12. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  13. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  14. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  15. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  16. mindspore/_akg/akg/tvm/build_module.py +16 -1
  17. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  18. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  19. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  20. mindspore/_akg/akg/tvm/module.py +1 -2
  21. mindspore/_akg/akg/tvm/stmt.py +2 -2
  22. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  23. mindspore/_akg/akg/utils/kernel_exec.py +58 -260
  24. mindspore/_akg/akg/utils/op_dsl.py +17 -1
  25. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  26. mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
  27. mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
  28. mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
  29. mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
  30. mindspore/_check_jit_forbidden_api.py +5 -1
  31. mindspore/_checkparam.py +79 -62
  32. mindspore/_extends/graph_kernel/__init__.py +0 -1
  33. mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
  34. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  35. mindspore/_extends/graph_kernel/splitter.py +1 -9
  36. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
  37. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
  38. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  39. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
  40. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
  41. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  42. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  43. mindspore/_extends/parse/__init__.py +19 -17
  44. mindspore/_extends/parse/namespace.py +7 -36
  45. mindspore/_extends/parse/parser.py +375 -189
  46. mindspore/_extends/parse/resources.py +36 -41
  47. mindspore/_extends/parse/standard_method.py +350 -245
  48. mindspore/_extends/parse/trope.py +2 -12
  49. mindspore/_extends/remote/kernel_build_server.py +24 -7
  50. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  51. mindspore/_install_custom.py +43 -0
  52. mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
  53. mindspore/amp.py +85 -19
  54. mindspore/bin/cache_admin +0 -0
  55. mindspore/bin/cache_server +0 -0
  56. mindspore/boost/base.py +2 -2
  57. mindspore/boost/boost.py +27 -32
  58. mindspore/boost/boost_cell_wrapper.py +37 -13
  59. mindspore/boost/grad_accumulation.py +1 -1
  60. mindspore/boost/grad_freeze.py +34 -6
  61. mindspore/boost/group_loss_scale_manager.py +15 -14
  62. mindspore/boost/less_batch_normalization.py +28 -3
  63. mindspore/common/__init__.py +15 -11
  64. mindspore/common/_auto_dynamic.py +68 -0
  65. mindspore/common/_jit_fallback_utils.py +111 -0
  66. mindspore/common/_register_for_adapter.py +17 -5
  67. mindspore/common/_register_for_tensor.py +2 -2
  68. mindspore/common/_stub_tensor.py +18 -15
  69. mindspore/common/_utils.py +31 -7
  70. mindspore/common/api.py +269 -101
  71. mindspore/common/auto_dynamic_shape.py +498 -0
  72. mindspore/common/dtype.py +61 -21
  73. mindspore/common/dump.py +9 -7
  74. mindspore/common/initializer.py +106 -76
  75. mindspore/common/jit_config.py +35 -14
  76. mindspore/common/lazy_inline.py +187 -0
  77. mindspore/common/mindir_util.py +101 -0
  78. mindspore/common/mutable.py +10 -13
  79. mindspore/common/parameter.py +246 -55
  80. mindspore/common/seed.py +13 -7
  81. mindspore/common/sparse_tensor.py +29 -33
  82. mindspore/common/tensor.py +907 -251
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +84 -4
  85. mindspore/communication/management.py +160 -88
  86. mindspore/config/op_info.config +99 -75
  87. mindspore/config/super_bar_config.json +36 -4
  88. mindspore/context.py +526 -219
  89. mindspore/dataset/__init__.py +9 -46
  90. mindspore/dataset/audio/__init__.py +4 -19
  91. mindspore/dataset/audio/transforms.py +545 -233
  92. mindspore/dataset/audio/utils.py +21 -18
  93. mindspore/dataset/callback/ds_callback.py +42 -13
  94. mindspore/dataset/core/config.py +158 -100
  95. mindspore/dataset/core/validator_helpers.py +1 -63
  96. mindspore/dataset/debug/debug_hook.py +45 -13
  97. mindspore/dataset/debug/pre_defined_hook.py +5 -5
  98. mindspore/dataset/engine/__init__.py +0 -5
  99. mindspore/dataset/engine/cache_client.py +38 -15
  100. mindspore/dataset/engine/datasets.py +615 -278
  101. mindspore/dataset/engine/datasets_audio.py +154 -283
  102. mindspore/dataset/engine/datasets_standard_format.py +104 -116
  103. mindspore/dataset/engine/datasets_text.py +443 -326
  104. mindspore/dataset/engine/datasets_user_defined.py +251 -164
  105. mindspore/dataset/engine/datasets_vision.py +839 -1443
  106. mindspore/dataset/engine/iterators.py +11 -4
  107. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
  108. mindspore/dataset/engine/obs/util.py +3 -0
  109. mindspore/dataset/engine/offload.py +6 -6
  110. mindspore/dataset/engine/queue.py +15 -14
  111. mindspore/dataset/engine/samplers.py +39 -23
  112. mindspore/dataset/engine/serializer_deserializer.py +22 -6
  113. mindspore/dataset/engine/validators.py +21 -331
  114. mindspore/dataset/text/__init__.py +5 -33
  115. mindspore/dataset/text/transforms.py +334 -165
  116. mindspore/dataset/text/utils.py +215 -145
  117. mindspore/dataset/transforms/__init__.py +1 -1
  118. mindspore/dataset/transforms/c_transforms.py +3 -2
  119. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  120. mindspore/dataset/transforms/transforms.py +174 -71
  121. mindspore/dataset/utils/browse_dataset.py +25 -17
  122. mindspore/dataset/utils/line_reader.py +24 -21
  123. mindspore/dataset/vision/__init__.py +5 -26
  124. mindspore/dataset/vision/c_transforms.py +177 -165
  125. mindspore/dataset/vision/py_transforms.py +114 -119
  126. mindspore/dataset/vision/py_transforms_util.py +54 -51
  127. mindspore/dataset/vision/transforms.py +1127 -381
  128. mindspore/dataset/vision/utils.py +54 -38
  129. mindspore/dataset/vision/validators.py +12 -2
  130. mindspore/experimental/map_parameter.py +38 -4
  131. mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
  132. mindspore/experimental/optim/adam.py +192 -0
  133. mindspore/experimental/optim/adamw.py +181 -0
  134. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  135. mindspore/experimental/optim/optimizer.py +252 -0
  136. mindspore/experimental/optim/sgd.py +147 -0
  137. mindspore/gen_ops.py +273 -0
  138. mindspore/include/OWNERS +1 -2
  139. mindspore/include/api/context.h +21 -1
  140. mindspore/include/api/data_type.h +2 -1
  141. mindspore/include/api/graph.h +0 -15
  142. mindspore/include/api/kernel.h +2 -0
  143. mindspore/include/api/kernel_api.h +37 -12
  144. mindspore/include/api/model.h +29 -42
  145. mindspore/include/api/model_group.h +14 -3
  146. mindspore/include/api/model_parallel_runner.h +18 -2
  147. mindspore/include/api/serialization.h +26 -0
  148. mindspore/include/api/status.h +1 -0
  149. mindspore/include/api/types.h +38 -4
  150. mindspore/include/c_api/ms/abstract.h +67 -0
  151. mindspore/include/c_api/ms/attribute.h +197 -0
  152. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  153. mindspore/include/c_api/ms/base/macros.h +32 -0
  154. mindspore/include/c_api/ms/base/status.h +33 -0
  155. mindspore/include/c_api/ms/base/types.h +282 -0
  156. mindspore/include/c_api/ms/context.h +102 -0
  157. mindspore/include/c_api/ms/graph.h +160 -0
  158. mindspore/include/c_api/ms/node.h +606 -0
  159. mindspore/include/c_api/ms/tensor.h +161 -0
  160. mindspore/include/c_api/ms/value.h +84 -0
  161. mindspore/include/c_api/status_c.h +3 -0
  162. mindspore/include/dataset/constants.h +6 -12
  163. mindspore/include/dataset/execute.h +23 -13
  164. mindspore/include/dataset/text.h +26 -26
  165. mindspore/include/dataset/transforms.h +25 -31
  166. mindspore/include/dataset/vision.h +60 -60
  167. mindspore/include/dataset/vision_ascend.h +5 -6
  168. mindspore/include/dataset/vision_lite.h +17 -17
  169. mindspore/include/mindapi/base/format.h +0 -1
  170. mindspore/include/mindapi/base/type_id.h +2 -1
  171. mindspore/include/mindapi/base/types.h +5 -1
  172. mindspore/lib/libdnnl.so.2 +0 -0
  173. mindspore/lib/libjemalloc.so.2 +0 -0
  174. mindspore/lib/libmindspore.so +0 -0
  175. mindspore/lib/libmindspore_backend.so +0 -0
  176. mindspore/lib/libmindspore_common.so +0 -0
  177. mindspore/lib/libmindspore_core.so +0 -0
  178. mindspore/lib/libmindspore_glog.so.0 +0 -0
  179. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  180. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  181. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  182. mindspore/lib/libmindspore_shared_lib.so +0 -0
  183. mindspore/lib/libmpi_adapter.so +0 -0
  184. mindspore/lib/libnnacl.so +0 -0
  185. mindspore/lib/libopencv_core.so.4.5 +0 -0
  186. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  187. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  188. mindspore/lib/libps_cache.so +0 -0
  189. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
  192. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  193. mindspore/lib/plugin/ascend/libakg.so +0 -0
  194. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  195. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  196. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  197. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  198. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  199. mindspore/lib/plugin/cpu/libakg.so +0 -0
  200. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  201. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  202. mindspore/log.py +9 -6
  203. mindspore/mindrecord/filereader.py +33 -4
  204. mindspore/mindrecord/filewriter.py +70 -35
  205. mindspore/mindrecord/mindpage.py +40 -34
  206. mindspore/mindrecord/shardreader.py +1 -1
  207. mindspore/mindrecord/shardsegment.py +1 -1
  208. mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
  209. mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
  210. mindspore/mindrecord/tools/csv_to_mr.py +29 -13
  211. mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
  212. mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
  213. mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
  214. mindspore/nn/cell.py +463 -169
  215. mindspore/nn/dynamic_lr.py +47 -43
  216. mindspore/nn/layer/activation.py +225 -82
  217. mindspore/nn/layer/basic.py +121 -79
  218. mindspore/nn/layer/channel_shuffle.py +21 -21
  219. mindspore/nn/layer/combined.py +33 -26
  220. mindspore/nn/layer/container.py +277 -22
  221. mindspore/nn/layer/conv.py +441 -304
  222. mindspore/nn/layer/dense.py +19 -13
  223. mindspore/nn/layer/embedding.py +62 -49
  224. mindspore/nn/layer/flash_attention.py +264 -0
  225. mindspore/nn/layer/image.py +50 -39
  226. mindspore/nn/layer/math.py +62 -51
  227. mindspore/nn/layer/normalization.py +219 -167
  228. mindspore/nn/layer/padding.py +58 -70
  229. mindspore/nn/layer/pooling.py +334 -287
  230. mindspore/nn/layer/rnn_cells.py +53 -38
  231. mindspore/nn/layer/rnns.py +59 -56
  232. mindspore/nn/layer/thor_layer.py +52 -44
  233. mindspore/nn/layer/timedistributed.py +6 -4
  234. mindspore/nn/layer/transformer.py +284 -164
  235. mindspore/nn/learning_rate_schedule.py +34 -25
  236. mindspore/nn/loss/__init__.py +3 -2
  237. mindspore/nn/loss/loss.py +554 -311
  238. mindspore/nn/optim/ada_grad.py +12 -9
  239. mindspore/nn/optim/adadelta.py +14 -11
  240. mindspore/nn/optim/adafactor.py +19 -16
  241. mindspore/nn/optim/adam.py +62 -47
  242. mindspore/nn/optim/adamax.py +13 -10
  243. mindspore/nn/optim/adasum.py +12 -8
  244. mindspore/nn/optim/asgd.py +10 -9
  245. mindspore/nn/optim/ftrl.py +20 -17
  246. mindspore/nn/optim/lamb.py +16 -12
  247. mindspore/nn/optim/lars.py +8 -6
  248. mindspore/nn/optim/lazyadam.py +25 -20
  249. mindspore/nn/optim/momentum.py +10 -7
  250. mindspore/nn/optim/optimizer.py +61 -9
  251. mindspore/nn/optim/proximal_ada_grad.py +14 -13
  252. mindspore/nn/optim/rmsprop.py +17 -13
  253. mindspore/nn/optim/rprop.py +30 -17
  254. mindspore/nn/optim/sgd.py +40 -23
  255. mindspore/nn/optim/thor.py +24 -26
  256. mindspore/nn/probability/bijector/bijector.py +11 -11
  257. mindspore/nn/probability/bijector/exp.py +1 -1
  258. mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
  259. mindspore/nn/probability/bijector/invert.py +1 -1
  260. mindspore/nn/probability/bijector/power_transform.py +29 -29
  261. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  262. mindspore/nn/probability/bijector/softplus.py +5 -5
  263. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
  264. mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
  265. mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
  266. mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
  267. mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
  268. mindspore/nn/probability/distribution/_utils/utils.py +1 -1
  269. mindspore/nn/probability/distribution/bernoulli.py +9 -9
  270. mindspore/nn/probability/distribution/beta.py +8 -8
  271. mindspore/nn/probability/distribution/categorical.py +23 -15
  272. mindspore/nn/probability/distribution/cauchy.py +5 -6
  273. mindspore/nn/probability/distribution/distribution.py +3 -3
  274. mindspore/nn/probability/distribution/exponential.py +4 -4
  275. mindspore/nn/probability/distribution/gamma.py +10 -10
  276. mindspore/nn/probability/distribution/geometric.py +8 -8
  277. mindspore/nn/probability/distribution/gumbel.py +8 -9
  278. mindspore/nn/probability/distribution/half_normal.py +5 -5
  279. mindspore/nn/probability/distribution/laplace.py +5 -5
  280. mindspore/nn/probability/distribution/log_normal.py +12 -11
  281. mindspore/nn/probability/distribution/logistic.py +8 -8
  282. mindspore/nn/probability/distribution/normal.py +6 -5
  283. mindspore/nn/probability/distribution/poisson.py +10 -11
  284. mindspore/nn/probability/distribution/student_t.py +8 -9
  285. mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
  286. mindspore/nn/probability/distribution/uniform.py +11 -11
  287. mindspore/nn/reinforcement/tensor_array.py +2 -2
  288. mindspore/nn/sparse/sparse.py +9 -9
  289. mindspore/nn/wrap/cell_wrapper.py +188 -63
  290. mindspore/nn/wrap/grad_reducer.py +21 -12
  291. mindspore/nn/wrap/loss_scale.py +136 -49
  292. mindspore/numpy/__init__.py +4 -4
  293. mindspore/numpy/array_creations.py +55 -56
  294. mindspore/numpy/array_ops.py +134 -35
  295. mindspore/numpy/logic_ops.py +66 -20
  296. mindspore/numpy/math_ops.py +142 -139
  297. mindspore/numpy/utils_const.py +2 -2
  298. mindspore/offline_debug/convert_async.py +2 -2
  299. mindspore/ops/_grad_experimental/__init__.py +7 -5
  300. mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
  301. mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
  302. mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
  303. mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
  304. mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
  305. mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
  306. mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
  307. mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
  308. mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
  309. mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
  310. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
  311. mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
  312. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  313. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  314. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
  315. mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
  316. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
  317. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
  318. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
  319. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
  320. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  321. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
  322. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
  323. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
  324. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  325. mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
  326. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  327. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  328. mindspore/ops/_op_impl/aicpu/cast.py +52 -0
  329. mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
  330. mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
  331. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  332. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
  333. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  334. mindspore/ops/_op_impl/aicpu/eye.py +4 -4
  335. mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
  336. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
  337. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  338. mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
  339. mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
  340. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  341. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  342. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  343. mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
  344. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
  345. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  346. mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
  347. mindspore/ops/_op_impl/aicpu/median.py +1 -0
  348. mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
  349. mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
  350. mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
  351. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
  352. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  353. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  354. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  355. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  356. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  357. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
  358. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
  359. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
  360. mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
  361. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  362. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  363. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  364. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  365. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
  366. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
  367. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  368. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  369. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  370. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  371. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  372. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
  373. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
  374. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
  375. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
  376. mindspore/ops/_op_impl/tbe/__init__.py +6 -4
  377. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  378. mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
  379. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
  380. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
  381. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
  382. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
  383. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
  384. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  385. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
  386. mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
  387. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
  388. mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
  389. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
  390. mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
  391. mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
  392. mindspore/ops/_op_impl/tbe/im2col.py +4 -4
  393. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  394. mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
  395. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
  396. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
  397. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  398. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
  399. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  400. mindspore/ops/_primitive_cache.py +1 -1
  401. mindspore/ops/_tracefunc.py +241 -0
  402. mindspore/ops/_utils/utils.py +10 -2
  403. mindspore/ops/_vmap/vmap_array_ops.py +5 -3
  404. mindspore/ops/_vmap/vmap_base.py +5 -4
  405. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  406. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  407. mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
  408. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  409. mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
  410. mindspore/ops/arg_dtype_cast.py +54 -0
  411. mindspore/ops/composite/__init__.py +7 -5
  412. mindspore/ops/composite/base.py +78 -34
  413. mindspore/ops/composite/math_ops.py +5 -695
  414. mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
  415. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
  416. mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
  417. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  418. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  419. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
  420. mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
  421. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
  422. mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
  423. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
  424. mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
  425. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
  426. mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
  427. mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
  428. mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
  429. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
  430. mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
  431. mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
  432. mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
  433. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  434. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  435. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
  436. mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
  437. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
  438. mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
  439. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  440. mindspore/ops/deprecated.py +304 -0
  441. mindspore/ops/function/__init__.py +41 -4
  442. mindspore/ops/function/array_func.py +1108 -467
  443. mindspore/ops/function/clip_func.py +94 -27
  444. mindspore/ops/function/debug_func.py +3 -1
  445. mindspore/ops/function/grad/grad_func.py +82 -73
  446. mindspore/ops/function/image_func.py +28 -12
  447. mindspore/ops/function/linalg_func.py +135 -39
  448. mindspore/ops/function/math_func.py +3779 -894
  449. mindspore/ops/function/nn_func.py +1584 -657
  450. mindspore/ops/function/parameter_func.py +13 -3
  451. mindspore/ops/function/random_func.py +247 -153
  452. mindspore/ops/function/sparse_func.py +14 -11
  453. mindspore/ops/function/sparse_unary_func.py +173 -47
  454. mindspore/ops/function/spectral_func.py +8 -4
  455. mindspore/ops/function/vmap_func.py +8 -7
  456. mindspore/ops/functional.py +47 -16
  457. mindspore/ops/op_info_register.py +346 -86
  458. mindspore/ops/operations/__init__.py +38 -22
  459. mindspore/ops/operations/_grad_ops.py +145 -149
  460. mindspore/ops/operations/_inner_ops.py +298 -56
  461. mindspore/ops/operations/_ms_kernel.py +3 -3
  462. mindspore/ops/operations/_quant_ops.py +24 -28
  463. mindspore/ops/operations/_rl_inner_ops.py +9 -7
  464. mindspore/ops/operations/_scalar_ops.py +115 -0
  465. mindspore/ops/operations/_sequence_ops.py +148 -10
  466. mindspore/ops/operations/_tensor_array.py +1 -1
  467. mindspore/ops/operations/_thor_ops.py +2 -2
  468. mindspore/ops/operations/array_ops.py +1239 -561
  469. mindspore/ops/operations/comm_ops.py +166 -90
  470. mindspore/ops/operations/control_ops.py +3 -3
  471. mindspore/ops/operations/custom_ops.py +124 -102
  472. mindspore/ops/operations/debug_ops.py +24 -11
  473. mindspore/ops/operations/image_ops.py +86 -71
  474. mindspore/ops/operations/inner_ops.py +18 -13
  475. mindspore/ops/operations/linalg_ops.py +30 -11
  476. mindspore/ops/operations/math_ops.py +1730 -435
  477. mindspore/ops/operations/nn_ops.py +1953 -943
  478. mindspore/ops/operations/other_ops.py +65 -43
  479. mindspore/ops/operations/random_ops.py +258 -98
  480. mindspore/ops/operations/rl_ops.py +4 -36
  481. mindspore/ops/operations/sparse_ops.py +38 -33
  482. mindspore/ops/operations/spectral_ops.py +8 -4
  483. mindspore/ops/primitive.py +66 -44
  484. mindspore/ops/signature.py +5 -5
  485. mindspore/parallel/_auto_parallel_context.py +80 -19
  486. mindspore/parallel/_cost_model_context.py +42 -0
  487. mindspore/parallel/_offload_context.py +162 -72
  488. mindspore/parallel/_parallel_serialization.py +2 -2
  489. mindspore/parallel/_ps_context.py +16 -4
  490. mindspore/parallel/_recovery_context.py +2 -1
  491. mindspore/parallel/_tensor.py +15 -13
  492. mindspore/parallel/_transformer/layers.py +8 -6
  493. mindspore/parallel/_transformer/loss.py +1 -0
  494. mindspore/parallel/_transformer/moe.py +7 -7
  495. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  496. mindspore/parallel/_transformer/transformer.py +34 -14
  497. mindspore/parallel/_utils.py +36 -14
  498. mindspore/parallel/algo_parameter_config.py +114 -20
  499. mindspore/parallel/checkpoint_transform.py +16 -18
  500. mindspore/parallel/shard.py +16 -13
  501. mindspore/profiler/__init__.py +1 -1
  502. mindspore/profiler/common/struct_type.py +3 -3
  503. mindspore/profiler/common/util.py +3 -2
  504. mindspore/profiler/envprofiling.py +11 -4
  505. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  506. mindspore/profiler/parser/ascend_flops_generator.py +94 -0
  507. mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
  508. mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
  509. mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
  510. mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
  511. mindspore/profiler/parser/ascend_op_generator.py +276 -0
  512. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  513. mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
  514. mindspore/profiler/parser/base_timeline_generator.py +11 -7
  515. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
  516. mindspore/profiler/parser/flops_parser.py +15 -11
  517. mindspore/profiler/parser/framework_parser.py +92 -73
  518. mindspore/profiler/parser/hccl_parser.py +16 -12
  519. mindspore/profiler/parser/integrator.py +22 -11
  520. mindspore/profiler/parser/memory_usage_parser.py +36 -11
  521. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  522. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  523. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  524. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  525. mindspore/profiler/parser/optime_parser.py +1 -1
  526. mindspore/profiler/parser/profiler_info.py +4 -5
  527. mindspore/profiler/parser/step_trace_parser.py +11 -14
  528. mindspore/profiler/profiling.py +678 -377
  529. mindspore/rewrite/api/node.py +211 -54
  530. mindspore/rewrite/api/node_type.py +5 -0
  531. mindspore/rewrite/api/pattern_engine.py +22 -23
  532. mindspore/rewrite/api/scoped_value.py +20 -17
  533. mindspore/rewrite/api/symbol_tree.py +252 -106
  534. mindspore/rewrite/api/tree_node_helper.py +3 -0
  535. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  536. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  537. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  538. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
  539. mindspore/rewrite/common/rewrite_elog.py +5 -1
  540. mindspore/rewrite/namer.py +51 -51
  541. mindspore/rewrite/namespace.py +14 -5
  542. mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
  543. mindspore/rewrite/node/call_function.py +79 -0
  544. mindspore/rewrite/node/cell_container.py +135 -0
  545. mindspore/rewrite/node/control_flow.py +88 -0
  546. mindspore/rewrite/{node.py → node/node.py} +313 -247
  547. mindspore/rewrite/node/node_manager.py +254 -0
  548. mindspore/rewrite/node/node_topological_manager.py +243 -0
  549. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  550. mindspore/rewrite/parsers/assign_parser.py +225 -239
  551. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  552. mindspore/rewrite/parsers/class_def_parser.py +179 -218
  553. mindspore/rewrite/parsers/constant_parser.py +9 -6
  554. mindspore/rewrite/parsers/container_parser.py +9 -7
  555. mindspore/rewrite/parsers/for_parser.py +36 -15
  556. mindspore/rewrite/parsers/function_def_parser.py +23 -20
  557. mindspore/rewrite/parsers/if_parser.py +28 -24
  558. mindspore/rewrite/parsers/module_parser.py +202 -25
  559. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  560. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  561. mindspore/rewrite/parsers/return_parser.py +6 -6
  562. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  563. mindspore/rewrite/sparsify/sparsify.py +4 -1
  564. mindspore/rewrite/sparsify/utils.py +11 -5
  565. mindspore/rewrite/symbol_tree.py +577 -732
  566. mindspore/rewrite/symbol_tree_builder.py +9 -175
  567. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  568. mindspore/run_check/_check_version.py +46 -39
  569. mindspore/run_check/run_check.py +3 -2
  570. mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
  571. mindspore/safeguard/rewrite_obfuscation.py +517 -0
  572. mindspore/scipy/__init__.py +1 -1
  573. mindspore/scipy/linalg.py +67 -61
  574. mindspore/scipy/ops.py +5 -41
  575. mindspore/scipy/ops_grad.py +3 -2
  576. mindspore/scipy/ops_wrapper.py +5 -5
  577. mindspore/scipy/optimize/line_search.py +8 -8
  578. mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
  579. mindspore/scipy/optimize/minimize.py +16 -12
  580. mindspore/scipy/utils.py +1 -52
  581. mindspore/scipy/utils_const.py +4 -4
  582. mindspore/train/__init__.py +4 -4
  583. mindspore/train/_utils.py +13 -5
  584. mindspore/train/amp.py +410 -148
  585. mindspore/train/anf_ir_pb2.py +16 -4
  586. mindspore/train/callback/_backup_and_restore.py +8 -11
  587. mindspore/train/callback/_callback.py +80 -3
  588. mindspore/train/callback/_checkpoint.py +82 -51
  589. mindspore/train/callback/_early_stop.py +12 -15
  590. mindspore/train/callback/_history.py +1 -1
  591. mindspore/train/callback/_lambda_callback.py +13 -13
  592. mindspore/train/callback/_landscape.py +21 -17
  593. mindspore/train/callback/_loss_monitor.py +9 -10
  594. mindspore/train/callback/_on_request_exit.py +16 -33
  595. mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
  596. mindspore/train/callback/_summary_collector.py +44 -30
  597. mindspore/train/callback/_time_monitor.py +62 -12
  598. mindspore/train/data_sink.py +10 -16
  599. mindspore/train/dataset_helper.py +154 -86
  600. mindspore/train/loss_scale_manager.py +14 -9
  601. mindspore/train/metrics/__init__.py +10 -2
  602. mindspore/train/metrics/accuracy.py +1 -1
  603. mindspore/train/metrics/auc.py +1 -1
  604. mindspore/train/metrics/bleu_score.py +2 -2
  605. mindspore/train/metrics/confusion_matrix.py +14 -14
  606. mindspore/train/metrics/cosine_similarity.py +3 -3
  607. mindspore/train/metrics/dice.py +1 -1
  608. mindspore/train/metrics/fbeta.py +1 -1
  609. mindspore/train/metrics/hausdorff_distance.py +8 -6
  610. mindspore/train/metrics/mean_surface_distance.py +5 -4
  611. mindspore/train/metrics/metric.py +49 -17
  612. mindspore/train/metrics/occlusion_sensitivity.py +4 -4
  613. mindspore/train/metrics/perplexity.py +1 -1
  614. mindspore/train/metrics/precision.py +2 -2
  615. mindspore/train/metrics/recall.py +2 -3
  616. mindspore/train/metrics/roc.py +7 -7
  617. mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
  618. mindspore/train/metrics/topk.py +7 -4
  619. mindspore/train/mind_ir_pb2.py +193 -48
  620. mindspore/train/model.py +377 -133
  621. mindspore/train/serialization.py +697 -245
  622. mindspore/train/summary/_summary_adapter.py +5 -2
  623. mindspore/train/summary/_writer_pool.py +4 -3
  624. mindspore/train/summary/summary_record.py +25 -23
  625. mindspore/train/train_thor/convert_utils.py +39 -23
  626. mindspore/train/train_thor/dataset_helper.py +4 -3
  627. mindspore/train/train_thor/model_thor.py +8 -8
  628. mindspore/version.py +1 -1
  629. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
  630. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +633 -804
  631. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
  632. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  633. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  634. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  635. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  636. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  637. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  638. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  639. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  640. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  641. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  642. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  643. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  644. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  645. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  646. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  647. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  648. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  649. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  650. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  651. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  652. mindspore/_extends/graph_kernel/expander.py +0 -80
  653. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
  654. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  655. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  656. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  657. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  658. mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
  659. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  660. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  661. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  662. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  663. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  664. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  665. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  666. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  667. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  668. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  669. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  670. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  671. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  672. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  673. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  674. mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
  675. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  676. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  677. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  678. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  679. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  680. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  681. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  682. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  683. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  684. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  685. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  686. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  687. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  688. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  689. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  690. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  691. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  692. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  693. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  694. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  695. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  696. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  697. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  698. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  699. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  700. mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
  701. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  702. mindspore/_extends/parse/jit_fallback_modules.py +0 -51
  703. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  704. mindspore/dataset/engine/graphdata.py +0 -1586
  705. mindspore/include/api/net.h +0 -142
  706. mindspore/ops/_grad/grad_array_ops.py +0 -1347
  707. mindspore/ops/_grad/grad_clip_ops.py +0 -84
  708. mindspore/ops/_grad/grad_debug_ops.py +0 -68
  709. mindspore/ops/_grad/grad_inner_ops.py +0 -235
  710. mindspore/ops/_grad/grad_math_ops.py +0 -1684
  711. mindspore/ops/_grad/grad_nn_ops.py +0 -1529
  712. mindspore/ops/_grad/grad_other_ops.py +0 -89
  713. mindspore/ops/_grad/grad_sequence_ops.py +0 -296
  714. mindspore/ops/_grad/grad_sparse.py +0 -323
  715. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
  716. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
  717. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  718. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  719. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  720. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
  721. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
  722. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
  723. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
  724. mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
  725. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
  726. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
  727. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  728. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
  729. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  730. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
  731. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  732. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
  733. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
  734. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
  735. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  736. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  737. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
  738. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
  739. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
  740. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
  741. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
  742. mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
  743. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
  744. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
  745. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
  746. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  747. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
  748. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  749. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  750. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
  751. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
  752. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
  753. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  754. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  755. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  756. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
  757. mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
  758. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  759. mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
  760. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
  761. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
  762. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
  763. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
  764. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
  765. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
  766. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  767. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
  768. mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
  769. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
  770. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
  771. mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
  772. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  773. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
  774. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
  775. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
  776. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
  777. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
  778. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
  779. mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
  780. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  781. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
  782. mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
  783. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
  784. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
  785. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
  786. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
  787. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
  788. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
  789. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
  790. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
  791. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
  792. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
  793. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  794. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  795. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  796. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
  797. mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
  798. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  799. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  800. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
  801. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
  802. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
  803. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
  804. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  805. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  806. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  807. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
  808. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
  809. mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
  810. mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
  811. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
  812. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  813. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
  814. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
  815. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
  816. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
  817. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
  818. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
  819. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
  820. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
  821. mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
  822. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  823. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  824. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
  825. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
  826. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
  827. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  828. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
  829. mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
  830. mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
  831. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
  832. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  833. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
  834. mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
  835. mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
  836. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
  837. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  838. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
  839. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
  840. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  841. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
  842. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
  843. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  844. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  845. mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
  846. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
  847. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
  848. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
  849. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
  850. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  851. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
  852. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
  853. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
  854. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
  855. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  856. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  857. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
  858. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
  859. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
  860. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
  861. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
  862. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
  863. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
  864. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
  865. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  866. mindspore/rewrite/node_visitor.py +0 -44
  867. mindspore/rewrite/topological_manager.py +0 -203
  868. mindspore/scipy/sparse/linalg.py +0 -192
  869. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
  870. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
@@ -24,9 +24,13 @@ and use Lookup to find the index of tokens in Vocab.
  class attributes (self.xxx) to support save() and load().

  Examples:
- >>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
+ >>> import mindspore.dataset as ds
+ >>> import mindspore.dataset.text as text
+ >>>
  >>> # Create a dataset for text sentences saved as line data in a file
- >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_dataset_dir, shuffle=False)
+ >>> text_file_list = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list, shuffle=False)
+ >>>
  >>> # Tokenize sentences to unicode characters
  >>> tokenizer = text.UnicodeCharTokenizer()
  >>> # Load vocabulary from list
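Put together, the updated module-level example amounts to the pipeline sketched below. This is a minimal sketch assuming the 2.2.0 API shown in the hunk; the dataset path and the vocabulary entries are placeholders, and the Vocab/Lookup step is inferred from the hunk header ("use Lookup to find the index of tokens in Vocab") rather than copied from the diff.

    # Sketch only: paths and vocabulary entries are placeholders.
    import mindspore.dataset as ds
    import mindspore.dataset.text as text

    text_file_list = ["/path/to/text_file_dataset_file"]  # one or more text files
    text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list, shuffle=False)

    # Tokenize each sentence into unicode characters.
    tokenizer = text.UnicodeCharTokenizer()
    text_file_dataset = text_file_dataset.map(operations=tokenizer)

    # Load a vocabulary from a list and use Lookup to map tokens to indices.
    vocab = text.Vocab.from_list(["a", "b", "c", "d", "e"])
    lookup = text.Lookup(vocab)
    text_file_dataset = text_file_dataset.map(operations=lookup)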
@@ -99,7 +103,7 @@ class AddToken(TextTensorOperation):
  token (str): The token to be added.
  begin (bool, optional): Choose the position where the token is inserted. If True,
  the token will be inserted at the beginning of the sequence. Otherwise, it will
- be inserted at the end of the sequence. Default: True.
+ be inserted at the end of the sequence. Default: ``True``.

  Raises:
  TypeError: If `token` is not of type string.
@@ -109,6 +113,9 @@ class AddToken(TextTensorOperation):
  ``CPU``

  Examples:
+ >>> import mindspore.dataset as ds
+ >>> import mindspore.dataset.text as text
+ >>>
  >>> dataset = ds.NumpySlicesDataset(data={"text": [['a', 'b', 'c', 'd', 'e']]})
  >>> # Data before
  >>> # | text |
@@ -122,6 +129,10 @@ class AddToken(TextTensorOperation):
  >>> # +---------------------------+
  >>> # | ['TOKEN', 'a', 'b', 'c', 'd', 'e'] |
  >>> # +---------------------------+
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
  """

  @check_add_token
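For reference, the AddToken example above boils down to the following sketch. It reuses the token value and sample data from the docstring; the iteration at the end is added here only to show the expected output and assumes the usual create_dict_iterator API.

    import mindspore.dataset as ds
    import mindspore.dataset.text as text

    dataset = ds.NumpySlicesDataset(data={"text": [['a', 'b', 'c', 'd', 'e']]})
    # begin=True inserts the token at the start of each sequence (the documented default).
    add_token_op = text.AddToken(token='TOKEN', begin=True)
    dataset = dataset.map(operations=add_token_op)
    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(row["text"])  # expected: ['TOKEN' 'a' 'b' 'c' 'd' 'e']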
@@ -136,49 +147,53 @@ class AddToken(TextTensorOperation):

  class JiebaTokenizer(TextTensorOperation):
  """
- Tokenize Chinese string into words based on dictionary.
+ Use Jieba tokenizer to tokenize Chinese strings.

  Note:
- The integrity of the HMMSEgment algorithm and MPSegment algorithm files must be confirmed.
+ The dictionary files used by Hidden Markov Model segment and Max Probability segment can be
+ obtained through the `cppjieba GitHub <https://github.com/yanyiwu/cppjieba/tree/master/dict>`_ .
+ Please ensure the validity and integrity of these files.

  Args:
- hmm_path (str): Dictionary file is used by HMMSegment algorithm.
- The dictionary can be obtained on the official website of cppjieba.
- mp_path (str): Dictionary file is used by MPSegment algorithm.
- The dictionary can be obtained on the official website of cppjieba.
- mode (JiebaMode, optional): Valid values can be any of [JiebaMode.MP, JiebaMode.HMM,
- JiebaMode.MIX]. Default: JiebaMode.MIX.
-
- - JiebaMode.MP, tokenize with MPSegment algorithm.
-
- - JiebaMode.HMM, tokenize with Hidden Markov Model Segment algorithm.
-
- - JiebaMode.MIX, tokenize with a mix of MPSegment and HMMSegment algorithm.
-
- with_offsets (bool, optional): Whether or not output offsets of tokens. Default: False.
+ hmm_path (str): Path to the dictionary file used by Hidden Markov Model segment.
+ mp_path (str): Path to the dictionary file used by Max Probability segment.
+ mode (JiebaMode, optional): The desired segment algorithms. See :class:`~.text.JiebaMode`
+ for details on optional values. Default: ``JiebaMode.MIX`` .
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
+ token in the original string. Default: ``False`` .

  Raises:
- ValueError: If path of HMMSegment dict is not provided.
- ValueError: If path of MPSegment dict is not provided.
- TypeError: If `hmm_path` or `mp_path` is not of type string.
+ TypeError: If `hmm_path` is not of type str.
+ TypeError: If `mp_path` is not of type str.
+ TypeError: If `mode` is not of type :class:`~.text.JiebaMode` .
  TypeError: If `with_offsets` is not of type bool.

  Supported Platforms:
  ``CPU``

  Examples:
+ >>> import mindspore.dataset as ds
  >>> import mindspore.dataset.text as text
  >>> from mindspore.dataset.text import JiebaMode
- >>> # If with_offsets=False, default output one column {["text", dtype=str]}
+ >>>
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
+ >>>
+ >>> # 1) If with_offsets=False, return one data column {["text", dtype=str]}
  >>> jieba_hmm_file = "/path/to/jieba/hmm/file"
  >>> jieba_mp_file = "/path/to/jieba/mp/file"
  >>> tokenizer_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP, with_offsets=False)
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
- >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
- >>> # ["offsets_limit", dtype=uint32]}
+ >>>
+ >>> # 2) If with_offsets=True, return three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
+ >>> # ["offsets_limit", dtype=uint32]}
  >>> tokenizer_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP, with_offsets=True)
- >>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
- ... output_columns=["token", "offsets_start", "offsets_limit"])
+ >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
+ ... output_columns=["token", "offsets_start", "offsets_limit"])
+
+ Tutorial Examples:
+ - `Illustration of text transforms
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
  """

  @check_jieba_init
@@ -213,17 +228,19 @@ class JiebaTokenizer(TextTensorOperation):
213
228
  @check_jieba_add_word
214
229
  def add_word(self, word, freq=None):
215
230
  """
216
- Add a user defined word to JiebaTokenizer's dictionary.
231
+ Add a specified word mapping to the Vocab of the tokenizer.
217
232
 
218
233
  Args:
219
- word (str): The word to be added to the JiebaTokenizer instance.
220
- The added word will not be written into the built-in dictionary on disk.
221
- freq (int, optional): The frequency of the word to be added. The higher the frequency,
222
- the better chance the word will be tokenized. Default: None, use default frequency.
234
+ word (str): The word to be added to the Vocab.
235
+ freq (int, optional): The frequency of the word to be added. The higher the word frequency,
236
+ the greater the chance that the word will be tokenized. Default: ``None``, using the
237
+ default word frequency.
223
238
 
224
239
  Examples:
240
+ >>> import mindspore.dataset as ds
225
241
  >>> import mindspore.dataset.text as text
226
242
  >>> from mindspore.dataset.text import JiebaMode
243
+ >>>
227
244
  >>> jieba_hmm_file = "/path/to/jieba/hmm/file"
228
245
  >>> jieba_mp_file = "/path/to/jieba/mp/file"
229
246
  >>> jieba_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP)
@@ -232,6 +249,9 @@ class JiebaTokenizer(TextTensorOperation):
232
249
  ... for line in f:
233
250
  ... word = line.split(',')[0]
234
251
  ... jieba_op.add_word(word)
252
+ >>>
253
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
254
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
235
255
  >>> text_file_dataset = text_file_dataset.map(operations=jieba_op, input_columns=["text"])
236
256
  """
237
257
 
@@ -244,30 +264,30 @@ class JiebaTokenizer(TextTensorOperation):
244
264
  @check_jieba_add_dict
245
265
  def add_dict(self, user_dict):
246
266
  """
247
- Add a user defined word to JiebaTokenizer's dictionary.
267
+ Add the specified word mappings to the Vocab of the tokenizer.
248
268
 
249
269
  Args:
250
- user_dict (Union[str, dict]): One of the two loading methods is file path(str) loading
251
- (according to the Jieba dictionary format) and the other is Python dictionary(dict) loading,
252
- Python Dict format: {word1:freq1, word2:freq2,...}.
253
- Jieba dictionary format : word(required), freq(optional), such as:
254
-
255
- .. code-block::
256
-
257
- word1 freq1
258
- word2 None
259
- word3 freq3
260
-
261
- Only valid word-freq pairs in user provided file will be added into the dictionary.
262
- Rows containing invalid input will be ignored. No error nor warning Status is returned.
270
+ user_dict (Union[str, dict[str, int]]): The word mappings to be added to the Vocab.
271
+ If the input type is str, it means the path of the file storing the word mappings to be added.
272
+ Each line of the file should contain two fields separated by a space, where the first field
273
+ indicates the word itself and the second field should be a number indicating the word frequency.
274
+ Invalid lines will be ignored and no error or warning will be returned.
275
+ If the input type is dict[str, int], it means the dictionary storing the word mappings to be added,
276
+ where the key name is the word itself and the key value is the word frequency.
263
277
 
264
278
  Examples:
279
+ >>> import mindspore.dataset as ds
280
+ >>> import mindspore.dataset.text as text
265
281
  >>> from mindspore.dataset.text import JiebaMode
282
+ >>>
266
283
  >>> jieba_hmm_file = "/path/to/jieba/hmm/file"
267
284
  >>> jieba_mp_file = "/path/to/jieba/mp/file"
268
285
  >>> user_dict = {"男默女泪": 10}
269
286
  >>> jieba_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP)
270
287
  >>> jieba_op.add_dict(user_dict)
288
+ >>>
289
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
290
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
271
291
  >>> text_file_dataset = text_file_dataset.map(operations=jieba_op, input_columns=["text"])
272
292
  """
273
293
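To make the dictionary file format described in this hunk concrete, here is a minimal sketch (not part of the package): the file contents, words, and frequencies are hypothetical, and the HMM/MP dictionary paths are placeholders exactly as in the docstring examples.

    import mindspore.dataset.text as text
    from mindspore.dataset.text import JiebaMode

    # Each line of a user dictionary file is "<word> [frequency]"; the frequency is optional.
    with open("user_dict.txt", "w", encoding="utf-8") as f:
        f.write("深度学习 10\n")   # word with an explicit frequency
        f.write("昇思\n")          # word only, the default frequency applies

    jieba_op = text.JiebaTokenizer("/path/to/jieba/hmm/file", "/path/to/jieba/mp/file",
                                   mode=JiebaMode.MP)
    jieba_op.add_dict("user_dict.txt")      # load word mappings from the file
    jieba_op.add_dict({"男默女泪": 10})      # or pass a dict[str, int] directly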
 
@@ -303,7 +323,7 @@ class JiebaTokenizer(TextTensorOperation):
303
323
  raise ValueError(
304
324
  "user dict file {} is not exist.".format(file_path))
305
325
  real_file_path = os.path.realpath(file_path)
306
- file_dict = open(real_file_path)
326
+ file_dict = open(real_file_path, "r")
307
327
  data_re = re.compile('^\\s*([^\\s*]+?)\\s*([0-9]+)?\\s*$', re.U)
308
328
  words_list = []
309
329
  for item in file_dict:
@@ -327,9 +347,9 @@ class Lookup(TextTensorOperation):
327
347
  vocab (Vocab): A vocabulary object.
328
348
  unknown_token (str, optional): Word is used for lookup. In case of the word is out of vocabulary (OOV),
329
349
  the result of lookup will be replaced with unknown_token. If the unknown_token is not specified or
330
- it is OOV, runtime error will be thrown. Default: None, means no unknown_token is specified.
350
+ it is OOV, a runtime error will be thrown. Default: ``None``, which means no unknown_token is specified.
331
351
  data_type (mindspore.dtype, optional): The data type that lookup operation maps
332
- string to. Default: mindspore.int32.
352
+ string to. Default: ``mstype.int32``.
333
353
 
334
354
  Raises:
335
355
  TypeError: If `vocab` is not of type text.Vocab.
@@ -340,12 +360,20 @@ class Lookup(TextTensorOperation):
340
360
  ``CPU``
341
361
 
342
362
  Examples:
363
+ >>> import mindspore.dataset as ds
343
364
  >>> import mindspore.dataset.text as text
344
365
  >>> # Load vocabulary from list
345
366
  >>> vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您'])
346
367
  >>> # Use Lookup operation to map tokens to ids
347
368
  >>> lookup = text.Lookup(vocab)
369
+ >>>
370
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
371
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
348
372
  >>> text_file_dataset = text_file_dataset.map(operations=[lookup])
373
+
374
+ Tutorial Examples:
375
+ - `Illustration of text transforms
376
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
349
377
  """
350
378
 
351
379
  @check_lookup
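Because the `unknown_token` behaviour above is easiest to see with a concrete out-of-vocabulary word, here is a small sketch; it assumes Lookup supports eager invocation (calling the op on a list of tokens directly, as Ngram does further below), and the '<unk>' token and the OOV word are hypothetical.

    import mindspore.dataset.text as text
    from mindspore import dtype as mstype

    vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您', '<unk>'])
    lookup = text.Lookup(vocab, unknown_token='<unk>', data_type=mstype.int32)
    # The OOV token should be mapped to the id of '<unk>' instead of raising a runtime error.
    print(lookup(['深', '圳', 'oov_word']))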
@@ -373,13 +401,13 @@ class Ngram(TextTensorOperation):
373
401
  an empty string produced.
374
402
  left_pad (tuple, optional): Padding performed on left side of the sequence shaped like ("pad_token", pad_width).
375
403
  `pad_width` will be capped at n-1. For example, specifying left_pad=("_", 2) would pad left side of the
376
- sequence with "__". Default: ('', 0).
404
+ sequence with "__". Default: ``('', 0)``.
377
405
  right_pad (tuple, optional): Padding performed on right side of the sequence shaped like
378
406
  ("pad_token", pad_width). `pad_width` will be capped at n-1. For example, specifying right_pad=("_", 2)
379
- would pad right side of the sequence with "__". Default: ('', 0).
407
+ would pad right side of the sequence with "__". Default: ``('', 0)``.
380
408
  separator (str, optional): Symbol used to join strings together. For example, if 2-gram is
381
- ["mindspore", "amazing"] with separator="-", the result would be ["mindspore-amazing"].
382
- Default: ' ', which will use whitespace as separator.
409
+ ["mindspore", "amazing"] with separator is ``"-"``, the result would be ["mindspore-amazing"].
410
+ Default: ``' '``, which will use whitespace as separator.
383
411
 
384
412
  Raises:
385
413
  TypeError: If values of `n` are not positive or not of type int.
@@ -392,13 +420,21 @@ class Ngram(TextTensorOperation):
392
420
  ``CPU``
393
421
 
394
422
  Examples:
423
+ >>> import mindspore.dataset as ds
395
424
  >>> import mindspore.dataset.text as text
396
425
  >>> ngram_op = text.Ngram(3, separator="-")
397
426
  >>> output = ngram_op(["WildRose Country", "Canada's Ocean Playground", "Land of Living Skies"])
398
427
  >>> # output
399
428
  >>> # ["WildRose Country-Canada's Ocean Playground-Land of Living Skies"]
429
+ >>>
400
430
  >>> # same ngram_op called through map
431
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
432
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
401
433
  >>> text_file_dataset = text_file_dataset.map(operations=ngram_op)
434
+
435
+ Tutorial Examples:
436
+ - `Illustration of text transforms
437
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
402
438
  """
403
439
 
404
440
  @check_ngram
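The `left_pad`/`right_pad` capping rule above (pad_width is capped at n-1) is worth a quick sketch; the call mirrors the eager example in the docstring, and the exact padded output is left to the reader since it follows from that rule.

    import mindspore.dataset.text as text

    # For 3-grams, a requested pad width of 5 is capped at n-1 = 2, i.e. at most "__" on each side.
    ngram_op = text.Ngram(3, left_pad=("_", 5), right_pad=("_", 5), separator="-")
    print(ngram_op(["WildRose Country", "Canada's Ocean Playground", "Land of Living Skies"]))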
@@ -427,9 +463,19 @@ class PythonTokenizer:
427
463
  ``CPU``
428
464
 
429
465
  Examples:
466
+ >>> import mindspore.dataset as ds
467
+ >>> import mindspore.dataset.text as text
468
+ >>>
430
469
  >>> def my_tokenizer(line):
431
470
  ... return line.split()
471
+ >>>
472
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
473
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
432
474
  >>> text_file_dataset = text_file_dataset.map(operations=text.PythonTokenizer(my_tokenizer))
475
+
476
+ Tutorial Examples:
477
+ - `Illustration of text transforms
478
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
433
479
  """
434
480
 
435
481
  @check_python_tokenizer
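Beyond the map-based example above, a PythonTokenizer can also be exercised on its own; this sketch assumes the op is callable in eager mode like the other text transforms, and the sample sentence is arbitrary.

    import mindspore.dataset.text as text

    def my_tokenizer(line):
        # Any user-defined callable that returns a list of tokens will do.
        return line.split()

    tokenizer_op = text.PythonTokenizer(my_tokenizer)
    print(tokenizer_op("welcome to mindspore"))   # expected: ['welcome', 'to', 'mindspore']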
@@ -464,11 +510,11 @@ class SentencePieceTokenizer(TextTensorOperation):
464
510
  mode (Union[str, SentencePieceVocab]): SentencePiece model.
465
511
  If the input parameter is a file, it represents the path of the SentencePiece model to be loaded.
466
512
  If the input parameter is a SentencePieceVocab object, it should be constructed in advance.
467
- out_type (SPieceTokenizerOutType): The type of output, it can be any of [SPieceTokenizerOutType.STRING,
468
- SPieceTokenizerOutType.INT].
513
+ out_type (SPieceTokenizerOutType): The type of output, it can be ``SPieceTokenizerOutType.STRING`` or
514
+ ``SPieceTokenizerOutType.INT``.
469
515
 
470
- - SPieceTokenizerOutType.STRING, means output type of SentencePice Tokenizer is string.
471
- - SPieceTokenizerOutType.INT, means output type of SentencePice Tokenizer is int.
516
+ - ``SPieceTokenizerOutType.STRING``, means the output type of the SentencePiece tokenizer is string.
517
+ - ``SPieceTokenizerOutType.INT``, means the output type of the SentencePiece tokenizer is int.
472
518
 
473
519
  Raises:
474
520
  TypeError: If `mode` is not of type string or SentencePieceVocab.
@@ -478,13 +524,22 @@ class SentencePieceTokenizer(TextTensorOperation):
478
524
  ``CPU``
479
525
 
480
526
  Examples:
527
+ >>> import mindspore.dataset as ds
481
528
  >>> import mindspore.dataset.text as text
482
529
  >>> from mindspore.dataset.text import SentencePieceModel, SPieceTokenizerOutType
530
+ >>>
483
531
  >>> sentence_piece_vocab_file = "/path/to/sentence/piece/vocab/file"
484
532
  >>> vocab = text.SentencePieceVocab.from_file([sentence_piece_vocab_file], 5000, 0.9995,
485
533
  ... SentencePieceModel.UNIGRAM, {})
486
534
  >>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING)
535
+ >>>
536
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
537
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
487
538
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer)
539
+
540
+ Tutorial Examples:
541
+ - `Illustration of text transforms
542
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
488
543
  """
489
544
 
490
545
  @check_sentence_piece_tokenizer
@@ -505,7 +560,7 @@ class SlidingWindow(TextTensorOperation):
505
560
 
506
561
  Args:
507
562
  width (int): The width of the window. It must be an integer and greater than zero.
508
- axis (int, optional): The axis along which the sliding window is computed. Default: 0.
563
+ axis (int, optional): The axis along which the sliding window is computed. Default: ``0``.
509
564
 
510
565
  Raises:
511
566
  TypeError: If `width` is not of type int.
@@ -517,6 +572,8 @@ class SlidingWindow(TextTensorOperation):
517
572
 
518
573
  Examples:
519
574
  >>> import mindspore.dataset as ds
575
+ >>> import mindspore.dataset.text as text
576
+ >>>
520
577
  >>> dataset = ds.NumpySlicesDataset(data=[[1, 2, 3, 4, 5]], column_names="col1")
521
578
  >>> # Data before
522
579
  >>> # | col1 |
@@ -531,6 +588,10 @@ class SlidingWindow(TextTensorOperation):
531
588
  >>> # | [2, 3, 4], |
532
589
  >>> # | [3, 4, 5]] |
533
590
  >>> # +--------------+
591
+
592
+ Tutorial Examples:
593
+ - `Illustration of text transforms
594
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
534
595
  """
535
596
 
536
597
  @check_slidingwindow
@@ -566,10 +627,15 @@ class ToNumber(TextTensorOperation):
566
627
  >>> import mindspore.dataset as ds
567
628
  >>> import mindspore.dataset.text as text
568
629
  >>> from mindspore import dtype as mstype
630
+ >>>
569
631
  >>> data = [["1", "2", "3"]]
570
632
  >>> dataset = ds.NumpySlicesDataset(data)
571
633
  >>> to_number_op = text.ToNumber(mstype.int8)
572
634
  >>> dataset = dataset.map(operations=to_number_op)
635
+
636
+ Tutorial Examples:
637
+ - `Illustration of text transforms
638
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
573
639
  """
574
640
 
575
641
  @check_to_number
@@ -589,10 +655,11 @@ class ToVectors(TextTensorOperation):
589
655
  Args:
590
656
  vectors (Vectors): A vectors object.
591
657
  unk_init (sequence, optional): Sequence used to initialize out-of-vectors (OOV) token.
592
- Default: None, initialize with zero vectors.
593
- lower_case_backup (bool, optional): Whether to look up the token in the lower case. If False, each token in the
594
- original case will be looked up; if True, each token in the original case will be looked up first, if not
595
- found in the keys of the property stoi, the token in the lower case will be looked up. Default: False.
658
+ Default: ``None``, initialize with zero vectors.
659
+ lower_case_backup (bool, optional): Whether to look up the token in the lower case. If ``False``,
660
+ each token in the original case will be looked up; if ``True``, each token in the original
661
+ case will be looked up first, if not found in the keys of the property stoi, the token in the
662
+ lower case will be looked up. Default: ``False``.
596
663
 
597
664
  Raises:
598
665
  TypeError: If `unk_init` is not of type sequence.
@@ -603,12 +670,21 @@ class ToVectors(TextTensorOperation):
603
670
  ``CPU``
604
671
 
605
672
  Examples:
673
+ >>> import mindspore.dataset as ds
606
674
  >>> import mindspore.dataset.text as text
675
+ >>>
607
676
  >>> # Load vectors from file
608
677
  >>> vectors = text.Vectors.from_file("/path/to/vectors/file")
609
678
  >>> # Use ToVectors operation to map tokens to vectors
610
679
  >>> to_vectors = text.ToVectors(vectors)
680
+ >>>
681
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
682
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
611
683
  >>> text_file_dataset = text_file_dataset.map(operations=[to_vectors])
684
+
685
+ Tutorial Examples:
686
+ - `Illustration of text transforms
687
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
612
688
  """
613
689
 
614
690
  @check_to_vectors
@@ -638,6 +714,9 @@ class Truncate(TextTensorOperation):
638
714
  ``CPU``
639
715
 
640
716
  Examples:
717
+ >>> import mindspore.dataset as ds
718
+ >>> import mindspore.dataset.text as text
719
+ >>>
641
720
  >>> dataset = ds.NumpySlicesDataset(data=[['a', 'b', 'c', 'd', 'e']], column_names=["text"], shuffle=False)
642
721
  >>> # Data before
643
722
  >>> # | col1 |
@@ -651,6 +730,10 @@ class Truncate(TextTensorOperation):
651
730
  >>> # +------------------------+
652
731
  >>> # | ['a', 'b', 'c', 'd'] |
653
732
  >>> # +------------------------+
733
+
734
+ Tutorial Examples:
735
+ - `Illustration of text transforms
736
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
654
737
  """
655
738
 
656
739
  @check_truncate
@@ -664,12 +747,12 @@ class Truncate(TextTensorOperation):
664
747
 
665
748
  class TruncateSequencePair(TextTensorOperation):
666
749
  """
667
- Truncate a pair of rank-1 tensors such that the total length is less than max_length.
668
-
669
- This operation takes two input tensors and returns two output Tensors.
750
+ Truncate a pair of 1-D string inputs so that their total length is less than the specified length.
670
751
 
671
752
  Args:
672
- max_length (int): Maximum length required.
753
+ max_length (int): The maximum total length of the output strings. If it is no less than the
754
+ total length of the original pair of strings, no truncation is performed; otherwise, the
755
+ longer of the two input strings is truncated until its total length equals this value.
673
756
 
674
757
  Raises:
675
758
  TypeError: If `max_length` is not of type int.
@@ -678,7 +761,9 @@ class TruncateSequencePair(TextTensorOperation):
678
761
  ``CPU``
679
762
 
680
763
  Examples:
764
+ >>> import mindspore.dataset as ds
681
765
  >>> import mindspore.dataset.text as text
766
+ >>>
682
767
  >>> dataset = ds.NumpySlicesDataset(data={"col1": [[1, 2, 3]], "col2": [[4, 5]]})
683
768
  >>> # Data before
684
769
  >>> # | col1 | col2 |
@@ -692,6 +777,10 @@ class TruncateSequencePair(TextTensorOperation):
692
777
  >>> # +-----------+-----------+
693
778
  >>> # | [1, 2] | [4, 5] |
694
779
  >>> # +-----------+-----------+
780
+
781
+ Tutorial Examples:
782
+ - `Illustration of text transforms
783
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
695
784
  """
696
785
 
697
786
  @check_pair_truncate
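To tie the `max_length` rule above back to the numbers in the docstring example, here is a sketch; it assumes the op can be invoked eagerly on a pair of sequences, which is an assumption rather than something the docstring shows.

    import mindspore.dataset.text as text

    truncate_pair = text.TruncateSequencePair(max_length=4)
    # The longer sequence is trimmed first: ([1, 2, 3], [4, 5]) -> ([1, 2], [4, 5]),
    # matching the before/after table in the docstring.
    print(truncate_pair([1, 2, 3], [4, 5]))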
@@ -705,10 +794,11 @@ class TruncateSequencePair(TextTensorOperation):
705
794
 
706
795
  class UnicodeCharTokenizer(TextTensorOperation):
707
796
  """
708
- Tokenize a scalar tensor of UTF-8 string to Unicode characters.
797
+ Unpack the Unicode characters in the input strings.
709
798
 
710
799
  Args:
711
- with_offsets (bool, optional): Whether or not output offsets of tokens. Default: False.
800
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
801
+ token in the original string. Default: ``False`` .
712
802
 
713
803
  Raises:
714
804
  TypeError: If `with_offsets` is not of type bool.
@@ -717,15 +807,25 @@ class UnicodeCharTokenizer(TextTensorOperation):
717
807
  ``CPU``
718
808
 
719
809
  Examples:
810
+ >>> import mindspore.dataset as ds
720
811
  >>> import mindspore.dataset.text as text
812
+ >>>
813
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
814
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
815
+ >>>
721
816
  >>> # If with_offsets=False, default output one column {["text", dtype=str]}
722
817
  >>> tokenizer_op = text.UnicodeCharTokenizer(with_offsets=False)
723
818
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
819
+ >>>
724
820
  >>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
725
821
  >>> # ["offsets_limit", dtype=uint32]}
726
822
  >>> tokenizer_op = text.UnicodeCharTokenizer(with_offsets=True)
727
823
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
728
824
  ... output_columns=["token", "offsets_start", "offsets_limit"])
825
+
826
+ Tutorial Examples:
827
+ - `Illustration of text transforms
828
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
729
829
  """
730
830
 
731
831
  @check_with_offsets
@@ -743,13 +843,14 @@ class WordpieceTokenizer(TextTensorOperation):
743
843
 
744
844
  Args:
745
845
  vocab (Vocab): Vocabulary used to look up words.
746
- suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: '##'.
846
+ suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: ``'##'``.
747
847
  max_bytes_per_token (int, optional): The maximum length of tokenization, words exceeding this length will
748
- not be split. Default: 100.
848
+ not be split. Default: ``100``.
749
849
  unknown_token (str, optional): The output for unknown words. When set to an empty string, the corresponding
750
850
  unknown word will be directly returned as the output. Otherwise, the set string will be returned as the
751
- output. Default: '[UNK]'.
752
- with_offsets (bool, optional): Whether to return the offsets of tokens. Default: False.
851
+ output. Default: ``'[UNK]'``.
852
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
853
+ token in the original string. Default: ``False`` .
753
854
 
754
855
  Raises:
755
856
  TypeError: If `vocab` is not of type :class:`mindspore.dataset.text.Vocab` .
@@ -763,19 +864,31 @@ class WordpieceTokenizer(TextTensorOperation):
763
864
  ``CPU``
764
865
 
765
866
  Examples:
867
+ >>> import mindspore.dataset as ds
766
868
  >>> import mindspore.dataset.text as text
869
+ >>>
870
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
871
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
872
+ >>>
767
873
  >>> vocab_list = ["book", "cholera", "era", "favor", "##ite", "my", "is", "love", "dur", "##ing", "the"]
768
874
  >>> vocab = text.Vocab.from_list(vocab_list)
875
+ >>>
769
876
  >>> # If with_offsets=False, default output one column {["text", dtype=str]}
770
877
  >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
771
878
  ... max_bytes_per_token=100, with_offsets=False)
772
879
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
880
+ >>>
773
881
  >>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
774
882
  >>> # ["offsets_limit", dtype=uint32]}
775
883
  >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
776
884
  ... max_bytes_per_token=100, with_offsets=True)
885
+ >>>
777
886
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
778
887
  ... output_columns=["token", "offsets_start", "offsets_limit"])
888
+
889
+ Tutorial Examples:
890
+ - `Illustration of text transforms
891
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
779
892
  """
780
893
 
781
894
  @check_wordpiece_tokenizer
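Using the vocabulary list from the docstring example, the `suffix_indicator` behaviour can be sketched as follows; eager invocation is assumed, and the expected split is inferred from the WordPiece description rather than quoted from the package.

    import mindspore.dataset.text as text

    vocab = text.Vocab.from_list(["book", "cholera", "era", "favor", "##ite", "my",
                                  "is", "love", "dur", "##ing", "the"])
    tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]')
    # "favorite" should split into "favor" + "##ite"; a word absent from the vocab maps to '[UNK]'.
    print(tokenizer_op(["my", "favorite", "book"]))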
@@ -813,27 +926,20 @@ if platform.system().lower() != 'windows':
813
926
  Args:
814
927
  lower_case (bool, optional): Whether to perform lowercase processing on the text. If True, will fold the
815
928
  text to lower case and strip accented characters. If False, will only perform normalization on the
816
- text, with mode specified by `normalization_form` . Default: False.
817
- keep_whitespace (bool, optional): If True, the whitespace will be kept in the output. Default: False.
818
- normalization_form (NormalizeForm, optional):
819
- `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ , only valid when `lower_case`
820
- is False, can be NormalizeForm.NONE, NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD or
821
- NormalizeForm.NFKD. Default: NormalizeForm.NONE.
822
-
823
- - NormalizeForm.NONE, no normalization.
824
- - NormalizeForm.NFC, Canonical Decomposition, followed by Canonical Composition.
825
- - NormalizeForm.NFKC, Compatibility Decomposition, followed by Canonical Composition.
826
- - NormalizeForm.NFD, Canonical Decomposition.
827
- - NormalizeForm.NFKD, Compatibility Decomposition.
828
-
929
+ text, with mode specified by `normalization_form` . Default: ``False``.
930
+ keep_whitespace (bool, optional): If True, the whitespace will be kept in the output. Default: ``False``.
931
+ normalization_form (NormalizeForm, optional): The desired normalization form.
932
+ See :class:`~.text.NormalizeForm` for details on optional values.
933
+ Default: ``NormalizeForm.NFKC`` .
829
934
  preserve_unused_token (bool, optional): Whether to preserve special tokens. If True, will not split special
830
- tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'. Default: True.
831
- with_offsets (bool, optional): Whether to return the offsets of tokens. Default: False.
935
+ tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'. Default: ``True``.
936
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
937
+ token in the original string. Default: ``False`` .
832
938
 
833
939
  Raises:
834
940
  TypeError: If `lower_case` is not of type bool.
835
941
  TypeError: If `keep_whitespace` is not of type bool.
836
- TypeError: If `normalization_form` is not of type :class:`mindspore.dataset.text.NormalizeForm` .
942
+ TypeError: If `normalization_form` is not of type :class:`~.text.NormalizeForm` .
837
943
  TypeError: If `preserve_unused_token` is not of type bool.
838
944
  TypeError: If `with_offsets` is not of type bool.
839
945
  RuntimeError: If dtype of input Tensor is not str.
@@ -842,27 +948,34 @@ if platform.system().lower() != 'windows':
842
948
  ``CPU``
843
949
 
844
950
  Examples:
951
+ >>> import mindspore.dataset as ds
845
952
  >>> import mindspore.dataset.text as text
846
953
  >>> from mindspore.dataset.text import NormalizeForm
847
954
  >>>
848
- >>> # If with_offsets=False, default output one column {["text", dtype=str]}
955
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
956
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
957
+ >>>
958
+ >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
849
959
  >>> tokenizer_op = text.BasicTokenizer(lower_case=False,
850
960
  ... keep_whitespace=False,
851
961
  ... normalization_form=NormalizeForm.NONE,
852
962
  ... preserve_unused_token=True,
853
963
  ... with_offsets=False)
854
964
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
855
- >>> # If with_offsets=True, then output three columns {["token", dtype=str],
856
- >>> # ["offsets_start", dtype=uint32],
857
- >>> # ["offsets_limit", dtype=uint32]}
965
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
966
+ >>> # ["offsets_start", dtype=uint32],
967
+ >>> # ["offsets_limit", dtype=uint32]}
858
968
  >>> tokenizer_op = text.BasicTokenizer(lower_case=False,
859
969
  ... keep_whitespace=False,
860
970
  ... normalization_form=NormalizeForm.NONE,
861
971
  ... preserve_unused_token=True,
862
972
  ... with_offsets=True)
863
- >>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
864
- ... output_columns=["token", "offsets_start",
865
- ... "offsets_limit"])
973
+ >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
974
+ ... output_columns=["token", "offsets_start", "offsets_limit"])
975
+
976
+ Tutorial Examples:
977
+ - `Illustration of text transforms
978
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
866
979
  """
867
980
 
868
981
  @check_basic_tokenizer
@@ -892,30 +1005,25 @@ if platform.system().lower() != 'windows':
892
1005
 
893
1006
  Args:
894
1007
  vocab (Vocab): Vocabulary used to look up words.
895
- suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: '##'.
1008
+ suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: ``'##'``.
896
1009
  max_bytes_per_token (int, optional): The maximum length of tokenization, words exceeding this length will
897
- not be split. Default: 100.
1010
+ not be split. Default: ``100``.
898
1011
  unknown_token (str, optional): The output for unknown words. When set to an empty string, the corresponding
899
1012
  unknown word will be directly returned as the output. Otherwise, the set string will be returned as the
900
- output. Default: '[UNK]'.
901
- lower_case (bool, optional): Whether to perform lowercase processing on the text. If True, will fold the
902
- text to lower case and strip accented characters. If False, will only perform normalization on the
903
- text, with mode specified by `normalization_form` . Default: False.
904
- keep_whitespace (bool, optional): If True, the whitespace will be kept in the output. Default: False.
905
- normalization_form (NormalizeForm, optional):
906
- `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ , only valid when `lower_case`
907
- is False, can be NormalizeForm.NONE, NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD or
908
- NormalizeForm.NFKD. Default: NormalizeForm.NONE.
909
-
910
- - NormalizeForm.NONE, no normalization.
911
- - NormalizeForm.NFC, Canonical Decomposition, followed by Canonical Composition.
912
- - NormalizeForm.NFKC, Compatibility Decomposition, followed by Canonical Composition.
913
- - NormalizeForm.NFD, Canonical Decomposition.
914
- - NormalizeForm.NFKD, Compatibility Decomposition.
915
-
916
- preserve_unused_token (bool, optional): Whether to preserve special tokens. If True, will not split special
917
- tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'. Default: True.
918
- with_offsets (bool, optional): Whether to return the offsets of tokens. Default: False.
1013
+ output. Default: ``'[UNK]'``.
1014
+ lower_case (bool, optional): Whether to perform lowercase processing on the text. If ``True``, will fold the
1015
+ text to lower case and strip accented characters. If ``False``, will only perform normalization on the
1016
+ text, with mode specified by `normalization_form` . Default: ``False``.
1017
+ keep_whitespace (bool, optional): If ``True``, the whitespace will be kept in the output.
1018
+ Default: ``False``.
1019
+ normalization_form (NormalizeForm, optional): The desired normalization form.
1020
+ See :class:`~.text.NormalizeForm` for details on optional values.
1021
+ Default: ``NormalizeForm.NFKC`` .
1022
+ preserve_unused_token (bool, optional): Whether to preserve special tokens. If ``True``,
1023
+ will not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'.
1024
+ Default: ``True``.
1025
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
1026
+ token in the original string. Default: ``False`` .
919
1027
 
920
1028
  Raises:
921
1029
  TypeError: If `vocab` is not of type :class:`mindspore.dataset.text.Vocab` .
@@ -925,7 +1033,7 @@ if platform.system().lower() != 'windows':
925
1033
  TypeError: If `unknown_token` is not of type str.
926
1034
  TypeError: If `lower_case` is not of type bool.
927
1035
  TypeError: If `keep_whitespace` is not of type bool.
928
- TypeError: If `normalization_form` is not of type :class:`mindspore.dataset.text.NormalizeForm` .
1036
+ TypeError: If `normalization_form` is not of type :class:`~.text.NormalizeForm` .
929
1037
  TypeError: If `preserve_unused_token` is not of type bool.
930
1038
  TypeError: If `with_offsets` is not of type bool.
931
1039
 
@@ -933,10 +1041,14 @@ if platform.system().lower() != 'windows':
933
1041
  ``CPU``
934
1042
 
935
1043
  Examples:
1044
+ >>> import mindspore.dataset as ds
936
1045
  >>> import mindspore.dataset.text as text
937
1046
  >>> from mindspore.dataset.text import NormalizeForm
938
1047
  >>>
939
- >>> # If with_offsets=False, default output one column {["text", dtype=str]}
1048
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
1049
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
1050
+ >>>
1051
+ >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
940
1052
  >>> vocab_list = ["床", "前", "明", "月", "光", "疑", "是", "地", "上", "霜", "举", "头", "望", "低",
941
1053
  ... "思", "故", "乡","繁", "體", "字", "嘿", "哈", "大", "笑", "嘻", "i", "am", "mak",
942
1054
  ... "make", "small", "mistake", "##s", "during", "work", "##ing", "hour", "😀", "😃",
@@ -948,16 +1060,20 @@ if platform.system().lower() != 'windows':
948
1060
  ... normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
949
1061
  ... with_offsets=False)
950
1062
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
951
- >>> # If with_offsets=True, then output three columns {["token", dtype=str],
952
- >>> # ["offsets_start", dtype=uint32],
953
- >>> # ["offsets_limit", dtype=uint32]}
1063
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
1064
+ >>> # ["offsets_start", dtype=uint32],
1065
+ >>> # ["offsets_limit", dtype=uint32]}
954
1066
  >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100,
955
1067
  ... unknown_token='[UNK]', lower_case=False, keep_whitespace=False,
956
1068
  ... normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
957
1069
  ... with_offsets=True)
958
- >>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
1070
+ >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
959
1071
  ... output_columns=["token", "offsets_start",
960
1072
  ... "offsets_limit"])
1073
+
1074
+ Tutorial Examples:
1075
+ - `Illustration of text transforms
1076
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
961
1077
  """
962
1078
 
963
1079
  @check_bert_tokenizer
@@ -997,9 +1113,16 @@ if platform.system().lower() != 'windows':
997
1113
  ``CPU``
998
1114
 
999
1115
  Examples:
1116
+ >>> import mindspore.dataset as ds
1000
1117
  >>> import mindspore.dataset.text as text
1001
1118
  >>> case_op = text.CaseFold()
1119
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
1120
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
1002
1121
  >>> text_file_dataset = text_file_dataset.map(operations=case_op)
1122
+
1123
+ Tutorial Examples:
1124
+ - `Illustration of text transforms
1125
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
1003
1126
  """
1004
1127
 
1005
1128
  def parse(self):
@@ -1018,10 +1141,17 @@ if platform.system().lower() != 'windows':
1018
1141
  ``CPU``
1019
1142
 
1020
1143
  Examples:
1144
+ >>> import mindspore.dataset as ds
1021
1145
  >>> import mindspore.dataset.text as text
1022
1146
  >>>
1023
1147
  >>> replace_op = text.FilterWikipediaXML()
1148
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
1149
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
1024
1150
  >>> text_file_dataset = text_file_dataset.map(operations=replace_op)
1151
+
1152
+ Tutorial Examples:
1153
+ - `Illustration of text transforms
1154
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
1025
1155
  """
1026
1156
 
1027
1157
  def parse(self):
@@ -1030,34 +1160,35 @@ if platform.system().lower() != 'windows':
1030
1160
 
1031
1161
  class NormalizeUTF8(TextTensorOperation):
1032
1162
  """
1033
- Apply normalize operation on UTF-8 string tensor.
1163
+ Normalize the input UTF-8 encoded strings.
1034
1164
 
1035
1165
  Note:
1036
1166
  NormalizeUTF8 is not supported on Windows platform yet.
1037
1167
 
1038
1168
  Args:
1039
- normalize_form (NormalizeForm, optional): Valid values can be [NormalizeForm.NONE, NormalizeForm.NFC,
1040
- NormalizeForm.NFKC, NormalizeForm.NFD, NormalizeForm.NFKD] any of the four unicode
1041
- normalized forms. Default: NormalizeForm.NFKC.
1042
- See http://unicode.org/reports/tr15/ for details.
1043
-
1044
- - NormalizeForm.NONE, do nothing for input string tensor.
1045
- - NormalizeForm.NFC, normalize with Normalization Form C.
1046
- - NormalizeForm.NFKC, normalize with Normalization Form KC.
1047
- - NormalizeForm.NFD, normalize with Normalization Form D.
1048
- - NormalizeForm.NFKD, normalize with Normalization Form KD.
1169
+ normalize_form (NormalizeForm, optional): The desired normalization form.
1170
+ See :class:`~.text.NormalizeForm` for details on optional values.
1171
+ Default: ``NormalizeForm.NFKC`` .
1049
1172
 
1050
1173
  Raises:
1051
- TypeError: If `normalize_form` is not of type NormalizeForm.
1174
+ TypeError: If `normalize_form` is not of type :class:`~.text.NormalizeForm`.
1052
1175
 
1053
1176
  Supported Platforms:
1054
1177
  ``CPU``
1055
1178
 
1056
1179
  Examples:
1180
+ >>> import mindspore.dataset as ds
1057
1181
  >>> import mindspore.dataset.text as text
1058
1182
  >>> from mindspore.dataset.text import NormalizeForm
1183
+ >>>
1059
1184
  >>> normalize_op = text.NormalizeUTF8(normalize_form=NormalizeForm.NFC)
1185
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
1186
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
1060
1187
  >>> text_file_dataset = text_file_dataset.map(operations=normalize_op)
1188
+
1189
+ Tutorial Examples:
1190
+ - `Illustration of text transforms
1191
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
1061
1192
  """
1062
1193
 
1063
1194
  def __init__(self, normalize_form=NormalizeForm.NFKC):
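A concrete pair of normalization forms makes the option above easier to picture; this sketch assumes eager invocation and uses a single accented character as the hypothetical input.

    import mindspore.dataset.text as text
    from mindspore.dataset.text import NormalizeForm

    nfd_op = text.NormalizeUTF8(normalize_form=NormalizeForm.NFD)
    nfc_op = text.NormalizeUTF8(normalize_form=NormalizeForm.NFC)
    # NFD decomposes 'é' into 'e' plus U+0301 (combining acute accent); NFC re-composes it.
    print(nfd_op(["é"]))
    print(nfc_op(["e\u0301"]))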
@@ -1074,33 +1205,39 @@ if platform.system().lower() != 'windows':
1074
1205
 
1075
1206
  class RegexReplace(TextTensorOperation):
1076
1207
  """
1077
- Replace a part of UTF-8 string tensor with given text according to regular expressions.
1078
-
1079
- See https://unicode-org.github.io/icu/userguide/strings/regexp.html for supported regex pattern.
1208
+ Replace part of the input UTF-8 string with a different text string using regular expressions.
1080
1209
 
1081
1210
  Note:
1082
1211
  RegexReplace is not supported on Windows platform yet.
1083
1212
 
1084
1213
  Args:
1085
- pattern (str): the regex expression patterns.
1086
- replace (str): the string to replace matched element.
1087
- replace_all (bool, optional): If False, only replace first matched element;
1088
- if True, replace all matched elements. Default: True.
1214
+ pattern (str): The regular expression, i.e. the standard textual syntax for
1215
+ representing patterns for matching text.
1216
+ replace (str): The string used to replace the matched elements.
1217
+ replace_all (bool, optional): Whether to replace all matched elements. If ``False``, only the
1218
+ first matched element will be replaced; otherwise, all matched elements will be replaced.
1219
+ Default: ``True``.
1089
1220
 
1090
1221
  Raises:
1091
- TypeError: If `pattern` is not of type string.
1092
- TypeError: If `replace` is not of type string.
1222
+ TypeError: If `pattern` is not of type str.
1223
+ TypeError: If `replace` is not of type str.
1093
1224
  TypeError: If `replace_all` is not of type bool.
1094
1225
 
1095
1226
  Supported Platforms:
1096
1227
  ``CPU``
1097
1228
 
1098
1229
  Examples:
1230
+ >>> import mindspore.dataset as ds
1099
1231
  >>> import mindspore.dataset.text as text
1100
- >>> pattern = 'Canada'
1101
- >>> replace = 'China'
1102
- >>> replace_op = text.RegexReplace(pattern, replace)
1103
- >>> text_file_dataset = text_file_dataset.map(operations=replace_op)
1232
+ >>>
1233
+ >>> regex_replace = text.RegexReplace('apple', 'orange')
1234
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
1235
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
1236
+ >>> text_file_dataset = text_file_dataset.map(operations=regex_replace)
1237
+
1238
+ Tutorial Examples:
1239
+ - `Illustration of text transforms
1240
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
1104
1241
  """
1105
1242
 
1106
1243
  @check_regex_replace
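The `replace_all` flag described above is clearest side by side; the sentence below is made up, and eager invocation of the op is assumed.

    import mindspore.dataset.text as text

    first_only = text.RegexReplace('apple', 'orange', replace_all=False)
    every_match = text.RegexReplace('apple', 'orange')   # replace_all defaults to True
    print(first_only(["apple pie and apple juice"]))     # only the first "apple" is replaced
    print(every_match(["apple pie and apple juice"]))    # both occurrences are replaced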
@@ -1128,8 +1265,9 @@ if platform.system().lower() != 'windows':
1128
1265
  The original string will be split by matched elements.
1129
1266
  keep_delim_pattern (str, optional): The string matched by 'delim_pattern' can be kept as a token
1130
1267
  if it can be matched by 'keep_delim_pattern'. The default value is an empty str
1131
- which means that delimiters will not be kept as an output token. Default: ''.
1132
- with_offsets (bool, optional): Whether or not output offsets of tokens. Default: False.
1268
+ which means that delimiters will not be kept as an output token. Default: ``''``.
1269
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
1270
+ token in the original string. Default: ``False`` .
1133
1271
 
1134
1272
  Raises:
1135
1273
  TypeError: If `delim_pattern` is not of type string.
@@ -1140,18 +1278,27 @@ if platform.system().lower() != 'windows':
1140
1278
  ``CPU``
1141
1279
 
1142
1280
  Examples:
1281
+ >>> import mindspore.dataset as ds
1143
1282
  >>> import mindspore.dataset.text as text
1144
- >>> # If with_offsets=False, default output is one column {["text", dtype=str]}
1283
+ >>>
1284
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
1285
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
1286
+ >>>
1287
+ >>> # 1) If with_offsets=False, default output is one column {["text", dtype=str]}
1145
1288
  >>> delim_pattern = r"[ |,]"
1146
1289
  >>> tokenizer_op = text.RegexTokenizer(delim_pattern, with_offsets=False)
1147
1290
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
1148
- >>> # If with_offsets=True, then output three columns {["token", dtype=str],
1149
- >>> # ["offsets_start", dtype=uint32],
1150
- >>> # ["offsets_limit", dtype=uint32]}
1291
+ >>>
1292
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
1293
+ >>> # ["offsets_start", dtype=uint32],
1294
+ >>> # ["offsets_limit", dtype=uint32]}
1151
1295
  >>> tokenizer_op = text.RegexTokenizer(delim_pattern, with_offsets=True)
1152
- >>> text_file_dataset_1 = text_file_dataset_1.map(operations=tokenizer_op, input_columns=["text"],
1153
- ... output_columns=["token", "offsets_start",
1154
- ... "offsets_limit"])
1296
+ >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
1297
+ ... output_columns=["token", "offsets_start", "offsets_limit"])
1298
+
1299
+ Tutorial Examples:
1300
+ - `Illustration of text transforms
1301
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
1155
1302
  """
1156
1303
 
1157
1304
  @check_regex_tokenizer
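To show what `keep_delim_pattern` adds on top of `delim_pattern`, here is a sketch with a made-up sentence; eager invocation is assumed, and the comments describe the behaviour the docstring implies rather than captured output.

    import mindspore.dataset.text as text

    delim_pattern = r"[ |,]"
    drop_delims = text.RegexTokenizer(delim_pattern)
    keep_delims = text.RegexTokenizer(delim_pattern, keep_delim_pattern=delim_pattern)
    # With keep_delim_pattern set, the matched delimiters are emitted as tokens of their own.
    print(drop_delims(["Hello,World and MindSpore"]))
    print(keep_delims(["Hello,World and MindSpore"]))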
@@ -1173,8 +1320,9 @@ if platform.system().lower() != 'windows':
1173
1320
  UnicodeScriptTokenizer is not supported on Windows platform yet.
1174
1321
 
1175
1322
  Args:
1176
- keep_whitespace (bool, optional): Whether or not emit whitespace tokens. Default: False.
1177
- with_offsets (bool, optional): Whether or not output offsets of tokens. Default: False.
1323
+ keep_whitespace (bool, optional): Whether or not to emit whitespace tokens. Default: ``False``.
1324
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
1325
+ token in the original string. Default: ``False`` .
1178
1326
 
1179
1327
  Raises:
1180
1328
  TypeError: If `keep_whitespace` is not of type bool.
@@ -1184,17 +1332,27 @@ if platform.system().lower() != 'windows':
1184
1332
  ``CPU``
1185
1333
 
1186
1334
  Examples:
1335
+ >>> import mindspore.dataset as ds
1187
1336
  >>> import mindspore.dataset.text as text
1188
- >>> # If with_offsets=False, default output one column {["text", dtype=str]}
1337
+ >>>
1338
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
1339
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
1340
+ >>>
1341
+ >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
1189
1342
  >>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=False)
1190
1343
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
1191
- >>> # If with_offsets=True, then output three columns {["token", dtype=str],
1192
- >>> # ["offsets_start", dtype=uint32],
1193
- >>> # ["offsets_limit", dtype=uint32]}
1344
+ >>>
1345
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
1346
+ >>> # ["offsets_start", dtype=uint32],
1347
+ >>> # ["offsets_limit", dtype=uint32]}
1194
1348
  >>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
1195
1349
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
1196
1350
  ... output_columns=["token", "offsets_start", "offsets_limit"])
1197
1351
 
1352
+ Tutorial Examples:
1353
+ - `Illustration of text transforms
1354
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
1355
+
1198
1356
  """
1199
1357
 
1200
1358
  @check_unicode_script_tokenizer
@@ -1217,7 +1375,8 @@ if platform.system().lower() != 'windows':
1217
1375
  WhitespaceTokenizer is not supported on Windows platform yet.
1218
1376
 
1219
1377
  Args:
1220
- with_offsets (bool, optional): Whether or not output offsets of tokens. Default: False.
1378
+ with_offsets (bool, optional): Whether to output the start and end offsets of each
1379
+ token in the original string. Default: ``False`` .
1221
1380
 
1222
1381
  Raises:
1223
1382
  TypeError: If `with_offsets` is not of type bool.
@@ -1226,16 +1385,26 @@ if platform.system().lower() != 'windows':
1226
1385
  ``CPU``
1227
1386
 
1228
1387
  Examples:
1388
+ >>> import mindspore.dataset as ds
1229
1389
  >>> import mindspore.dataset.text as text
1230
- >>> # If with_offsets=False, default output one column {["text", dtype=str]}
1390
+ >>>
1391
+ >>> text_file_list = ["/path/to/text_file_dataset_file"]
1392
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
1393
+ >>>
1394
+ >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
1231
1395
  >>> tokenizer_op = text.WhitespaceTokenizer(with_offsets=False)
1232
1396
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op)
1233
- >>> # If with_offsets=True, then output three columns {["token", dtype=str],
1397
+ >>>
1398
+ >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
1234
1399
  >>> # ["offsets_start", dtype=uint32],
1235
1400
  >>> # ["offsets_limit", dtype=uint32]}
1236
1401
  >>> tokenizer_op = text.WhitespaceTokenizer(with_offsets=True)
1237
1402
  >>> text_file_dataset = text_file_dataset.map(operations=tokenizer_op, input_columns=["text"],
1238
1403
  ... output_columns=["token", "offsets_start", "offsets_limit"])
1404
+
1405
+ Tutorial Examples:
1406
+ - `Illustration of text transforms
1407
+ <https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/text_gallery.html>`_
1239
1408
  """
1240
1409
 
1241
1410
  @check_with_offsets