mindspore 2.4.0__cp310-cp310-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (1387):
  1. mindspore/.commit_id +1 -0
  2. mindspore/__init__.py +53 -0
  3. mindspore/_c_dataengine.cpython-310-darwin.so +0 -0
  4. mindspore/_c_expression.cpython-310-darwin.so +0 -0
  5. mindspore/_c_mindrecord.cpython-310-darwin.so +0 -0
  6. mindspore/_check_jit_forbidden_api.py +106 -0
  7. mindspore/_checkparam.py +1419 -0
  8. mindspore/_extends/__init__.py +23 -0
  9. mindspore/_extends/builtin_operations.py +224 -0
  10. mindspore/_extends/graph_kernel/__init__.py +17 -0
  11. mindspore/_extends/graph_kernel/model/__init__.py +19 -0
  12. mindspore/_extends/graph_kernel/model/graph_parallel.py +311 -0
  13. mindspore/_extends/graph_kernel/model/graph_split.py +1348 -0
  14. mindspore/_extends/graph_kernel/model/model.py +553 -0
  15. mindspore/_extends/graph_kernel/model/model_builder.py +216 -0
  16. mindspore/_extends/graph_kernel/parallel_estimate.py +60 -0
  17. mindspore/_extends/graph_kernel/splitter.py +140 -0
  18. mindspore/_extends/graph_kernel/utils.py +28 -0
  19. mindspore/_extends/parallel_compile/__init__.py +19 -0
  20. mindspore/_extends/parallel_compile/akg_compiler/__init__.py +19 -0
  21. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +269 -0
  22. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +529 -0
  23. mindspore/_extends/parallel_compile/akg_compiler/compiler.py +56 -0
  24. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
  25. mindspore/_extends/parallel_compile/akg_compiler/get_file_path.py +36 -0
  26. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +556 -0
  27. mindspore/_extends/parallel_compile/akg_compiler/util.py +159 -0
  28. mindspore/_extends/parse/__init__.py +49 -0
  29. mindspore/_extends/parse/compile_config.py +299 -0
  30. mindspore/_extends/parse/namespace.py +136 -0
  31. mindspore/_extends/parse/parser.py +1448 -0
  32. mindspore/_extends/parse/resources.py +213 -0
  33. mindspore/_extends/parse/standard_method.py +4475 -0
  34. mindspore/_extends/parse/trope.py +97 -0
  35. mindspore/_extends/pijit/__init__.py +23 -0
  36. mindspore/_extends/pijit/pijit_func_white_list.py +669 -0
  37. mindspore/_extends/remote/__init__.py +19 -0
  38. mindspore/_extends/remote/kernel_build_server.py +199 -0
  39. mindspore/_extends/remote/kernel_build_server_akg.py +55 -0
  40. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  41. mindspore/_extends/remote/kernel_build_server_ascend.py +75 -0
  42. mindspore/_extends/utils.py +68 -0
  43. mindspore/_install_custom.py +43 -0
  44. mindspore/_profiler.py +30 -0
  45. mindspore/amp.py +433 -0
  46. mindspore/boost/__init__.py +42 -0
  47. mindspore/boost/adasum.py +319 -0
  48. mindspore/boost/base.py +535 -0
  49. mindspore/boost/boost.py +400 -0
  50. mindspore/boost/boost_cell_wrapper.py +790 -0
  51. mindspore/boost/dim_reduce.py +323 -0
  52. mindspore/boost/grad_accumulation.py +79 -0
  53. mindspore/boost/grad_freeze.py +382 -0
  54. mindspore/boost/group_loss_scale_manager.py +166 -0
  55. mindspore/boost/less_batch_normalization.py +174 -0
  56. mindspore/common/__init__.py +86 -0
  57. mindspore/common/_auto_dynamic.py +68 -0
  58. mindspore/common/_decorator.py +50 -0
  59. mindspore/common/_jit_fallback_utils.py +110 -0
  60. mindspore/common/_monad.py +25 -0
  61. mindspore/common/_pijit_context.py +190 -0
  62. mindspore/common/_register_for_adapter.py +74 -0
  63. mindspore/common/_register_for_recompute.py +48 -0
  64. mindspore/common/_register_for_tensor.py +46 -0
  65. mindspore/common/_stub_tensor.py +210 -0
  66. mindspore/common/_tensor_overload.py +139 -0
  67. mindspore/common/_utils.py +122 -0
  68. mindspore/common/api.py +2064 -0
  69. mindspore/common/auto_dynamic_shape.py +507 -0
  70. mindspore/common/dtype.py +422 -0
  71. mindspore/common/dump.py +130 -0
  72. mindspore/common/file_system.py +48 -0
  73. mindspore/common/generator.py +254 -0
  74. mindspore/common/hook_handle.py +143 -0
  75. mindspore/common/initializer.py +880 -0
  76. mindspore/common/jit_config.py +98 -0
  77. mindspore/common/lazy_inline.py +240 -0
  78. mindspore/common/mindir_util.py +111 -0
  79. mindspore/common/mutable.py +234 -0
  80. mindspore/common/no_inline.py +54 -0
  81. mindspore/common/np_dtype.py +25 -0
  82. mindspore/common/parameter.py +1081 -0
  83. mindspore/common/recompute.py +292 -0
  84. mindspore/common/seed.py +260 -0
  85. mindspore/common/sparse_tensor.py +1175 -0
  86. mindspore/common/symbol.py +122 -0
  87. mindspore/common/tensor.py +5039 -0
  88. mindspore/communication/__init__.py +37 -0
  89. mindspore/communication/_comm_helper.py +501 -0
  90. mindspore/communication/_hccl_management.py +297 -0
  91. mindspore/communication/comm_func.py +1395 -0
  92. mindspore/communication/management.py +673 -0
  93. mindspore/config/op_info.config +533 -0
  94. mindspore/context.py +2077 -0
  95. mindspore/dataset/__init__.py +90 -0
  96. mindspore/dataset/audio/__init__.py +61 -0
  97. mindspore/dataset/audio/transforms.py +3690 -0
  98. mindspore/dataset/audio/utils.py +386 -0
  99. mindspore/dataset/audio/validators.py +1172 -0
  100. mindspore/dataset/callback/__init__.py +20 -0
  101. mindspore/dataset/callback/ds_callback.py +368 -0
  102. mindspore/dataset/callback/validators.py +32 -0
  103. mindspore/dataset/core/__init__.py +13 -0
  104. mindspore/dataset/core/config.py +1095 -0
  105. mindspore/dataset/core/datatypes.py +101 -0
  106. mindspore/dataset/core/py_util_helpers.py +65 -0
  107. mindspore/dataset/core/validator_helpers.py +781 -0
  108. mindspore/dataset/debug/__init__.py +21 -0
  109. mindspore/dataset/debug/debug_hook.py +97 -0
  110. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  111. mindspore/dataset/engine/__init__.py +124 -0
  112. mindspore/dataset/engine/cache_admin.py +47 -0
  113. mindspore/dataset/engine/cache_client.py +129 -0
  114. mindspore/dataset/engine/datasets.py +4582 -0
  115. mindspore/dataset/engine/datasets_audio.py +911 -0
  116. mindspore/dataset/engine/datasets_standard_format.py +543 -0
  117. mindspore/dataset/engine/datasets_text.py +2161 -0
  118. mindspore/dataset/engine/datasets_user_defined.py +1184 -0
  119. mindspore/dataset/engine/datasets_vision.py +4816 -0
  120. mindspore/dataset/engine/iterators.py +371 -0
  121. mindspore/dataset/engine/obs/__init__.py +23 -0
  122. mindspore/dataset/engine/obs/config_loader.py +68 -0
  123. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +508 -0
  124. mindspore/dataset/engine/obs/util.py +482 -0
  125. mindspore/dataset/engine/offload.py +596 -0
  126. mindspore/dataset/engine/queue.py +304 -0
  127. mindspore/dataset/engine/samplers.py +895 -0
  128. mindspore/dataset/engine/serializer_deserializer.py +159 -0
  129. mindspore/dataset/engine/validators.py +2895 -0
  130. mindspore/dataset/text/__init__.py +51 -0
  131. mindspore/dataset/text/transforms.py +1703 -0
  132. mindspore/dataset/text/utils.py +715 -0
  133. mindspore/dataset/text/validators.py +642 -0
  134. mindspore/dataset/transforms/__init__.py +45 -0
  135. mindspore/dataset/transforms/c_transforms.py +638 -0
  136. mindspore/dataset/transforms/py_transforms.py +393 -0
  137. mindspore/dataset/transforms/py_transforms_util.py +255 -0
  138. mindspore/dataset/transforms/transforms.py +1260 -0
  139. mindspore/dataset/transforms/validators.py +410 -0
  140. mindspore/dataset/utils/__init__.py +19 -0
  141. mindspore/dataset/utils/browse_dataset.py +190 -0
  142. mindspore/dataset/utils/line_reader.py +126 -0
  143. mindspore/dataset/vision/__init__.py +65 -0
  144. mindspore/dataset/vision/c_transforms.py +2641 -0
  145. mindspore/dataset/vision/py_transforms.py +2120 -0
  146. mindspore/dataset/vision/py_transforms_util.py +1660 -0
  147. mindspore/dataset/vision/transforms.py +7295 -0
  148. mindspore/dataset/vision/utils.py +863 -0
  149. mindspore/dataset/vision/validators.py +1483 -0
  150. mindspore/default_config.py +2 -0
  151. mindspore/experimental/__init__.py +20 -0
  152. mindspore/experimental/es/__init__.py +22 -0
  153. mindspore/experimental/es/embedding_service.py +883 -0
  154. mindspore/experimental/es/embedding_service_layer.py +581 -0
  155. mindspore/experimental/llm_boost/__init__.py +21 -0
  156. mindspore/experimental/llm_boost/atb/__init__.py +23 -0
  157. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  158. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  159. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  160. mindspore/experimental/llm_boost/register.py +129 -0
  161. mindspore/experimental/llm_boost/utils.py +31 -0
  162. mindspore/experimental/map_parameter.py +309 -0
  163. mindspore/experimental/optim/__init__.py +40 -0
  164. mindspore/experimental/optim/adadelta.py +161 -0
  165. mindspore/experimental/optim/adagrad.py +168 -0
  166. mindspore/experimental/optim/adam.py +193 -0
  167. mindspore/experimental/optim/adamax.py +170 -0
  168. mindspore/experimental/optim/adamw.py +290 -0
  169. mindspore/experimental/optim/asgd.py +153 -0
  170. mindspore/experimental/optim/lr_scheduler.py +1371 -0
  171. mindspore/experimental/optim/nadam.py +157 -0
  172. mindspore/experimental/optim/optimizer.py +262 -0
  173. mindspore/experimental/optim/radam.py +194 -0
  174. mindspore/experimental/optim/rmsprop.py +154 -0
  175. mindspore/experimental/optim/rprop.py +164 -0
  176. mindspore/experimental/optim/sgd.py +156 -0
  177. mindspore/hal/__init__.py +40 -0
  178. mindspore/hal/_ascend.py +57 -0
  179. mindspore/hal/_base.py +57 -0
  180. mindspore/hal/_cpu.py +56 -0
  181. mindspore/hal/_gpu.py +57 -0
  182. mindspore/hal/contiguous_tensors_handle.py +175 -0
  183. mindspore/hal/device.py +356 -0
  184. mindspore/hal/event.py +179 -0
  185. mindspore/hal/memory.py +326 -0
  186. mindspore/hal/stream.py +357 -0
  187. mindspore/include/OWNERS +7 -0
  188. mindspore/include/api/allocator.h +97 -0
  189. mindspore/include/api/callback/callback.h +93 -0
  190. mindspore/include/api/callback/ckpt_saver.h +41 -0
  191. mindspore/include/api/callback/loss_monitor.h +33 -0
  192. mindspore/include/api/callback/lr_scheduler.h +51 -0
  193. mindspore/include/api/callback/time_monitor.h +34 -0
  194. mindspore/include/api/callback/train_accuracy.h +37 -0
  195. mindspore/include/api/cell.h +90 -0
  196. mindspore/include/api/cfg.h +82 -0
  197. mindspore/include/api/context.h +602 -0
  198. mindspore/include/api/data_type.h +47 -0
  199. mindspore/include/api/delegate.h +178 -0
  200. mindspore/include/api/delegate_api.h +75 -0
  201. mindspore/include/api/dual_abi_helper.h +208 -0
  202. mindspore/include/api/format.h +28 -0
  203. mindspore/include/api/graph.h +46 -0
  204. mindspore/include/api/kernel.h +58 -0
  205. mindspore/include/api/kernel_api.h +168 -0
  206. mindspore/include/api/metrics/accuracy.h +36 -0
  207. mindspore/include/api/metrics/metrics.h +41 -0
  208. mindspore/include/api/model.h +438 -0
  209. mindspore/include/api/model_group.h +91 -0
  210. mindspore/include/api/model_parallel_runner.h +168 -0
  211. mindspore/include/api/serialization.h +185 -0
  212. mindspore/include/api/status.h +192 -0
  213. mindspore/include/api/types.h +431 -0
  214. mindspore/include/api/visible.h +41 -0
  215. mindspore/include/c_api/context_c.h +179 -0
  216. mindspore/include/c_api/data_type_c.h +52 -0
  217. mindspore/include/c_api/format_c.h +46 -0
  218. mindspore/include/c_api/model_c.h +347 -0
  219. mindspore/include/c_api/status_c.h +79 -0
  220. mindspore/include/c_api/tensor_c.h +146 -0
  221. mindspore/include/c_api/types_c.h +67 -0
  222. mindspore/include/dataset/config.h +163 -0
  223. mindspore/include/dataset/constants.h +363 -0
  224. mindspore/include/dataset/execute.h +196 -0
  225. mindspore/include/dataset/text.h +1092 -0
  226. mindspore/include/dataset/transforms.h +638 -0
  227. mindspore/include/dataset/vision.h +2129 -0
  228. mindspore/include/dataset/vision_ascend.h +206 -0
  229. mindspore/include/dataset/vision_lite.h +625 -0
  230. mindspore/lib/libavcodec.59.dylib +0 -0
  231. mindspore/lib/libavdevice.59.dylib +0 -0
  232. mindspore/lib/libavfilter.8.dylib +0 -0
  233. mindspore/lib/libavformat.59.dylib +0 -0
  234. mindspore/lib/libavutil.57.dylib +0 -0
  235. mindspore/lib/libdnnl.2.dylib +0 -0
  236. mindspore/lib/libicudata.69.dylib +0 -0
  237. mindspore/lib/libicui18n.69.dylib +0 -0
  238. mindspore/lib/libicuuc.69.dylib +0 -0
  239. mindspore/lib/libmindspore_address_sorting.15.dylib +0 -0
  240. mindspore/lib/libmindspore_backend.dylib +0 -0
  241. mindspore/lib/libmindspore_common.dylib +0 -0
  242. mindspore/lib/libmindspore_core.dylib +0 -0
  243. mindspore/lib/libmindspore_glog.0.dylib +0 -0
  244. mindspore/lib/libmindspore_gpr.15.dylib +0 -0
  245. mindspore/lib/libmindspore_grpc++.1.dylib +0 -0
  246. mindspore/lib/libmindspore_grpc.15.dylib +0 -0
  247. mindspore/lib/libmindspore_np_dtype.dylib +0 -0
  248. mindspore/lib/libmindspore_ops.dylib +0 -0
  249. mindspore/lib/libmindspore_upb.15.dylib +0 -0
  250. mindspore/lib/libnnacl.dylib +0 -0
  251. mindspore/lib/libopencv_core.4.5.dylib +0 -0
  252. mindspore/lib/libopencv_imgcodecs.4.5.dylib +0 -0
  253. mindspore/lib/libopencv_imgproc.4.5.dylib +0 -0
  254. mindspore/lib/libps_cache.dylib +0 -0
  255. mindspore/lib/libswresample.4.dylib +0 -0
  256. mindspore/lib/libswscale.6.dylib +0 -0
  257. mindspore/lib/libtinyxml2.8.dylib +0 -0
  258. mindspore/log.py +633 -0
  259. mindspore/mindrecord/__init__.py +43 -0
  260. mindspore/mindrecord/common/__init__.py +17 -0
  261. mindspore/mindrecord/common/constant.py +20 -0
  262. mindspore/mindrecord/common/enums.py +44 -0
  263. mindspore/mindrecord/common/exceptions.py +311 -0
  264. mindspore/mindrecord/config.py +809 -0
  265. mindspore/mindrecord/filereader.py +174 -0
  266. mindspore/mindrecord/filewriter.py +722 -0
  267. mindspore/mindrecord/mindpage.py +210 -0
  268. mindspore/mindrecord/shardheader.py +141 -0
  269. mindspore/mindrecord/shardindexgenerator.py +74 -0
  270. mindspore/mindrecord/shardreader.py +117 -0
  271. mindspore/mindrecord/shardsegment.py +128 -0
  272. mindspore/mindrecord/shardutils.py +185 -0
  273. mindspore/mindrecord/shardwriter.py +237 -0
  274. mindspore/mindrecord/tools/__init__.py +17 -0
  275. mindspore/mindrecord/tools/cifar10.py +140 -0
  276. mindspore/mindrecord/tools/cifar100.py +153 -0
  277. mindspore/mindrecord/tools/cifar100_to_mr.py +185 -0
  278. mindspore/mindrecord/tools/cifar10_to_mr.py +177 -0
  279. mindspore/mindrecord/tools/csv_to_mr.py +200 -0
  280. mindspore/mindrecord/tools/imagenet_to_mr.py +206 -0
  281. mindspore/mindrecord/tools/mnist_to_mr.py +259 -0
  282. mindspore/mindrecord/tools/tfrecord_to_mr.py +360 -0
  283. mindspore/mint/__init__.py +1586 -0
  284. mindspore/mint/distributed/__init__.py +31 -0
  285. mindspore/mint/distributed/distributed.py +254 -0
  286. mindspore/mint/linalg/__init__.py +22 -0
  287. mindspore/mint/nn/__init__.py +757 -0
  288. mindspore/mint/nn/functional.py +679 -0
  289. mindspore/mint/nn/layer/__init__.py +39 -0
  290. mindspore/mint/nn/layer/activation.py +133 -0
  291. mindspore/mint/nn/layer/normalization.py +477 -0
  292. mindspore/mint/nn/layer/pooling.py +110 -0
  293. mindspore/mint/optim/__init__.py +24 -0
  294. mindspore/mint/optim/adamw.py +206 -0
  295. mindspore/mint/special/__init__.py +63 -0
  296. mindspore/multiprocessing/__init__.py +73 -0
  297. mindspore/nn/__init__.py +47 -0
  298. mindspore/nn/cell.py +2787 -0
  299. mindspore/nn/dynamic_lr.py +482 -0
  300. mindspore/nn/grad/__init__.py +21 -0
  301. mindspore/nn/grad/cell_grad.py +196 -0
  302. mindspore/nn/layer/__init__.py +63 -0
  303. mindspore/nn/layer/activation.py +1822 -0
  304. mindspore/nn/layer/basic.py +1629 -0
  305. mindspore/nn/layer/channel_shuffle.py +90 -0
  306. mindspore/nn/layer/combined.py +248 -0
  307. mindspore/nn/layer/container.py +734 -0
  308. mindspore/nn/layer/conv.py +1505 -0
  309. mindspore/nn/layer/dense.py +204 -0
  310. mindspore/nn/layer/embedding.py +869 -0
  311. mindspore/nn/layer/image.py +661 -0
  312. mindspore/nn/layer/math.py +1069 -0
  313. mindspore/nn/layer/normalization.py +1273 -0
  314. mindspore/nn/layer/padding.py +880 -0
  315. mindspore/nn/layer/pooling.py +2302 -0
  316. mindspore/nn/layer/rnn_cells.py +388 -0
  317. mindspore/nn/layer/rnns.py +849 -0
  318. mindspore/nn/layer/thor_layer.py +963 -0
  319. mindspore/nn/layer/timedistributed.py +155 -0
  320. mindspore/nn/layer/transformer.py +823 -0
  321. mindspore/nn/learning_rate_schedule.py +512 -0
  322. mindspore/nn/loss/__init__.py +36 -0
  323. mindspore/nn/loss/loss.py +2924 -0
  324. mindspore/nn/metrics.py +53 -0
  325. mindspore/nn/optim/__init__.py +45 -0
  326. mindspore/nn/optim/_dist_optimizer_registry.py +111 -0
  327. mindspore/nn/optim/ada_grad.py +217 -0
  328. mindspore/nn/optim/adadelta.py +206 -0
  329. mindspore/nn/optim/adafactor.py +448 -0
  330. mindspore/nn/optim/adam.py +1297 -0
  331. mindspore/nn/optim/adamax.py +220 -0
  332. mindspore/nn/optim/adasum.py +548 -0
  333. mindspore/nn/optim/asgd.py +216 -0
  334. mindspore/nn/optim/ftrl.py +401 -0
  335. mindspore/nn/optim/lamb.py +296 -0
  336. mindspore/nn/optim/lars.py +202 -0
  337. mindspore/nn/optim/lazyadam.py +533 -0
  338. mindspore/nn/optim/momentum.py +239 -0
  339. mindspore/nn/optim/optimizer.py +1034 -0
  340. mindspore/nn/optim/proximal_ada_grad.py +242 -0
  341. mindspore/nn/optim/rmsprop.py +264 -0
  342. mindspore/nn/optim/rprop.py +251 -0
  343. mindspore/nn/optim/sgd.py +237 -0
  344. mindspore/nn/optim/tft_wrapper.py +127 -0
  345. mindspore/nn/optim/thor.py +1310 -0
  346. mindspore/nn/probability/__init__.py +22 -0
  347. mindspore/nn/probability/bijector/__init__.py +35 -0
  348. mindspore/nn/probability/bijector/bijector.py +337 -0
  349. mindspore/nn/probability/bijector/exp.py +65 -0
  350. mindspore/nn/probability/bijector/gumbel_cdf.py +144 -0
  351. mindspore/nn/probability/bijector/invert.py +126 -0
  352. mindspore/nn/probability/bijector/power_transform.py +196 -0
  353. mindspore/nn/probability/bijector/scalar_affine.py +167 -0
  354. mindspore/nn/probability/bijector/softplus.py +189 -0
  355. mindspore/nn/probability/bnn_layers/__init__.py +29 -0
  356. mindspore/nn/probability/bnn_layers/_util.py +46 -0
  357. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +112 -0
  358. mindspore/nn/probability/bnn_layers/conv_variational.py +267 -0
  359. mindspore/nn/probability/bnn_layers/dense_variational.py +302 -0
  360. mindspore/nn/probability/bnn_layers/layer_distribution.py +123 -0
  361. mindspore/nn/probability/distribution/__init__.py +56 -0
  362. mindspore/nn/probability/distribution/_utils/__init__.py +34 -0
  363. mindspore/nn/probability/distribution/_utils/custom_ops.py +96 -0
  364. mindspore/nn/probability/distribution/_utils/utils.py +362 -0
  365. mindspore/nn/probability/distribution/bernoulli.py +334 -0
  366. mindspore/nn/probability/distribution/beta.py +391 -0
  367. mindspore/nn/probability/distribution/categorical.py +435 -0
  368. mindspore/nn/probability/distribution/cauchy.py +383 -0
  369. mindspore/nn/probability/distribution/distribution.py +827 -0
  370. mindspore/nn/probability/distribution/exponential.py +350 -0
  371. mindspore/nn/probability/distribution/gamma.py +391 -0
  372. mindspore/nn/probability/distribution/geometric.py +335 -0
  373. mindspore/nn/probability/distribution/gumbel.py +257 -0
  374. mindspore/nn/probability/distribution/half_normal.py +133 -0
  375. mindspore/nn/probability/distribution/laplace.py +128 -0
  376. mindspore/nn/probability/distribution/log_normal.py +272 -0
  377. mindspore/nn/probability/distribution/logistic.py +379 -0
  378. mindspore/nn/probability/distribution/normal.py +336 -0
  379. mindspore/nn/probability/distribution/poisson.py +288 -0
  380. mindspore/nn/probability/distribution/student_t.py +149 -0
  381. mindspore/nn/probability/distribution/transformed_distribution.py +235 -0
  382. mindspore/nn/probability/distribution/uniform.py +375 -0
  383. mindspore/nn/reinforcement/__init__.py +24 -0
  384. mindspore/nn/reinforcement/_batch_read_write.py +142 -0
  385. mindspore/nn/reinforcement/_tensors_queue.py +152 -0
  386. mindspore/nn/reinforcement/tensor_array.py +145 -0
  387. mindspore/nn/sparse/__init__.py +23 -0
  388. mindspore/nn/sparse/sparse.py +147 -0
  389. mindspore/nn/wrap/__init__.py +49 -0
  390. mindspore/nn/wrap/cell_wrapper.py +968 -0
  391. mindspore/nn/wrap/grad_reducer.py +608 -0
  392. mindspore/nn/wrap/loss_scale.py +694 -0
  393. mindspore/numpy/__init__.py +121 -0
  394. mindspore/numpy/array_creations.py +2731 -0
  395. mindspore/numpy/array_ops.py +2629 -0
  396. mindspore/numpy/dtypes.py +185 -0
  397. mindspore/numpy/fft.py +966 -0
  398. mindspore/numpy/logic_ops.py +936 -0
  399. mindspore/numpy/math_ops.py +5911 -0
  400. mindspore/numpy/utils.py +214 -0
  401. mindspore/numpy/utils_const.py +565 -0
  402. mindspore/ops/__init__.py +56 -0
  403. mindspore/ops/_constants.py +30 -0
  404. mindspore/ops/_grad_experimental/__init__.py +31 -0
  405. mindspore/ops/_grad_experimental/grad_array_ops.py +830 -0
  406. mindspore/ops/_grad_experimental/grad_base.py +143 -0
  407. mindspore/ops/_grad_experimental/grad_comm_ops.py +714 -0
  408. mindspore/ops/_grad_experimental/grad_debug_ops.py +31 -0
  409. mindspore/ops/_grad_experimental/grad_implementations.py +203 -0
  410. mindspore/ops/_grad_experimental/grad_inner_ops.py +79 -0
  411. mindspore/ops/_grad_experimental/grad_math_ops.py +802 -0
  412. mindspore/ops/_grad_experimental/grad_nn_ops.py +231 -0
  413. mindspore/ops/_grad_experimental/grad_quant_ops.py +238 -0
  414. mindspore/ops/_grad_experimental/grad_sparse.py +342 -0
  415. mindspore/ops/_grad_experimental/grad_sparse_ops.py +399 -0
  416. mindspore/ops/_grad_experimental/taylor_rule.py +220 -0
  417. mindspore/ops/_op_impl/__init__.py +23 -0
  418. mindspore/ops/_op_impl/_custom_op/__init__.py +39 -0
  419. mindspore/ops/_op_impl/_custom_op/_basic.py +158 -0
  420. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +279 -0
  421. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +156 -0
  422. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +109 -0
  423. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +125 -0
  424. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +105 -0
  425. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +124 -0
  426. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +116 -0
  427. mindspore/ops/_op_impl/_custom_op/correction_mul.py +89 -0
  428. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +196 -0
  429. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +366 -0
  430. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +162 -0
  431. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +136 -0
  432. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +206 -0
  433. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +88 -0
  434. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +128 -0
  435. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +199 -0
  436. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +88 -0
  437. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +156 -0
  438. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +184 -0
  439. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +143 -0
  440. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +169 -0
  441. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +548 -0
  442. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +881 -0
  443. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +278 -0
  444. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +200 -0
  445. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +334 -0
  446. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +255 -0
  447. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +222 -0
  448. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +644 -0
  449. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +488 -0
  450. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +87 -0
  451. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +129 -0
  452. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +121 -0
  453. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +352 -0
  454. mindspore/ops/_op_impl/aicpu/__init__.py +441 -0
  455. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  456. mindspore/ops/_op_impl/aicpu/acos.py +32 -0
  457. mindspore/ops/_op_impl/aicpu/acos_grad.py +33 -0
  458. mindspore/ops/_op_impl/aicpu/acosh.py +34 -0
  459. mindspore/ops/_op_impl/aicpu/acosh_grad.py +35 -0
  460. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  461. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  462. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  463. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  464. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  465. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  466. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  467. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  468. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  469. mindspore/ops/_op_impl/aicpu/add_n.py +41 -0
  470. mindspore/ops/_op_impl/aicpu/add_v2.py +40 -0
  471. mindspore/ops/_op_impl/aicpu/addcdiv.py +41 -0
  472. mindspore/ops/_op_impl/aicpu/addcmul.py +47 -0
  473. mindspore/ops/_op_impl/aicpu/adjust_contrastv2.py +32 -0
  474. mindspore/ops/_op_impl/aicpu/adjust_hue.py +31 -0
  475. mindspore/ops/_op_impl/aicpu/adjust_saturation.py +32 -0
  476. mindspore/ops/_op_impl/aicpu/affine_grid.py +33 -0
  477. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  478. mindspore/ops/_op_impl/aicpu/angle.py +31 -0
  479. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  480. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  481. mindspore/ops/_op_impl/aicpu/argmax_with_value.py +43 -0
  482. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  483. mindspore/ops/_op_impl/aicpu/asin.py +32 -0
  484. mindspore/ops/_op_impl/aicpu/asin_grad.py +33 -0
  485. mindspore/ops/_op_impl/aicpu/asinh.py +34 -0
  486. mindspore/ops/_op_impl/aicpu/asinh_grad.py +35 -0
  487. mindspore/ops/_op_impl/aicpu/atanh.py +34 -0
  488. mindspore/ops/_op_impl/aicpu/avgpool_grad_v1.py +37 -0
  489. mindspore/ops/_op_impl/aicpu/avgpool_v1.py +36 -0
  490. mindspore/ops/_op_impl/aicpu/bartlett_window.py +36 -0
  491. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  492. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  493. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  494. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  495. mindspore/ops/_op_impl/aicpu/betainc.py +31 -0
  496. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  497. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +42 -0
  498. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  499. mindspore/ops/_op_impl/aicpu/blackman_window.py +36 -0
  500. mindspore/ops/_op_impl/aicpu/broadcast_to.py +58 -0
  501. mindspore/ops/_op_impl/aicpu/bucketize.py +34 -0
  502. mindspore/ops/_op_impl/aicpu/cache_swap_table.py +102 -0
  503. mindspore/ops/_op_impl/aicpu/cast.py +225 -0
  504. mindspore/ops/_op_impl/aicpu/cauchy.py +33 -0
  505. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  506. mindspore/ops/_op_impl/aicpu/check_numerics.py +33 -0
  507. mindspore/ops/_op_impl/aicpu/cholesky.py +32 -0
  508. mindspore/ops/_op_impl/aicpu/cholesky_inverse.py +31 -0
  509. mindspore/ops/_op_impl/aicpu/cholesky_solve.py +33 -0
  510. mindspore/ops/_op_impl/aicpu/choleskygrad.py +32 -0
  511. mindspore/ops/_op_impl/aicpu/coalesce.py +37 -0
  512. mindspore/ops/_op_impl/aicpu/col2im.py +38 -0
  513. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  514. mindspore/ops/_op_impl/aicpu/compare_and_bitpack.py +37 -0
  515. mindspore/ops/_op_impl/aicpu/complex.py +32 -0
  516. mindspore/ops/_op_impl/aicpu/complex_abs.py +31 -0
  517. mindspore/ops/_op_impl/aicpu/compute_accidental_hits.py +44 -0
  518. mindspore/ops/_op_impl/aicpu/concat.py +57 -0
  519. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  520. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  521. mindspore/ops/_op_impl/aicpu/conj.py +42 -0
  522. mindspore/ops/_op_impl/aicpu/conjugate_transpose.py +58 -0
  523. mindspore/ops/_op_impl/aicpu/cos.py +34 -0
  524. mindspore/ops/_op_impl/aicpu/cosh.py +34 -0
  525. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  526. mindspore/ops/_op_impl/aicpu/crop_and_resize.py +69 -0
  527. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_boxes.py +68 -0
  528. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  529. mindspore/ops/_op_impl/aicpu/cross.py +42 -0
  530. mindspore/ops/_op_impl/aicpu/csr_sparse_matrix_to_dense.py +48 -0
  531. mindspore/ops/_op_impl/aicpu/csr_sparse_matrix_to_sparse_tensor.py +51 -0
  532. mindspore/ops/_op_impl/aicpu/ctc_greedy_decoder.py +35 -0
  533. mindspore/ops/_op_impl/aicpu/ctc_loss_v2.py +43 -0
  534. mindspore/ops/_op_impl/aicpu/ctc_loss_v2_grad.py +45 -0
  535. mindspore/ops/_op_impl/aicpu/ctcloss.py +38 -0
  536. mindspore/ops/_op_impl/aicpu/cummax.py +41 -0
  537. mindspore/ops/_op_impl/aicpu/cumprod.py +58 -0
  538. mindspore/ops/_op_impl/aicpu/cumsum.py +58 -0
  539. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  540. mindspore/ops/_op_impl/aicpu/data_format_vec_permute.py +32 -0
  541. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  542. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
  543. mindspore/ops/_op_impl/aicpu/dense_to_csr_sparse_matrix.py +49 -0
  544. mindspore/ops/_op_impl/aicpu/dense_to_dense_set_operation.py +45 -0
  545. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  546. mindspore/ops/_op_impl/aicpu/depth_to_space.py +44 -0
  547. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  548. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  549. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  550. mindspore/ops/_op_impl/aicpu/digamma.py +31 -0
  551. mindspore/ops/_op_impl/aicpu/div.py +41 -0
  552. mindspore/ops/_op_impl/aicpu/div_no_nan.py +35 -0
  553. mindspore/ops/_op_impl/aicpu/dropout2d.py +42 -0
  554. mindspore/ops/_op_impl/aicpu/dropout3d.py +42 -0
  555. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +41 -0
  556. mindspore/ops/_op_impl/aicpu/dropout_genmask_v3.py +32 -0
  557. mindspore/ops/_op_impl/aicpu/dynamic_stitch.py +42 -0
  558. mindspore/ops/_op_impl/aicpu/edit_distance.py +56 -0
  559. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  560. mindspore/ops/_op_impl/aicpu/embedding_lookup.py +102 -0
  561. mindspore/ops/_op_impl/aicpu/end_of_sequence.py +30 -0
  562. mindspore/ops/_op_impl/aicpu/environ_create.py +28 -0
  563. mindspore/ops/_op_impl/aicpu/environ_destroy_all.py +28 -0
  564. mindspore/ops/_op_impl/aicpu/environ_get.py +41 -0
  565. mindspore/ops/_op_impl/aicpu/environ_set.py +40 -0
  566. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  567. mindspore/ops/_op_impl/aicpu/equal.py +41 -0
  568. mindspore/ops/_op_impl/aicpu/exp.py +37 -0
  569. mindspore/ops/_op_impl/aicpu/expand.py +45 -0
  570. mindspore/ops/_op_impl/aicpu/expand_dims.py +42 -0
  571. mindspore/ops/_op_impl/aicpu/expm1.py +34 -0
  572. mindspore/ops/_op_impl/aicpu/extract_glimpse.py +35 -0
  573. mindspore/ops/_op_impl/aicpu/eye.py +44 -0
  574. mindspore/ops/_op_impl/aicpu/fft_with_size.py +47 -0
  575. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +39 -0
  576. mindspore/ops/_op_impl/aicpu/fill_v2.py +58 -0
  577. mindspore/ops/_op_impl/aicpu/flatten.py +43 -0
  578. mindspore/ops/_op_impl/aicpu/floor_div.py +38 -0
  579. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  580. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  581. mindspore/ops/_op_impl/aicpu/fractional_avg_pool.py +41 -0
  582. mindspore/ops/_op_impl/aicpu/fractional_avg_pool_grad.py +41 -0
  583. mindspore/ops/_op_impl/aicpu/fractional_max_pool.py +41 -0
  584. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_grad_with_fixed_ksize.py +43 -0
  585. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +65 -0
  586. mindspore/ops/_op_impl/aicpu/fractional_max_pool_grad.py +42 -0
  587. mindspore/ops/_op_impl/aicpu/fractional_max_pool_grad_with_fixed_ksize.py +42 -0
  588. mindspore/ops/_op_impl/aicpu/fractional_max_pool_with_fixed_ksize.py +49 -0
  589. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  590. mindspore/ops/_op_impl/aicpu/fused_sparse_adam.py +46 -0
  591. mindspore/ops/_op_impl/aicpu/fused_sparse_ftrl.py +41 -0
  592. mindspore/ops/_op_impl/aicpu/fused_sparse_lazy_adam.py +46 -0
  593. mindspore/ops/_op_impl/aicpu/fused_sparse_proximal_adagrad.py +39 -0
  594. mindspore/ops/_op_impl/aicpu/gamma.py +38 -0
  595. mindspore/ops/_op_impl/aicpu/gather.py +46 -0
  596. mindspore/ops/_op_impl/aicpu/gather_d.py +79 -0
  597. mindspore/ops/_op_impl/aicpu/gather_d_grad_v2.py +79 -0
  598. mindspore/ops/_op_impl/aicpu/gather_grad.py +54 -0
  599. mindspore/ops/_op_impl/aicpu/gather_nd.py +56 -0
  600. mindspore/ops/_op_impl/aicpu/gcd.py +32 -0
  601. mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
  602. mindspore/ops/_op_impl/aicpu/geqrf.py +32 -0
  603. mindspore/ops/_op_impl/aicpu/get_next.py +39 -0
  604. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  605. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  606. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  607. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  608. mindspore/ops/_op_impl/aicpu/grid_sampler_2d.py +35 -0
  609. mindspore/ops/_op_impl/aicpu/grid_sampler_2d_grad.py +38 -0
  610. mindspore/ops/_op_impl/aicpu/grid_sampler_3d.py +34 -0
  611. mindspore/ops/_op_impl/aicpu/grid_sampler_3d_grad.py +38 -0
  612. mindspore/ops/_op_impl/aicpu/hamming_window.py +57 -0
  613. mindspore/ops/_op_impl/aicpu/hard_sigmoid.py +32 -0
  614. mindspore/ops/_op_impl/aicpu/hard_sigmoid_grad.py +33 -0
  615. mindspore/ops/_op_impl/aicpu/heaviside.py +40 -0
  616. mindspore/ops/_op_impl/aicpu/histogram.py +35 -0
  617. mindspore/ops/_op_impl/aicpu/hsv_to_rgb.py +32 -0
  618. mindspore/ops/_op_impl/aicpu/hypot.py +32 -0
  619. mindspore/ops/_op_impl/aicpu/identity.py +42 -0
  620. mindspore/ops/_op_impl/aicpu/identity_n.py +41 -0
  621. mindspore/ops/_op_impl/aicpu/igamma.py +30 -0
  622. mindspore/ops/_op_impl/aicpu/igammac.py +30 -0
  623. mindspore/ops/_op_impl/aicpu/igammagrada.py +30 -0
  624. mindspore/ops/_op_impl/aicpu/im2col.py +43 -0
  625. mindspore/ops/_op_impl/aicpu/imag.py +31 -0
  626. mindspore/ops/_op_impl/aicpu/index_fill.py +54 -0
  627. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  628. mindspore/ops/_op_impl/aicpu/init_data_set_queue.py +27 -0
  629. mindspore/ops/_op_impl/aicpu/inplace_index_add.py +39 -0
  630. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  631. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  632. mindspore/ops/_op_impl/aicpu/is_finite.py +40 -0
  633. mindspore/ops/_op_impl/aicpu/is_inf.py +31 -0
  634. mindspore/ops/_op_impl/aicpu/is_nan.py +31 -0
  635. mindspore/ops/_op_impl/aicpu/kldivloss.py +34 -0
  636. mindspore/ops/_op_impl/aicpu/kldivlossgrad.py +35 -0
  637. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  638. mindspore/ops/_op_impl/aicpu/lcm.py +32 -0
  639. mindspore/ops/_op_impl/aicpu/left_shift.py +38 -0
  640. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  641. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  642. mindspore/ops/_op_impl/aicpu/lgamma.py +33 -0
  643. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +57 -0
  644. mindspore/ops/_op_impl/aicpu/linspace.py +33 -0
  645. mindspore/ops/_op_impl/aicpu/list_diff.py +50 -0
  646. mindspore/ops/_op_impl/aicpu/log.py +37 -0
  647. mindspore/ops/_op_impl/aicpu/log1p.py +34 -0
  648. mindspore/ops/_op_impl/aicpu/log_matrix_determinant.py +31 -0
  649. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  650. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +37 -0
  651. mindspore/ops/_op_impl/aicpu/logical_xor.py +30 -0
  652. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  653. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  654. mindspore/ops/_op_impl/aicpu/logspace.py +36 -0
  655. mindspore/ops/_op_impl/aicpu/lower_bound.py +47 -0
  656. mindspore/ops/_op_impl/aicpu/lstsq.py +34 -0
  657. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  658. mindspore/ops/_op_impl/aicpu/lu_solve.py +32 -0
  659. mindspore/ops/_op_impl/aicpu/lu_unpack.py +114 -0
  660. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +49 -0
  661. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  662. mindspore/ops/_op_impl/aicpu/masked_scatter.py +40 -0
  663. mindspore/ops/_op_impl/aicpu/masked_select.py +31 -0
  664. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +35 -0
  665. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  666. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  667. mindspore/ops/_op_impl/aicpu/matrix_determinant.py +30 -0
  668. mindspore/ops/_op_impl/aicpu/matrix_diag_part_v3.py +54 -0
  669. mindspore/ops/_op_impl/aicpu/matrix_diag_v3.py +56 -0
  670. mindspore/ops/_op_impl/aicpu/matrix_exp.py +34 -0
  671. mindspore/ops/_op_impl/aicpu/matrix_inverse.py +31 -0
  672. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  673. mindspore/ops/_op_impl/aicpu/matrix_power.py +37 -0
  674. mindspore/ops/_op_impl/aicpu/matrix_set_diag_v3.py +54 -0
  675. mindspore/ops/_op_impl/aicpu/matrix_solve.py +35 -0
  676. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  677. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  678. mindspore/ops/_op_impl/aicpu/max_pool3d_grad_with_argmax.py +60 -0
  679. mindspore/ops/_op_impl/aicpu/max_pool3d_with_argmax.py +59 -0
  680. mindspore/ops/_op_impl/aicpu/max_unpool2d.py +57 -0
  681. mindspore/ops/_op_impl/aicpu/max_unpool2d_grad.py +58 -0
  682. mindspore/ops/_op_impl/aicpu/max_unpool3d.py +57 -0
  683. mindspore/ops/_op_impl/aicpu/max_unpool3d_grad.py +58 -0
  684. mindspore/ops/_op_impl/aicpu/maximum_grad_grad.py +40 -0
  685. mindspore/ops/_op_impl/aicpu/maxpool_grad_v1.py +46 -0
  686. mindspore/ops/_op_impl/aicpu/maxpool_v1.py +42 -0
  687. mindspore/ops/_op_impl/aicpu/median.py +39 -0
  688. mindspore/ops/_op_impl/aicpu/median_grad.py +45 -0
  689. mindspore/ops/_op_impl/aicpu/meshgrid.py +41 -0
  690. mindspore/ops/_op_impl/aicpu/minimum_grad_grad.py +40 -0
  691. mindspore/ops/_op_impl/aicpu/mirror_pad.py +50 -0
  692. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +48 -0
  693. mindspore/ops/_op_impl/aicpu/mul.py +43 -0
  694. mindspore/ops/_op_impl/aicpu/mul_no_nan.py +42 -0
  695. mindspore/ops/_op_impl/aicpu/multi_margin_loss.py +37 -0
  696. mindspore/ops/_op_impl/aicpu/multi_margin_loss_grad.py +41 -0
  697. mindspore/ops/_op_impl/aicpu/multilabel_margin_loss_grad.py +37 -0
  698. mindspore/ops/_op_impl/aicpu/multinomial.py +47 -0
  699. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  700. mindspore/ops/_op_impl/aicpu/mvlgamma.py +32 -0
  701. mindspore/ops/_op_impl/aicpu/mvlgamma_grad.py +33 -0
  702. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  703. mindspore/ops/_op_impl/aicpu/neg.py +36 -0
  704. mindspore/ops/_op_impl/aicpu/nextafter.py +32 -0
  705. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  706. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  707. mindspore/ops/_op_impl/aicpu/no_repeat_ngram.py +34 -0
  708. mindspore/ops/_op_impl/aicpu/non_deterministic_ints.py +33 -0
  709. mindspore/ops/_op_impl/aicpu/non_max_suppression.py +36 -0
  710. mindspore/ops/_op_impl/aicpu/non_max_suppression_with_overlaps.py +35 -0
  711. mindspore/ops/_op_impl/aicpu/non_zero.py +43 -0
  712. mindspore/ops/_op_impl/aicpu/not_equal.py +39 -0
  713. mindspore/ops/_op_impl/aicpu/nth_element.py +39 -0
  714. mindspore/ops/_op_impl/aicpu/nuclear_norm.py +33 -0
  715. mindspore/ops/_op_impl/aicpu/one_hot.py +116 -0
  716. mindspore/ops/_op_impl/aicpu/ones_like.py +39 -0
  717. mindspore/ops/_op_impl/aicpu/orgqr.py +34 -0
  718. mindspore/ops/_op_impl/aicpu/pad_and_shift.py +33 -0
  719. mindspore/ops/_op_impl/aicpu/pad_v3.py +61 -0
  720. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +59 -0
  721. mindspore/ops/_op_impl/aicpu/padding.py +41 -0
  722. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +54 -0
  723. mindspore/ops/_op_impl/aicpu/pdist_grad.py +33 -0
  724. mindspore/ops/_op_impl/aicpu/poisson.py +37 -0
  725. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  726. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  727. mindspore/ops/_op_impl/aicpu/pow.py +39 -0
  728. mindspore/ops/_op_impl/aicpu/print_tensor.py +39 -0
  729. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +113 -0
  730. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  731. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  732. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  733. mindspore/ops/_op_impl/aicpu/ragged_range.py +49 -0
  734. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  735. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  736. mindspore/ops/_op_impl/aicpu/random_categorical.py +68 -0
  737. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +36 -0
  738. mindspore/ops/_op_impl/aicpu/random_gamma.py +38 -0
  739. mindspore/ops/_op_impl/aicpu/random_poisson.py +134 -0
  740. mindspore/ops/_op_impl/aicpu/random_shuffle.py +47 -0
  741. mindspore/ops/_op_impl/aicpu/randperm.py +38 -0
  742. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  743. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  744. mindspore/ops/_op_impl/aicpu/range_v2.py +35 -0
  745. mindspore/ops/_op_impl/aicpu/real.py +31 -0
  746. mindspore/ops/_op_impl/aicpu/real_div.py +40 -0
  747. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  748. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  749. mindspore/ops/_op_impl/aicpu/reduce_mean.py +57 -0
  750. mindspore/ops/_op_impl/aicpu/reduce_prod.py +57 -0
  751. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  752. mindspore/ops/_op_impl/aicpu/relu_grad_v3.py +41 -0
  753. mindspore/ops/_op_impl/aicpu/relu_v3.py +38 -0
  754. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +96 -0
  755. mindspore/ops/_op_impl/aicpu/reshape.py +42 -0
  756. mindspore/ops/_op_impl/aicpu/resize_area.py +40 -0
  757. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +20 -0
  758. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +19 -0
  759. mindspore/ops/_op_impl/aicpu/resize_bilinear.py +32 -0
  760. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +32 -0
  761. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +36 -0
  762. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +35 -0
  763. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  764. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  765. mindspore/ops/_op_impl/aicpu/reverse_sequence.py +55 -0
  766. mindspore/ops/_op_impl/aicpu/reversev2.py +54 -0
  767. mindspore/ops/_op_impl/aicpu/rgb_to_hsv.py +32 -0
  768. mindspore/ops/_op_impl/aicpu/right_shift.py +38 -0
  769. mindspore/ops/_op_impl/aicpu/rnnt_loss.py +35 -0
  770. mindspore/ops/_op_impl/aicpu/round.py +34 -0
  771. mindspore/ops/_op_impl/aicpu/rsqrt.py +33 -0
  772. mindspore/ops/_op_impl/aicpu/rsqrt_grad.py +36 -0
  773. mindspore/ops/_op_impl/aicpu/sample_distorted_bounding_box_v2.py +49 -0
  774. mindspore/ops/_op_impl/aicpu/scale_and_translate.py +52 -0
  775. mindspore/ops/_op_impl/aicpu/scale_and_translate_grad.py +36 -0
  776. mindspore/ops/_op_impl/aicpu/scatter.py +79 -0
  777. mindspore/ops/_op_impl/aicpu/scatter_add_with_axis.py +53 -0
  778. mindspore/ops/_op_impl/aicpu/scatter_elements.py +39 -0
  779. mindspore/ops/_op_impl/aicpu/scatter_nd.py +59 -0
  780. mindspore/ops/_op_impl/aicpu/scatter_nd_max.py +54 -0
  781. mindspore/ops/_op_impl/aicpu/scatter_nd_min.py +54 -0
  782. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +59 -0
  783. mindspore/ops/_op_impl/aicpu/search_sorted.py +44 -0
  784. mindspore/ops/_op_impl/aicpu/segment_max.py +52 -0
  785. mindspore/ops/_op_impl/aicpu/segment_mean.py +56 -0
  786. mindspore/ops/_op_impl/aicpu/segment_min.py +52 -0
  787. mindspore/ops/_op_impl/aicpu/segment_prod.py +56 -0
  788. mindspore/ops/_op_impl/aicpu/segment_sum.py +56 -0
  789. mindspore/ops/_op_impl/aicpu/select.py +45 -0
  790. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  791. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  792. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  793. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  794. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  795. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  796. mindspore/ops/_op_impl/aicpu/set_size.py +38 -0
  797. mindspore/ops/_op_impl/aicpu/sign.py +36 -0
  798. mindspore/ops/_op_impl/aicpu/sin.py +34 -0
  799. mindspore/ops/_op_impl/aicpu/sinc.py +43 -0
  800. mindspore/ops/_op_impl/aicpu/sinh.py +34 -0
  801. mindspore/ops/_op_impl/aicpu/slice.py +59 -0
  802. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  803. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  804. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  805. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  806. mindspore/ops/_op_impl/aicpu/space_to_depth.py +44 -0
  807. mindspore/ops/_op_impl/aicpu/sparse_addmm.py +87 -0
  808. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +80 -0
  809. mindspore/ops/_op_impl/aicpu/sparse_apply_centered_rms_prop.py +105 -0
  810. mindspore/ops/_op_impl/aicpu/sparse_apply_momentum.py +80 -0
  811. mindspore/ops/_op_impl/aicpu/sparse_apply_proximal_gradient_descent.py +79 -0
  812. mindspore/ops/_op_impl/aicpu/sparse_concat.py +59 -0
  813. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  814. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_add.py +58 -0
  815. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_div.py +58 -0
  816. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_mul.py +58 -0
  817. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  818. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  819. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  820. mindspore/ops/_op_impl/aicpu/sparse_matrix_nnz.py +81 -0
  821. mindspore/ops/_op_impl/aicpu/sparse_matrix_transpose.py +116 -0
  822. mindspore/ops/_op_impl/aicpu/sparse_reorder.py +56 -0
  823. mindspore/ops/_op_impl/aicpu/sparse_reshape.py +34 -0
  824. mindspore/ops/_op_impl/aicpu/sparse_segment_mean_grad.py +36 -0
  825. mindspore/ops/_op_impl/aicpu/sparse_segment_mean_with_num_segments.py +44 -0
  826. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n.py +43 -0
  827. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n_grad.py +38 -0
  828. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n_with_num_segments.py +44 -0
  829. mindspore/ops/_op_impl/aicpu/sparse_segment_sum.py +49 -0
  830. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  831. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  832. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  833. mindspore/ops/_op_impl/aicpu/sparse_softmax.py +33 -0
  834. mindspore/ops/_op_impl/aicpu/sparse_softmax_cross_entropy_with_logits_v2.py +35 -0
  835. mindspore/ops/_op_impl/aicpu/sparse_sparse_maximum.py +53 -0
  836. mindspore/ops/_op_impl/aicpu/sparse_sparse_minimum.py +53 -0
  837. mindspore/ops/_op_impl/aicpu/sparse_tensor_dense_add.py +84 -0
  838. mindspore/ops/_op_impl/aicpu/sparse_tensor_dense_mat_mul.py +190 -0
  839. mindspore/ops/_op_impl/aicpu/sparse_tensor_to_csr_sparse_matrix.py +51 -0
  840. mindspore/ops/_op_impl/aicpu/sparse_to_dense_v2.py +73 -0
  841. mindspore/ops/_op_impl/aicpu/split.py +45 -0
  842. mindspore/ops/_op_impl/aicpu/sqrt.py +34 -0
  843. mindspore/ops/_op_impl/aicpu/sqrt_grad.py +35 -0
  844. mindspore/ops/_op_impl/aicpu/square.py +35 -0
  845. mindspore/ops/_op_impl/aicpu/squared_difference.py +37 -0
  846. mindspore/ops/_op_impl/aicpu/squeeze.py +42 -0
  847. mindspore/ops/_op_impl/aicpu/sspaddmm.py +97 -0
  848. mindspore/ops/_op_impl/aicpu/stack.py +45 -0
  849. mindspore/ops/_op_impl/aicpu/stack_push_pop.py +87 -0
  850. mindspore/ops/_op_impl/aicpu/standard_laplace.py +34 -0
  851. mindspore/ops/_op_impl/aicpu/standard_normal.py +34 -0
  852. mindspore/ops/_op_impl/aicpu/stateless_dropout_genmask.py +37 -0
  853. mindspore/ops/_op_impl/aicpu/stft.py +70 -0
  854. mindspore/ops/_op_impl/aicpu/strided_slice.py +43 -0
  855. mindspore/ops/_op_impl/aicpu/strided_slice_grad.py +50 -0
  856. mindspore/ops/_op_impl/aicpu/sub.py +41 -0
  857. mindspore/ops/_op_impl/aicpu/sub_and_filter.py +36 -0
  858. mindspore/ops/_op_impl/aicpu/tan.py +34 -0
  859. mindspore/ops/_op_impl/aicpu/tanh.py +34 -0
  860. mindspore/ops/_op_impl/aicpu/tanh_grad.py +35 -0
  861. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  862. mindspore/ops/_op_impl/aicpu/tile.py +56 -0
  863. mindspore/ops/_op_impl/aicpu/topk.py +34 -0
  864. mindspore/ops/_op_impl/aicpu/trace.py +40 -0
  865. mindspore/ops/_op_impl/aicpu/tracegrad.py +41 -0
  866. mindspore/ops/_op_impl/aicpu/trans_data.py +35 -0
  867. mindspore/ops/_op_impl/aicpu/transpose.py +58 -0
  868. mindspore/ops/_op_impl/aicpu/tridiagonal_matmul.py +42 -0
  869. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  870. mindspore/ops/_op_impl/aicpu/tril.py +42 -0
  871. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  872. mindspore/ops/_op_impl/aicpu/triplet_margin_loss.py +62 -0
  873. mindspore/ops/_op_impl/aicpu/triu.py +43 -0
  874. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  875. mindspore/ops/_op_impl/aicpu/truncated_normal.py +39 -0
  876. mindspore/ops/_op_impl/aicpu/uniform.py +36 -0
  877. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +41 -0
  878. mindspore/ops/_op_impl/aicpu/uniform_int.py +36 -0
  879. mindspore/ops/_op_impl/aicpu/uniform_real.py +33 -0
  880. mindspore/ops/_op_impl/aicpu/unique.py +31 -0
  881. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +47 -0
  882. mindspore/ops/_op_impl/aicpu/unique_with_pad.py +32 -0
  883. mindspore/ops/_op_impl/aicpu/unravel_index.py +32 -0
  884. mindspore/ops/_op_impl/aicpu/unsorted_segment_prod.py +53 -0
  885. mindspore/ops/_op_impl/aicpu/unsorted_segment_sum.py +57 -0
  886. mindspore/ops/_op_impl/aicpu/unstack.py +45 -0
  887. mindspore/ops/_op_impl/aicpu/update_cache.py +44 -0
  888. mindspore/ops/_op_impl/aicpu/upper_bound.py +47 -0
  889. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +42 -0
  890. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +49 -0
  891. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +40 -0
  892. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +50 -0
  893. mindspore/ops/_op_impl/aicpu/xdivy.py +35 -0
  894. mindspore/ops/_op_impl/aicpu/xlogy.py +33 -0
  895. mindspore/ops/_op_impl/aicpu/zeros_like.py +42 -0
  896. mindspore/ops/_op_impl/aicpu/zeta.py +31 -0
  897. mindspore/ops/_op_impl/akg/__init__.py +19 -0
  898. mindspore/ops/_op_impl/akg/ascend/__init__.py +48 -0
  899. mindspore/ops/_op_impl/akg/ascend/abs.py +35 -0
  900. mindspore/ops/_op_impl/akg/ascend/add.py +42 -0
  901. mindspore/ops/_op_impl/akg/ascend/add_n.py +37 -0
  902. mindspore/ops/_op_impl/akg/ascend/batchmatmul.py +33 -0
  903. mindspore/ops/_op_impl/akg/ascend/cast.py +46 -0
  904. mindspore/ops/_op_impl/akg/ascend/equal.py +35 -0
  905. mindspore/ops/_op_impl/akg/ascend/exp.py +35 -0
  906. mindspore/ops/_op_impl/akg/ascend/expand_dims.py +33 -0
  907. mindspore/ops/_op_impl/akg/ascend/greater.py +34 -0
  908. mindspore/ops/_op_impl/akg/ascend/greater_equal.py +35 -0
  909. mindspore/ops/_op_impl/akg/ascend/less.py +31 -0
  910. mindspore/ops/_op_impl/akg/ascend/less_equal.py +35 -0
  911. mindspore/ops/_op_impl/akg/ascend/load_im2col.py +33 -0
  912. mindspore/ops/_op_impl/akg/ascend/log.py +34 -0
  913. mindspore/ops/_op_impl/akg/ascend/maximum.py +36 -0
  914. mindspore/ops/_op_impl/akg/ascend/minimum.py +39 -0
  915. mindspore/ops/_op_impl/akg/ascend/mul.py +41 -0
  916. mindspore/ops/_op_impl/akg/ascend/neg.py +37 -0
  917. mindspore/ops/_op_impl/akg/ascend/pow.py +35 -0
  918. mindspore/ops/_op_impl/akg/ascend/prod_force_se_a.py +33 -0
  919. mindspore/ops/_op_impl/akg/ascend/real_div.py +36 -0
  920. mindspore/ops/_op_impl/akg/ascend/reciprocal.py +32 -0
  921. mindspore/ops/_op_impl/akg/ascend/reduce_max.py +32 -0
  922. mindspore/ops/_op_impl/akg/ascend/reduce_min.py +32 -0
  923. mindspore/ops/_op_impl/akg/ascend/reduce_sum.py +37 -0
  924. mindspore/ops/_op_impl/akg/ascend/rsqrt.py +35 -0
  925. mindspore/ops/_op_impl/akg/ascend/select.py +37 -0
  926. mindspore/ops/_op_impl/akg/ascend/sqrt.py +35 -0
  927. mindspore/ops/_op_impl/akg/ascend/square.py +35 -0
  928. mindspore/ops/_op_impl/akg/ascend/sub.py +42 -0
  929. mindspore/ops/_op_impl/akg/cpu/__init__.py +23 -0
  930. mindspore/ops/_op_impl/akg/cpu/coo2csr.py +29 -0
  931. mindspore/ops/_op_impl/akg/cpu/csr2coo.py +29 -0
  932. mindspore/ops/_op_impl/akg/cpu/csr_gather.py +33 -0
  933. mindspore/ops/_op_impl/akg/cpu/csr_mm.py +34 -0
  934. mindspore/ops/_op_impl/akg/cpu/csr_mul.py +33 -0
  935. mindspore/ops/_op_impl/akg/cpu/csr_mv.py +33 -0
  936. mindspore/ops/_op_impl/akg/cpu/csr_reduce_sum.py +31 -0
  937. mindspore/ops/_op_impl/akg/gpu/__init__.py +24 -0
  938. mindspore/ops/_op_impl/akg/gpu/coo2csr.py +29 -0
  939. mindspore/ops/_op_impl/akg/gpu/csr2coo.py +29 -0
  940. mindspore/ops/_op_impl/akg/gpu/csr_div.py +36 -0
  941. mindspore/ops/_op_impl/akg/gpu/csr_gather.py +33 -0
  942. mindspore/ops/_op_impl/akg/gpu/csr_mm.py +37 -0
  943. mindspore/ops/_op_impl/akg/gpu/csr_mul.py +36 -0
  944. mindspore/ops/_op_impl/akg/gpu/csr_mv.py +36 -0
  945. mindspore/ops/_op_impl/akg/gpu/csr_reduce_sum.py +33 -0
  946. mindspore/ops/_op_impl/cpu/__init__.py +78 -0
  947. mindspore/ops/_op_impl/cpu/adam.py +49 -0
  948. mindspore/ops/_op_impl/cpu/adam_weight_decay.py +47 -0
  949. mindspore/ops/_op_impl/cpu/arg_max.py +30 -0
  950. mindspore/ops/_op_impl/cpu/arg_max_with_value.py +31 -0
  951. mindspore/ops/_op_impl/cpu/arg_min_with_value.py +31 -0
  952. mindspore/ops/_op_impl/cpu/buffer_append.py +28 -0
  953. mindspore/ops/_op_impl/cpu/buffer_get.py +28 -0
  954. mindspore/ops/_op_impl/cpu/buffer_sample.py +28 -0
  955. mindspore/ops/_op_impl/cpu/cast.py +171 -0
  956. mindspore/ops/_op_impl/cpu/concat_offset.py +38 -0
  957. mindspore/ops/_op_impl/cpu/conv2d.py +30 -0
  958. mindspore/ops/_op_impl/cpu/conv3d.py +30 -0
  959. mindspore/ops/_op_impl/cpu/div.py +32 -0
  960. mindspore/ops/_op_impl/cpu/dropout.py +31 -0
  961. mindspore/ops/_op_impl/cpu/dropout_grad.py +30 -0
  962. mindspore/ops/_op_impl/cpu/dynamic_shape.py +42 -0
  963. mindspore/ops/_op_impl/cpu/dynamic_stitch.py +41 -0
  964. mindspore/ops/_op_impl/cpu/equal_count.py +30 -0
  965. mindspore/ops/_op_impl/cpu/gather_d.py +49 -0
  966. mindspore/ops/_op_impl/cpu/gather_d_grad.py +38 -0
  967. mindspore/ops/_op_impl/cpu/gather_d_grad_v2.py +40 -0
  968. mindspore/ops/_op_impl/cpu/gather_v2.py +40 -0
  969. mindspore/ops/_op_impl/cpu/hsigmoid.py +33 -0
  970. mindspore/ops/_op_impl/cpu/hsigmoid_grad.py +34 -0
  971. mindspore/ops/_op_impl/cpu/hswish.py +32 -0
  972. mindspore/ops/_op_impl/cpu/hswish_grad.py +33 -0
  973. mindspore/ops/_op_impl/cpu/identity_n.py +40 -0
  974. mindspore/ops/_op_impl/cpu/is_finite.py +39 -0
  975. mindspore/ops/_op_impl/cpu/l2loss.py +30 -0
  976. mindspore/ops/_op_impl/cpu/layer_norm.py +36 -0
  977. mindspore/ops/_op_impl/cpu/layer_norm_grad.py +38 -0
  978. mindspore/ops/_op_impl/cpu/maximum.py +35 -0
  979. mindspore/ops/_op_impl/cpu/maximum_grad.py +47 -0
  980. mindspore/ops/_op_impl/cpu/minimum.py +40 -0
  981. mindspore/ops/_op_impl/cpu/minimum_grad.py +51 -0
  982. mindspore/ops/_op_impl/cpu/mirror_pad.py +36 -0
  983. mindspore/ops/_op_impl/cpu/mirror_pad_grad.py +36 -0
  984. mindspore/ops/_op_impl/cpu/mul.py +32 -0
  985. mindspore/ops/_op_impl/cpu/one_hot.py +31 -0
  986. mindspore/ops/_op_impl/cpu/pad.py +32 -0
  987. mindspore/ops/_op_impl/cpu/pow.py +32 -0
  988. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +42 -0
  989. mindspore/ops/_op_impl/cpu/pyexecute.py +29 -0
  990. mindspore/ops/_op_impl/cpu/pyfunc.py +29 -0
  991. mindspore/ops/_op_impl/cpu/range.py +34 -0
  992. mindspore/ops/_op_impl/cpu/real_div.py +33 -0
  993. mindspore/ops/_op_impl/cpu/reduce_all.py +29 -0
  994. mindspore/ops/_op_impl/cpu/reduce_any.py +29 -0
  995. mindspore/ops/_op_impl/cpu/reduce_max.py +32 -0
  996. mindspore/ops/_op_impl/cpu/reduce_mean.py +40 -0
  997. mindspore/ops/_op_impl/cpu/reduce_min.py +32 -0
  998. mindspore/ops/_op_impl/cpu/reduce_prod.py +40 -0
  999. mindspore/ops/_op_impl/cpu/reduce_std.py +31 -0
  1000. mindspore/ops/_op_impl/cpu/reduce_sum.py +41 -0
  1001. mindspore/ops/_op_impl/cpu/space_to_batch_nd.py +38 -0
  1002. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  1003. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  1004. mindspore/ops/_op_impl/cpu/split.py +34 -0
  1005. mindspore/ops/_op_impl/cpu/sspaddmm.py +95 -0
  1006. mindspore/ops/_op_impl/cpu/stack.py +38 -0
  1007. mindspore/ops/_op_impl/cpu/sub.py +32 -0
  1008. mindspore/ops/_op_impl/cpu/tensor_copy_slices.py +41 -0
  1009. mindspore/ops/_op_impl/cpu/tile.py +37 -0
  1010. mindspore/ops/_op_impl/cpu/top_k.py +31 -0
  1011. mindspore/ops/_op_impl/cpu/transpose.py +39 -0
  1012. mindspore/ops/_primitive_cache.py +90 -0
  1013. mindspore/ops/_register_for_op.py +73 -0
  1014. mindspore/ops/_utils/__init__.py +20 -0
  1015. mindspore/ops/_utils/utils.py +147 -0
  1016. mindspore/ops/_vmap/__init__.py +25 -0
  1017. mindspore/ops/_vmap/vmap_array_ops.py +2149 -0
  1018. mindspore/ops/_vmap/vmap_base.py +533 -0
  1019. mindspore/ops/_vmap/vmap_convolution_ops.py +441 -0
  1020. mindspore/ops/_vmap/vmap_debug_ops.py +50 -0
  1021. mindspore/ops/_vmap/vmap_grad_math_ops.py +274 -0
  1022. mindspore/ops/_vmap/vmap_grad_nn_ops.py +806 -0
  1023. mindspore/ops/_vmap/vmap_image_ops.py +194 -0
  1024. mindspore/ops/_vmap/vmap_math_ops.py +993 -0
  1025. mindspore/ops/_vmap/vmap_nn_ops.py +2250 -0
  1026. mindspore/ops/_vmap/vmap_other_ops.py +105 -0
  1027. mindspore/ops/_vmap/vmap_random_ops.py +122 -0
  1028. mindspore/ops/_vmap/vmap_sparse_ops.py +89 -0
  1029. mindspore/ops/auto_generate/__init__.py +31 -0
  1030. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +309 -0
  1031. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +252 -0
  1032. mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
  1033. mindspore/ops/auto_generate/gen_extend_func.py +1701 -0
  1034. mindspore/ops/auto_generate/gen_ops_def.py +8482 -0
  1035. mindspore/ops/auto_generate/gen_ops_prim.py +16704 -0
  1036. mindspore/ops/auto_generate/pyboost_inner_prim.py +549 -0
  1037. mindspore/ops/composite/__init__.py +71 -0
  1038. mindspore/ops/composite/base.py +1318 -0
  1039. mindspore/ops/composite/env_ops.py +41 -0
  1040. mindspore/ops/composite/math_ops.py +125 -0
  1041. mindspore/ops/composite/multitype_ops/__init__.py +77 -0
  1042. mindspore/ops/composite/multitype_ops/_compile_utils.py +1459 -0
  1043. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +897 -0
  1044. mindspore/ops/composite/multitype_ops/add_impl.py +606 -0
  1045. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +56 -0
  1046. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +56 -0
  1047. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +56 -0
  1048. mindspore/ops/composite/multitype_ops/div_impl.py +189 -0
  1049. mindspore/ops/composite/multitype_ops/equal_impl.py +335 -0
  1050. mindspore/ops/composite/multitype_ops/floordiv_impl.py +88 -0
  1051. mindspore/ops/composite/multitype_ops/getitem_impl.py +400 -0
  1052. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +109 -0
  1053. mindspore/ops/composite/multitype_ops/greater_impl.py +110 -0
  1054. mindspore/ops/composite/multitype_ops/in_impl.py +196 -0
  1055. mindspore/ops/composite/multitype_ops/left_shift_impl.py +37 -0
  1056. mindspore/ops/composite/multitype_ops/less_equal_impl.py +111 -0
  1057. mindspore/ops/composite/multitype_ops/less_impl.py +112 -0
  1058. mindspore/ops/composite/multitype_ops/logic_not_impl.py +113 -0
  1059. mindspore/ops/composite/multitype_ops/logical_and_impl.py +60 -0
  1060. mindspore/ops/composite/multitype_ops/logical_or_impl.py +61 -0
  1061. mindspore/ops/composite/multitype_ops/mod_impl.py +86 -0
  1062. mindspore/ops/composite/multitype_ops/mul_impl.py +294 -0
  1063. mindspore/ops/composite/multitype_ops/negative_impl.py +79 -0
  1064. mindspore/ops/composite/multitype_ops/not_equal_impl.py +290 -0
  1065. mindspore/ops/composite/multitype_ops/not_in_impl.py +196 -0
  1066. mindspore/ops/composite/multitype_ops/ones_like_impl.py +96 -0
  1067. mindspore/ops/composite/multitype_ops/pow_impl.py +87 -0
  1068. mindspore/ops/composite/multitype_ops/right_shift_impl.py +37 -0
  1069. mindspore/ops/composite/multitype_ops/setitem_impl.py +884 -0
  1070. mindspore/ops/composite/multitype_ops/sub_impl.py +116 -0
  1071. mindspore/ops/composite/multitype_ops/uadd_impl.py +29 -0
  1072. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +228 -0
  1073. mindspore/ops/deprecated.py +315 -0
  1074. mindspore/ops/function/__init__.py +782 -0
  1075. mindspore/ops/function/array_func.py +7226 -0
  1076. mindspore/ops/function/clip_func.py +384 -0
  1077. mindspore/ops/function/debug_func.py +181 -0
  1078. mindspore/ops/function/fft_func.py +44 -0
  1079. mindspore/ops/function/grad/__init__.py +34 -0
  1080. mindspore/ops/function/grad/grad_func.py +1425 -0
  1081. mindspore/ops/function/image_func.py +292 -0
  1082. mindspore/ops/function/linalg_func.py +416 -0
  1083. mindspore/ops/function/math_func.py +12228 -0
  1084. mindspore/ops/function/nn_func.py +8609 -0
  1085. mindspore/ops/function/other_func.py +115 -0
  1086. mindspore/ops/function/parameter_func.py +134 -0
  1087. mindspore/ops/function/random_func.py +1715 -0
  1088. mindspore/ops/function/reshard_func.py +104 -0
  1089. mindspore/ops/function/sparse_func.py +884 -0
  1090. mindspore/ops/function/sparse_unary_func.py +2422 -0
  1091. mindspore/ops/function/spectral_func.py +150 -0
  1092. mindspore/ops/function/vmap_func.py +117 -0
  1093. mindspore/ops/functional.py +464 -0
  1094. mindspore/ops/op_info_register.py +1572 -0
  1095. mindspore/ops/operations/__init__.py +722 -0
  1096. mindspore/ops/operations/_csr_ops.py +403 -0
  1097. mindspore/ops/operations/_custom_grad.py +181 -0
  1098. mindspore/ops/operations/_embedding_cache_ops.py +307 -0
  1099. mindspore/ops/operations/_grad_ops.py +2978 -0
  1100. mindspore/ops/operations/_infer_ops.py +19 -0
  1101. mindspore/ops/operations/_inner_ops.py +2544 -0
  1102. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  1103. mindspore/ops/operations/_ms_kernel.py +601 -0
  1104. mindspore/ops/operations/_ocr_ops.py +379 -0
  1105. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  1106. mindspore/ops/operations/_pyfunc_registry.py +58 -0
  1107. mindspore/ops/operations/_quant_ops.py +1844 -0
  1108. mindspore/ops/operations/_rl_inner_ops.py +1231 -0
  1109. mindspore/ops/operations/_scalar_ops.py +106 -0
  1110. mindspore/ops/operations/_sequence_ops.py +1155 -0
  1111. mindspore/ops/operations/_sparse_grad_ops.py +56 -0
  1112. mindspore/ops/operations/_tensor_array.py +359 -0
  1113. mindspore/ops/operations/_thor_ops.py +807 -0
  1114. mindspore/ops/operations/array_ops.py +6124 -0
  1115. mindspore/ops/operations/comm_ops.py +1985 -0
  1116. mindspore/ops/operations/control_ops.py +127 -0
  1117. mindspore/ops/operations/custom_ops.py +1129 -0
  1118. mindspore/ops/operations/debug_ops.py +678 -0
  1119. mindspore/ops/operations/image_ops.py +1041 -0
  1120. mindspore/ops/operations/inner_ops.py +697 -0
  1121. mindspore/ops/operations/linalg_ops.py +95 -0
  1122. mindspore/ops/operations/manually_defined/__init__.py +24 -0
  1123. mindspore/ops/operations/manually_defined/_inner.py +73 -0
  1124. mindspore/ops/operations/manually_defined/ops_def.py +2271 -0
  1125. mindspore/ops/operations/math_ops.py +5095 -0
  1126. mindspore/ops/operations/nn_ops.py +9575 -0
  1127. mindspore/ops/operations/other_ops.py +874 -0
  1128. mindspore/ops/operations/random_ops.py +1288 -0
  1129. mindspore/ops/operations/reshard_ops.py +53 -0
  1130. mindspore/ops/operations/rl_ops.py +288 -0
  1131. mindspore/ops/operations/sparse_ops.py +2753 -0
  1132. mindspore/ops/operations/spectral_ops.py +111 -0
  1133. mindspore/ops/primitive.py +1046 -0
  1134. mindspore/ops/signature.py +54 -0
  1135. mindspore/ops/vm_impl_registry.py +91 -0
  1136. mindspore/ops_generate/__init__.py +27 -0
  1137. mindspore/ops_generate/arg_dtype_cast.py +252 -0
  1138. mindspore/ops_generate/arg_handler.py +197 -0
  1139. mindspore/ops_generate/gen_aclnn_implement.py +263 -0
  1140. mindspore/ops_generate/gen_constants.py +36 -0
  1141. mindspore/ops_generate/gen_ops.py +1099 -0
  1142. mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
  1143. mindspore/ops_generate/gen_pyboost_func.py +1052 -0
  1144. mindspore/ops_generate/gen_utils.py +209 -0
  1145. mindspore/ops_generate/op_proto.py +145 -0
  1146. mindspore/ops_generate/pyboost_utils.py +367 -0
  1147. mindspore/ops_generate/template.py +261 -0
  1148. mindspore/parallel/__init__.py +30 -0
  1149. mindspore/parallel/_auto_parallel_context.py +1486 -0
  1150. mindspore/parallel/_cell_wrapper.py +174 -0
  1151. mindspore/parallel/_cost_model_context.py +700 -0
  1152. mindspore/parallel/_dp_allreduce_fusion.py +159 -0
  1153. mindspore/parallel/_offload_context.py +275 -0
  1154. mindspore/parallel/_parallel_serialization.py +561 -0
  1155. mindspore/parallel/_ps_context.py +242 -0
  1156. mindspore/parallel/_recovery_context.py +110 -0
  1157. mindspore/parallel/_tensor.py +730 -0
  1158. mindspore/parallel/_transformer/__init__.py +35 -0
  1159. mindspore/parallel/_transformer/layers.py +765 -0
  1160. mindspore/parallel/_transformer/loss.py +251 -0
  1161. mindspore/parallel/_transformer/moe.py +693 -0
  1162. mindspore/parallel/_transformer/op_parallel_config.py +222 -0
  1163. mindspore/parallel/_transformer/transformer.py +3119 -0
  1164. mindspore/parallel/_utils.py +612 -0
  1165. mindspore/parallel/algo_parameter_config.py +400 -0
  1166. mindspore/parallel/checkpoint_transform.py +650 -0
  1167. mindspore/parallel/cluster/__init__.py +15 -0
  1168. mindspore/parallel/cluster/process_entity/__init__.py +18 -0
  1169. mindspore/parallel/cluster/process_entity/_api.py +352 -0
  1170. mindspore/parallel/cluster/process_entity/_utils.py +101 -0
  1171. mindspore/parallel/cluster/run.py +136 -0
  1172. mindspore/parallel/mpi/__init__.py +14 -0
  1173. mindspore/parallel/mpi/_mpi_config.py +116 -0
  1174. mindspore/parallel/parameter_broadcast.py +151 -0
  1175. mindspore/parallel/shard.py +481 -0
  1176. mindspore/parallel/transform_safetensors.py +993 -0
  1177. mindspore/profiler/__init__.py +28 -0
  1178. mindspore/profiler/common/__init__.py +14 -0
  1179. mindspore/profiler/common/constant.py +29 -0
  1180. mindspore/profiler/common/exceptions/__init__.py +14 -0
  1181. mindspore/profiler/common/exceptions/error_code.py +83 -0
  1182. mindspore/profiler/common/exceptions/exceptions.py +286 -0
  1183. mindspore/profiler/common/process_pool.py +41 -0
  1184. mindspore/profiler/common/registry.py +47 -0
  1185. mindspore/profiler/common/singleton.py +28 -0
  1186. mindspore/profiler/common/struct_type.py +118 -0
  1187. mindspore/profiler/common/util.py +472 -0
  1188. mindspore/profiler/common/validator/__init__.py +14 -0
  1189. mindspore/profiler/common/validator/validate_path.py +84 -0
  1190. mindspore/profiler/dynamic_profiler.py +694 -0
  1191. mindspore/profiler/envprofiling.py +254 -0
  1192. mindspore/profiler/parser/__init__.py +14 -0
  1193. mindspore/profiler/parser/aicpu_data_parser.py +272 -0
  1194. mindspore/profiler/parser/ascend_analysis/__init__.py +14 -0
  1195. mindspore/profiler/parser/ascend_analysis/constant.py +71 -0
  1196. mindspore/profiler/parser/ascend_analysis/file_manager.py +180 -0
  1197. mindspore/profiler/parser/ascend_analysis/function_event.py +185 -0
  1198. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +136 -0
  1199. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +131 -0
  1200. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +104 -0
  1201. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  1202. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +123 -0
  1203. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
  1204. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +75 -0
  1205. mindspore/profiler/parser/ascend_cluster_generator.py +116 -0
  1206. mindspore/profiler/parser/ascend_communicate_generator.py +314 -0
  1207. mindspore/profiler/parser/ascend_flops_generator.py +116 -0
  1208. mindspore/profiler/parser/ascend_fpbp_generator.py +82 -0
  1209. mindspore/profiler/parser/ascend_hccl_generator.py +271 -0
  1210. mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
  1211. mindspore/profiler/parser/ascend_memory_generator.py +185 -0
  1212. mindspore/profiler/parser/ascend_msprof_exporter.py +282 -0
  1213. mindspore/profiler/parser/ascend_msprof_generator.py +187 -0
  1214. mindspore/profiler/parser/ascend_op_generator.py +334 -0
  1215. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  1216. mindspore/profiler/parser/ascend_timeline_generator.py +545 -0
  1217. mindspore/profiler/parser/base_timeline_generator.py +483 -0
  1218. mindspore/profiler/parser/container.py +229 -0
  1219. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +697 -0
  1220. mindspore/profiler/parser/flops_parser.py +531 -0
  1221. mindspore/profiler/parser/framework_enum.py +111 -0
  1222. mindspore/profiler/parser/framework_parser.py +464 -0
  1223. mindspore/profiler/parser/framework_struct.py +61 -0
  1224. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  1225. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  1226. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  1227. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  1228. mindspore/profiler/parser/hccl_parser.py +573 -0
  1229. mindspore/profiler/parser/hwts_log_parser.py +122 -0
  1230. mindspore/profiler/parser/integrator.py +526 -0
  1231. mindspore/profiler/parser/memory_usage_parser.py +277 -0
  1232. mindspore/profiler/parser/minddata_analyzer.py +800 -0
  1233. mindspore/profiler/parser/minddata_parser.py +186 -0
  1234. mindspore/profiler/parser/minddata_pipeline_parser.py +299 -0
  1235. mindspore/profiler/parser/op_intermediate_parser.py +149 -0
  1236. mindspore/profiler/parser/optime_parser.py +250 -0
  1237. mindspore/profiler/parser/profiler_info.py +213 -0
  1238. mindspore/profiler/parser/step_trace_parser.py +666 -0
  1239. mindspore/profiler/profiler.py +153 -0
  1240. mindspore/profiler/profiling.py +1922 -0
  1241. mindspore/rewrite/__init__.py +28 -0
  1242. mindspore/rewrite/api/__init__.py +17 -0
  1243. mindspore/rewrite/api/node.py +519 -0
  1244. mindspore/rewrite/api/node_type.py +53 -0
  1245. mindspore/rewrite/api/pattern_engine.py +490 -0
  1246. mindspore/rewrite/api/scoped_value.py +181 -0
  1247. mindspore/rewrite/api/symbol_tree.py +497 -0
  1248. mindspore/rewrite/ast_helpers/__init__.py +25 -0
  1249. mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
  1250. mindspore/rewrite/ast_helpers/ast_finder.py +404 -0
  1251. mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
  1252. mindspore/rewrite/ast_helpers/ast_modifier.py +605 -0
  1253. mindspore/rewrite/ast_helpers/ast_replacer.py +79 -0
  1254. mindspore/rewrite/common/__init__.py +19 -0
  1255. mindspore/rewrite/common/config.py +24 -0
  1256. mindspore/rewrite/common/error_log.py +39 -0
  1257. mindspore/rewrite/common/event.py +28 -0
  1258. mindspore/rewrite/common/namer.py +271 -0
  1259. mindspore/rewrite/common/namespace.py +118 -0
  1260. mindspore/rewrite/common/observable.py +44 -0
  1261. mindspore/rewrite/common/observer.py +54 -0
  1262. mindspore/rewrite/node/__init__.py +22 -0
  1263. mindspore/rewrite/node/call_function.py +95 -0
  1264. mindspore/rewrite/node/cell_container.py +139 -0
  1265. mindspore/rewrite/node/control_flow.py +113 -0
  1266. mindspore/rewrite/node/node.py +1428 -0
  1267. mindspore/rewrite/node/node_manager.py +283 -0
  1268. mindspore/rewrite/node/node_topological_manager.py +223 -0
  1269. mindspore/rewrite/parsers/__init__.py +29 -0
  1270. mindspore/rewrite/parsers/arguments_parser.py +63 -0
  1271. mindspore/rewrite/parsers/assign_parser.py +852 -0
  1272. mindspore/rewrite/parsers/attribute_parser.py +57 -0
  1273. mindspore/rewrite/parsers/class_def_parser.py +289 -0
  1274. mindspore/rewrite/parsers/constant_parser.py +104 -0
  1275. mindspore/rewrite/parsers/container_parser.py +88 -0
  1276. mindspore/rewrite/parsers/expr_parser.py +55 -0
  1277. mindspore/rewrite/parsers/for_parser.py +61 -0
  1278. mindspore/rewrite/parsers/function_def_parser.py +84 -0
  1279. mindspore/rewrite/parsers/if_parser.py +85 -0
  1280. mindspore/rewrite/parsers/module_parser.py +117 -0
  1281. mindspore/rewrite/parsers/parser.py +43 -0
  1282. mindspore/rewrite/parsers/parser_register.py +86 -0
  1283. mindspore/rewrite/parsers/return_parser.py +37 -0
  1284. mindspore/rewrite/parsers/while_parser.py +59 -0
  1285. mindspore/rewrite/sparsify/__init__.py +0 -0
  1286. mindspore/rewrite/sparsify/sparse_transformer.py +457 -0
  1287. mindspore/rewrite/sparsify/sparsify.py +112 -0
  1288. mindspore/rewrite/sparsify/utils.py +179 -0
  1289. mindspore/rewrite/symbol_tree/__init__.py +20 -0
  1290. mindspore/rewrite/symbol_tree/symbol_tree.py +1819 -0
  1291. mindspore/rewrite/symbol_tree/symbol_tree_builder.py +76 -0
  1292. mindspore/rewrite/symbol_tree/symbol_tree_dumper.py +142 -0
  1293. mindspore/run_check/__init__.py +20 -0
  1294. mindspore/run_check/_check_version.py +507 -0
  1295. mindspore/run_check/run_check.py +66 -0
  1296. mindspore/safeguard/__init__.py +18 -0
  1297. mindspore/safeguard/rewrite_obfuscation.py +875 -0
  1298. mindspore/scipy/__init__.py +18 -0
  1299. mindspore/scipy/fft.py +264 -0
  1300. mindspore/scipy/linalg.py +919 -0
  1301. mindspore/scipy/ops.py +165 -0
  1302. mindspore/scipy/ops_grad.py +115 -0
  1303. mindspore/scipy/ops_wrapper.py +74 -0
  1304. mindspore/scipy/optimize/__init__.py +20 -0
  1305. mindspore/scipy/optimize/_bfgs.py +230 -0
  1306. mindspore/scipy/optimize/_lagrange.py +201 -0
  1307. mindspore/scipy/optimize/_lbfgs.py +146 -0
  1308. mindspore/scipy/optimize/gradient_optimization_algorithm.py +168 -0
  1309. mindspore/scipy/optimize/line_search.py +370 -0
  1310. mindspore/scipy/optimize/linear_sum_assignment.py +78 -0
  1311. mindspore/scipy/optimize/minimize.py +200 -0
  1312. mindspore/scipy/utils.py +156 -0
  1313. mindspore/scipy/utils_const.py +246 -0
  1314. mindspore/train/__init__.py +48 -0
  1315. mindspore/train/_utils.py +465 -0
  1316. mindspore/train/amp.py +935 -0
  1317. mindspore/train/anf_ir_pb2.py +1517 -0
  1318. mindspore/train/callback/__init__.py +44 -0
  1319. mindspore/train/callback/_backup_and_restore.py +117 -0
  1320. mindspore/train/callback/_callback.py +613 -0
  1321. mindspore/train/callback/_checkpoint.py +814 -0
  1322. mindspore/train/callback/_cluster_monitor.py +201 -0
  1323. mindspore/train/callback/_dataset_graph.py +150 -0
  1324. mindspore/train/callback/_early_stop.py +239 -0
  1325. mindspore/train/callback/_flops_collector.py +239 -0
  1326. mindspore/train/callback/_history.py +92 -0
  1327. mindspore/train/callback/_lambda_callback.py +80 -0
  1328. mindspore/train/callback/_landscape.py +1049 -0
  1329. mindspore/train/callback/_loss_monitor.py +107 -0
  1330. mindspore/train/callback/_lr_scheduler_callback.py +76 -0
  1331. mindspore/train/callback/_on_request_exit.py +298 -0
  1332. mindspore/train/callback/_reduce_lr_on_plateau.py +226 -0
  1333. mindspore/train/callback/_summary_collector.py +1184 -0
  1334. mindspore/train/callback/_tft_register.py +352 -0
  1335. mindspore/train/callback/_time_monitor.py +141 -0
  1336. mindspore/train/checkpoint_pb2.py +233 -0
  1337. mindspore/train/data_sink.py +219 -0
  1338. mindspore/train/dataset_helper.py +692 -0
  1339. mindspore/train/lineage_pb2.py +1260 -0
  1340. mindspore/train/loss_scale_manager.py +213 -0
  1341. mindspore/train/memory_profiling_pb2.py +298 -0
  1342. mindspore/train/metrics/__init__.py +175 -0
  1343. mindspore/train/metrics/accuracy.py +133 -0
  1344. mindspore/train/metrics/auc.py +129 -0
  1345. mindspore/train/metrics/bleu_score.py +170 -0
  1346. mindspore/train/metrics/confusion_matrix.py +700 -0
  1347. mindspore/train/metrics/cosine_similarity.py +109 -0
  1348. mindspore/train/metrics/dice.py +116 -0
  1349. mindspore/train/metrics/error.py +175 -0
  1350. mindspore/train/metrics/fbeta.py +167 -0
  1351. mindspore/train/metrics/hausdorff_distance.py +333 -0
  1352. mindspore/train/metrics/loss.py +97 -0
  1353. mindspore/train/metrics/mean_surface_distance.py +189 -0
  1354. mindspore/train/metrics/metric.py +373 -0
  1355. mindspore/train/metrics/occlusion_sensitivity.py +225 -0
  1356. mindspore/train/metrics/perplexity.py +133 -0
  1357. mindspore/train/metrics/precision.py +160 -0
  1358. mindspore/train/metrics/recall.py +159 -0
  1359. mindspore/train/metrics/roc.py +223 -0
  1360. mindspore/train/metrics/root_mean_square_surface_distance.py +191 -0
  1361. mindspore/train/metrics/topk.py +167 -0
  1362. mindspore/train/mind_ir_pb2.py +1908 -0
  1363. mindspore/train/model.py +2252 -0
  1364. mindspore/train/node_strategy_pb2.py +653 -0
  1365. mindspore/train/print_pb2.py +184 -0
  1366. mindspore/train/profiling_parallel_pb2.py +151 -0
  1367. mindspore/train/serialization.py +3325 -0
  1368. mindspore/train/summary/__init__.py +23 -0
  1369. mindspore/train/summary/_lineage_adapter.py +41 -0
  1370. mindspore/train/summary/_summary_adapter.py +496 -0
  1371. mindspore/train/summary/_writer_pool.py +207 -0
  1372. mindspore/train/summary/enums.py +56 -0
  1373. mindspore/train/summary/summary_record.py +581 -0
  1374. mindspore/train/summary/writer.py +167 -0
  1375. mindspore/train/summary_pb2.py +1165 -0
  1376. mindspore/train/train_thor/__init__.py +20 -0
  1377. mindspore/train/train_thor/convert_utils.py +268 -0
  1378. mindspore/train/train_thor/dataset_helper.py +192 -0
  1379. mindspore/train/train_thor/model_thor.py +257 -0
  1380. mindspore/utils/__init__.py +21 -0
  1381. mindspore/utils/utils.py +60 -0
  1382. mindspore/version.py +1 -0
  1383. mindspore-2.4.0.dist-info/METADATA +352 -0
  1384. mindspore-2.4.0.dist-info/RECORD +1387 -0
  1385. mindspore-2.4.0.dist-info/WHEEL +5 -0
  1386. mindspore-2.4.0.dist-info/entry_points.txt +3 -0
  1387. mindspore-2.4.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1703 @@
1
+ # Copyright 2020-2022 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ The module text.transforms is inherited from _c_dataengine
16
+ and is implemented based on ICU4C and cppjieba in C++.
17
+ It's a high-performance module for processing NLP text.
18
+ Users can use Vocab to build their own dictionary,
19
+ use appropriate tokenizers to split sentences into different tokens,
20
+ and use Lookup to find the index of tokens in Vocab.
21
+
22
+ .. Note::
23
+ A constructor's arguments for every class in this module must be saved into the
24
+ class attributes (self.xxx) to support save() and load().
25
+
26
+ Examples:
27
+ >>> import mindspore.dataset as ds
28
+ >>> import mindspore.dataset.text as text
29
+ >>>
30
+ >>> # Create a dataset for text sentences saved as line data in a file
31
+ >>> text_file_list = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
32
+ >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list, shuffle=False)
33
+ >>>
34
+ >>> # Tokenize sentences to unicode characters
35
+ >>> tokenizer = text.UnicodeCharTokenizer()
36
+ >>> # Load vocabulary from list
37
+ >>> vocab = text.Vocab.from_list(word_list=['深', '圳', '欢', '迎', '您'])
38
+ >>> # Use Lookup operation to map tokens to ids
39
+ >>> lookup = text.Lookup(vocab=vocab)
40
+ >>> text_file_dataset = text_file_dataset.map(operations=[tokenizer, lookup])
41
+ >>> # if text line in dataset_file is:
42
+ >>> # 深圳欢迎您
43
+ >>> # then the output will be:
44
+ >>> # {'text': array([0, 1, 2, 3, 4], dtype=int32)}
45
+ """
46
+ import json
47
+ import os
48
+ import re
49
+ import platform
50
+ import numpy as np
51
+
52
+ import mindspore._c_dataengine as cde
53
+ from mindspore.common import dtype as mstype
54
+
55
+ from .utils import JiebaMode, NormalizeForm, to_str, SPieceTokenizerOutType, SPieceTokenizerLoadType, SentencePieceVocab
56
+ from .validators import check_add_token, check_lookup, check_jieba_add_dict, check_to_vectors, \
57
+ check_jieba_add_word, check_jieba_init, check_with_offsets, check_unicode_script_tokenizer, \
58
+ check_wordpiece_tokenizer, check_regex_replace, check_regex_tokenizer, check_basic_tokenizer, check_ngram, \
59
+ check_pair_truncate, check_to_number, check_bert_tokenizer, check_python_tokenizer, check_slidingwindow, \
60
+ check_sentence_piece_tokenizer, check_truncate
61
+ from ..core.datatypes import mstype_to_detype
62
+ from ..core.validator_helpers import replace_none
63
+ from ..transforms.py_transforms_util import Implementation
64
+ from ..transforms.transforms import TensorOperation
65
+ from ..transforms.validators import invalidate_callable
66
+
67
+
68
class TextTensorOperation(TensorOperation):
    """
    Common parent class of the text tensor operations in this module.

    Every instance is marked as using the C implementation; concrete
    operations are expected to override parse().
    """

    def __init__(self):
        super().__init__()
        # Text operations are tagged with the C implementation.
        self.implementation = Implementation.C

    def parse(self):
        """Always raises: the base class has no operation of its own to build."""
        message = "TextTensorOperation has to implement parse() method."
        raise NotImplementedError(message)
79
+
80
+
81
# Translation tables from the Python-side enums imported from .utils to the
# matching enum members exposed by the cde extension module.

# JiebaMode -> cde.JiebaMode
DE_C_INTER_JIEBA_MODE = {
    JiebaMode.MIX: cde.JiebaMode.DE_JIEBA_MIX,
    JiebaMode.MP: cde.JiebaMode.DE_JIEBA_MP,
    JiebaMode.HMM: cde.JiebaMode.DE_JIEBA_HMM,
}

# SPieceTokenizerLoadType -> cde.SPieceTokenizerLoadType
DE_C_INTER_SENTENCEPIECE_LOADTYPE = {
    SPieceTokenizerLoadType.FILE: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KFILE,
    SPieceTokenizerLoadType.MODEL: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KMODEL,
}

# SPieceTokenizerOutType -> cde.SPieceTokenizerOutType
DE_C_INTER_SENTENCEPIECE_OUTTYPE = {
    SPieceTokenizerOutType.STRING: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KString,
    SPieceTokenizerOutType.INT: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KINT,
}
96
+
97
+
98
class AddToken(TextTensorOperation):
    """
    Add a token to the beginning or the end of a sequence.

    Args:
        token (str): The token to be added.
        begin (bool, optional): Selects where the token is inserted. If ``True``,
            the token is inserted at the beginning of the sequence; otherwise it is
            inserted at the end of the sequence. Default: ``True``.

    Raises:
        TypeError: If `token` is not of type string.
        TypeError: If `begin` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=[['a', 'b', 'c', 'd', 'e']], column_names=["text"])
        >>> # Data before
        >>> # |           text            |
        >>> # +---------------------------+
        >>> # | ['a', 'b', 'c', 'd', 'e'] |
        >>> # +---------------------------+
        >>> add_token_op = text.AddToken(token='TOKEN', begin=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=add_token_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['TOKEN' 'a' 'b' 'c' 'd' 'e']
        >>> # Data after
        >>> # |                text                |
        >>> # +------------------------------------+
        >>> # | ['TOKEN', 'a', 'b', 'c', 'd', 'e'] |
        >>> # +------------------------------------+
        >>>
        >>> # Use the transform in eager mode
        >>> data = ["happy", "birthday", "to", "you"]
        >>> output = text.AddToken(token='TOKEN', begin=True)(data)
        >>> print(output)
        ['TOKEN' 'happy' 'birthday' 'to' 'you']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_add_token
    def __init__(self, token, begin=True):
        super().__init__()
        # Constructor arguments are kept as attributes under their own names.
        self.token, self.begin = token, begin

    def parse(self):
        """Build the cde.AddTokenOperation from the stored token and position flag."""
        token, at_begin = self.token, self.begin
        return cde.AddTokenOperation(token, at_begin)
156
+
157
+
158
class JiebaTokenizer(TextTensorOperation):
    """
    Use Jieba tokenizer to tokenize Chinese strings.

    Note:
        The dictionary files used by Hidden Markov Model segment and Max Probability segment can be
        obtained through the `cppjieba GitHub <https://github.com/yanyiwu/cppjieba/tree/master/dict>`_ .
        Please ensure the validity and integrity of these files.

    Args:
        hmm_path (str): Path to the dictionary file used by Hidden Markov Model segment.
        mp_path (str): Path to the dictionary file used by Max Probability segment.
        mode (JiebaMode, optional): The desired segment algorithms. See :class:`~.text.JiebaMode`
            for details on optional values. Default: ``JiebaMode.MIX`` .
        with_offsets (bool, optional): Whether to output the start and end offsets of each
            token in the original string. Default: ``False`` .

    Raises:
        TypeError: If `hmm_path` is not of type str.
        TypeError: If `mp_path` is not of type str.
        TypeError: If `mode` is not of type :class:`~.text.JiebaMode` .
        TypeError: If `with_offsets` is not of type bool.
        ValueError: If `hmm_path` or `mp_path` does not exist.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>> from mindspore.dataset.text import JiebaMode
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["床前明月光"], column_names=["text"])
        >>>
        >>> # 1) If with_offsets=False, return one data column {["text", dtype=str]}
        >>> # The paths to jieba_hmm_file and jieba_mp_file can be downloaded directly from the mindspore repository.
        >>> # Refer to https://gitee.com/mindspore/mindspore/blob/master/tests/ut/data/dataset/jiebadict/hmm_model.utf8
        >>> # and https://gitee.com/mindspore/mindspore/blob/master/tests/ut/data/dataset/jiebadict/jieba.dict.utf8
        >>> jieba_hmm_file = "tests/ut/data/dataset/jiebadict/hmm_model.utf8"
        >>> jieba_mp_file = "tests/ut/data/dataset/jiebadict/jieba.dict.utf8"
        >>> tokenizer_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP, with_offsets=False)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['床' '前' '明月光']
        >>>
        >>> # 2) If with_offsets=True, return three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
        >>> #                                                 ["offsets_limit", dtype=uint32]}
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["床前明月光"], column_names=["text"])
        >>> tokenizer_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP, with_offsets=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op, input_columns=["text"],
        ...                                                 output_columns=["token", "offsets_start", "offsets_limit"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["token"], item["offsets_start"], item["offsets_limit"])
        ['床' '前' '明月光'] [0 3 6] [ 3  6 15]
        >>>
        >>> # Use the transform in eager mode
        >>> data = "床前明月光"
        >>> output = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP)(data)
        >>> print(output)
        ['床' '前' '明月光']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_jieba_init
    def __init__(self, hmm_path, mp_path, mode=JiebaMode.MIX, with_offsets=False):
        super().__init__()
        if not isinstance(mode, JiebaMode):
            raise TypeError("Wrong input type for mode, should be JiebaMode.")

        self.mode = mode
        self.__check_path__(hmm_path)
        self.hmm_path = hmm_path
        self.__check_path__(mp_path)
        self.mp_path = mp_path
        self.with_offsets = with_offsets
        # (word, freq) pairs collected by add_word()/add_dict(); replayed onto
        # the C operation each time parse() builds it.
        self.words = []

    def __check_path__(self, model_path):
        """
        Check that a dictionary file exists.

        Args:
            model_path (str): Path of the dictionary file to check.

        Raises:
            ValueError: If the resolved path does not exist.
        """
        if not os.path.exists(os.path.realpath(model_path)):
            raise ValueError(
                " jieba mode file {} is not exist.".format(model_path))

    def parse(self):
        """Build the cde.JiebaTokenizerOperation and register every user-added word on it."""
        jieba_tokenizer = cde.JiebaTokenizerOperation(self.hmm_path, self.mp_path,
                                                      DE_C_INTER_JIEBA_MODE.get(self.mode),
                                                      self.with_offsets)
        for word, freq in self.words:
            jieba_tokenizer.add_word(word, freq)
        return jieba_tokenizer

    @invalidate_callable
    @check_jieba_add_word
    def add_word(self, word, freq=None):
        """
        Add a specified word mapping to the Vocab of the tokenizer.

        Args:
            word (str): The word to be added to the Vocab.
            freq (int, optional): The frequency of the word to be added. The higher the word frequency,
                the greater the chance that the word will be tokenized. Default: ``None``, using the
                default word frequency.

        Examples:
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.text as text
            >>> from mindspore.dataset.text import JiebaMode
            >>>
            >>> jieba_hmm_file = "/path/to/jieba/hmm/file"
            >>> jieba_mp_file = "/path/to/jieba/mp/file"
            >>> jieba_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP)
            >>> sentence_piece_vocab_file = "/path/to/sentence/piece/vocab/file"
            >>> with open(sentence_piece_vocab_file, 'r') as f:
            ...     for line in f:
            ...         word = line.split(',')[0]
            ...         jieba_op.add_word(word)
            >>>
            >>> text_file_list = ["/path/to/text_file_dataset_file"]
            >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
            >>> text_file_dataset = text_file_dataset.map(operations=jieba_op, input_columns=["text"])
        """

        # A missing frequency is stored as 0, the value handed to the C
        # operation to request the default word frequency.
        self.words.append((word, 0 if freq is None else freq))

    @invalidate_callable
    @check_jieba_add_dict
    def add_dict(self, user_dict):
        """
        Add the specified word mappings to the Vocab of the tokenizer.

        Args:
            user_dict (Union[str, dict[str, int]]): The word mappings to be added to the Vocab.
                If the input type is str, it means the path of the file storing the word mappings to be added.
                The file must be UTF-8 encoded.
                Each line of the file should contain two fields separated by a space, where the first field
                indicates the word itself and the second field should be a number indicating the word frequency.
                Invalid lines will be ignored and no error or warning will be returned.
                If the input type is dict[str, int], it means the dictionary storing the word mappings to be added,
                where the key name is the word itself and the key value is the word frequency.

        Raises:
            TypeError: If `user_dict` is neither a str nor a dict.
            ValueError: If `user_dict` is a path that does not exist, or the file is not valid UTF-8.

        Examples:
            >>> import mindspore.dataset as ds
            >>> import mindspore.dataset.text as text
            >>> from mindspore.dataset.text import JiebaMode
            >>>
            >>> jieba_hmm_file = "/path/to/jieba/hmm/file"
            >>> jieba_mp_file = "/path/to/jieba/mp/file"
            >>> user_dict = {"男默女泪": 10}
            >>> jieba_op = text.JiebaTokenizer(jieba_hmm_file, jieba_mp_file, mode=JiebaMode.MP)
            >>> jieba_op.add_dict(user_dict)
            >>>
            >>> text_file_list = ["/path/to/text_file_dataset_file"]
            >>> text_file_dataset = ds.TextFileDataset(dataset_files=text_file_list)
            >>> text_file_dataset = text_file_dataset.map(operations=jieba_op, input_columns=["text"])
        """

        if isinstance(user_dict, str):
            self.__add_dict_py_file(user_dict)
        elif isinstance(user_dict, dict):
            for k, v in user_dict.items():
                self.add_word(k, v)
        else:
            raise TypeError("The type of user_dict must str or dict.")

    def __add_dict_py_file(self, file_path):
        """Add every (word, frequency) entry parsed from the user dictionary file."""
        for word, freq in self.__parser_file(file_path):
            # The frequency column is optional in the file; absent means 0.
            self.add_word(word, 0 if freq is None else int(freq))

    def __decode(self, data):
        """
        Decode one raw line of the user dictionary file.

        Args:
            data (bytes): Raw bytes read from the file.

        Returns:
            str, the UTF-8 decoded text with any leading byte-order mark removed.

        Raises:
            ValueError: If `data` is not valid UTF-8.
        """
        try:
            data = data.decode('utf-8')
        except UnicodeDecodeError as err:
            raise ValueError("user dict file must be utf8 format.") from err
        return data.lstrip('\ufeff')

    def __parser_file(self, file_path):
        """
        Parse the user dictionary file into (word, frequency) tuples.

        The file is read as bytes and each line is decoded as UTF-8 explicitly, so the
        result does not depend on the platform's default text encoding and a leading
        byte-order mark never ends up inside the first word.

        Args:
            file_path (str): Path of the user dictionary file.

        Returns:
            list[tuple], one ``(word, freq)`` tuple per matching line, where ``freq`` is the
            digit string from the file or None when the line has no frequency column.
            Lines that do not match the expected layout are skipped.

        Raises:
            ValueError: If the file does not exist or is not valid UTF-8.
        """
        if not os.path.exists(file_path):
            raise ValueError(
                "user dict file {} is not exist.".format(file_path))
        real_file_path = os.path.realpath(file_path)
        # group 1: the word (no whitespace, no '*'); group 2: optional digits.
        data_re = re.compile(r'^\s*([^\s*]+?)\s*([0-9]+)?\s*$', re.U)
        words_list = []
        # The context manager closes the file even if decoding a line fails.
        with open(real_file_path, "rb") as file_dict:
            for item in file_dict:
                data = self.__decode(item).strip()
                tmp = data_re.match(data)
                if tmp:
                    words_list.append(tmp.groups())
        return words_list
366
+
367
+
368
class Lookup(TextTensorOperation):
    """
    Map each word to its id according to the input vocabulary table.

    Args:
        vocab (Vocab): A vocabulary object.
        unknown_token (str, optional): Word used as the fallback for lookup. If a word is out of
            vocabulary (OOV), the result of lookup will be replaced with `unknown_token`. If `unknown_token`
            is not specified or is itself OOV, a runtime error will be thrown.
            Default: ``None``, means no unknown_token is specified.
        data_type (mindspore.dtype, optional): The data type that the lookup operation maps
            strings to. Default: ``mstype.int32``.

    Raises:
        TypeError: If `vocab` is not of type text.Vocab.
        TypeError: If `unknown_token` is not of type string.
        TypeError: If `data_type` is not of type mindspore.dtype.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["with"], column_names=["text"])
        >>> # Load vocabulary from list
        >>> vocab = text.Vocab.from_list(["?", "##", "with", "the", "test", "符号"])
        >>> # Use Lookup operation to map tokens to ids
        >>> lookup = text.Lookup(vocab)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[lookup])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        2
        >>>
        >>> # Use the transform in eager mode
        >>> vocab = text.Vocab.from_list(["?", "##", "with", "the", "test", "符号"])
        >>> data = "with"
        >>> output = text.Lookup(vocab=vocab, unknown_token="test")(data)
        >>> print(output)
        2

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_lookup
    def __init__(self, vocab, unknown_token=None, data_type=mstype.int32):
        super().__init__()
        self.vocab, self.unknown_token, self.data_type = vocab, unknown_token, data_type

    def parse(self):
        """Build the backend lookup operation from the stored vocabulary, fallback token and data type."""
        c_vocab = self.vocab.c_vocab
        fallback = self.unknown_token
        # The backend receives the target type as the string form of the converted data type.
        de_type_name = str(mstype_to_detype(self.data_type))
        return cde.LookupOperation(c_vocab, fallback, de_type_name)
424
+
425
+
426
class Ngram(TextTensorOperation):
    """
    Generate n-gram from a 1-D string Tensor.

    Refer to `N-gram <https://en.wikipedia.org/wiki/N-gram#Examples>`_
    for an overview of what n-gram is and how it works.

    Args:
        n (list[int]): n in n-gram, which is a list of positive integers. For example, if n=[4, 3], then the
            result would be a 4-gram followed by a 3-gram in the same tensor. If the number of words is not
            enough to make up for a n-gram, an empty string will be returned. For example, 3 grams on
            ["mindspore", "best"] will result in an empty string produced.
        left_pad (tuple, optional): Padding performed on left side of the sequence shaped like
            ("pad_token", pad_width). `pad_width` will be capped at n-1. For example, specifying
            left_pad=("_", 2) would pad left side of the sequence with "__". Default: ``('', 0)``.
        right_pad (tuple, optional): Padding performed on right side of the sequence shaped like
            ("pad_token", pad_width). `pad_width` will be capped at n-1. For example, specifying
            right_pad=("_", 2) would pad right side of the sequence with "__". Default: ``('', 0)``.
        separator (str, optional): Symbol used to join strings together. For example, if 2-gram is
            ["mindspore", "amazing"] with separator is ``"-"``, the result would be ["mindspore-amazing"].
            Default: ``' '``, which will use whitespace as separator.

    Raises:
        TypeError: If values of `n` is not of type int.
        ValueError: If values of `n` not positive.
        ValueError: If `left_pad` is not a tuple of length 2.
        ValueError: If `right_pad` is not a tuple of length 2.
        TypeError: If `separator` is not of type string.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> def gen(texts):
        ...     for line in texts:
        ...         yield(np.array(line.split(" "), dtype=str),)
        >>> data = ["WildRose Country", "Canada's Ocean Playground", "Land of Living Skies"]
        >>> generator_dataset = ds.GeneratorDataset(gen(data), ["text"])
        >>> ngram_op = text.Ngram(3, separator="-")
        >>> generator_dataset = generator_dataset.map(operations=ngram_op)
        >>> for item in generator_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ...     break
        ['']
        >>>
        >>> # Use the transform in eager mode
        >>> output = ngram_op(data)
        >>> print(output)
        ["WildRose Country-Canada's Ocean Playground-Land of Living Skies"]

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_ngram
    def __init__(self, n, left_pad=("", 0), right_pad=("", 0), separator=" "):
        super().__init__()
        # `n` is stored under the name `ngrams`; the paddings and separator keep their argument names.
        self.ngrams = n
        self.left_pad, self.right_pad = left_pad, right_pad
        self.separator = separator

    def parse(self):
        """Build the backend n-gram operation from the stored configuration."""
        op_args = (self.ngrams, self.left_pad, self.right_pad, self.separator)
        return cde.NgramOperation(*op_args)
496
+
497
+
498
class PythonTokenizer:
    """
    Class that applies user-defined string tokenizer into input string.

    Args:
        tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens.

    Raises:
        TypeError: If `tokenizer` is not a callable Python function.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> def my_tokenizer(line):
        ...     return line.split()
        >>>
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Hello world'], column_names=["text"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=text.PythonTokenizer(my_tokenizer))
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['Hello' 'world']
        >>>
        >>> # Use the transform in eager mode
        >>> data = np.array('Hello world'.encode())
        >>> output = text.PythonTokenizer(my_tokenizer)(data)
        >>> print(output)
        ['Hello' 'world']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_python_tokenizer
    def __init__(self, tokenizer):
        self.pyfunc = tokenizer
        # Vectorize so that each scalar string of the input array yields a 1-D array of unicode tokens.
        self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)')
        self.random = False

    def _pyfunc_name(self):
        """Return a printable name for the wrapped callable, even when it has no `__name__` attribute."""
        # Callables such as functools.partial objects or instances defining __call__ have no __name__;
        # fall back to the type name so error reporting and serialization do not fail on them.
        return str(getattr(self.pyfunc, "__name__", type(self.pyfunc).__name__))

    def __call__(self, in_array):
        """
        Tokenize every string of `in_array` with the user-defined tokenizer.

        Args:
            in_array (numpy.ndarray): Input strings. Arrays of bytes are converted to str first.

        Returns:
            numpy.ndarray, the tokens produced by the user-defined tokenizer.

        Raises:
            TypeError: If `in_array` is not a NumPy array.
            RuntimeError: If the user-defined tokenizer fails.
        """
        if not isinstance(in_array, np.ndarray):
            raise TypeError("input should be a NumPy array. Got {}.".format(type(in_array)))
        if in_array.dtype.type is np.bytes_:
            in_array = to_str(in_array)
        try:
            tokens = self.tokenizer(in_array)
        except Exception as e:
            # Chain explicitly so the original traceback of the user function stays attached.
            raise RuntimeError(
                "Error occurred in Pyfunc [" + self._pyfunc_name() + "], error message: " + str(e)) from e
        return tokens

    def to_json(self):
        """Serialize the name of the wrapped callable and the module of this class to a JSON string."""
        json_obj = {}
        json_obj["tensor_op_name"] = self._pyfunc_name()
        json_obj["python_module"] = self.__class__.__module__
        return json.dumps(json_obj)
559
+
560
+
561
class SentencePieceTokenizer(TextTensorOperation):
    """
    Tokenize scalar token or 1-D tokens to tokens by sentencepiece.

    Args:
        mode (Union[str, SentencePieceVocab]): SentencePiece model.
            If the input parameter is a file, it represents the path of SentencePiece model to be loaded.
            If the input parameter is a SentencePieceVocab object, it should be constructed in advance.
        out_type (SPieceTokenizerOutType): The type of output, it can be ``SPieceTokenizerOutType.STRING``,
            ``SPieceTokenizerOutType.INT``.

            - ``SPieceTokenizerOutType.STRING``, means output type of SentencePiece Tokenizer is string.
            - ``SPieceTokenizerOutType.INT``, means output type of SentencePiece Tokenizer is int.

    Raises:
        TypeError: If `mode` is not of type string or SentencePieceVocab.
        TypeError: If `out_type` is not of type SPieceTokenizerOutType.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>> from mindspore.dataset.text import SentencePieceModel, SPieceTokenizerOutType
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Hello world'], column_names=["text"])
        >>> # The paths to sentence_piece_vocab_file can be downloaded directly from the mindspore repository. Refer to
        >>> # https://gitee.com/mindspore/mindspore/blob/master/tests/ut/data/dataset/test_sentencepiece/vocab.txt
        >>> sentence_piece_vocab_file = "tests/ut/data/dataset/test_sentencepiece/vocab.txt"
        >>> vocab = text.SentencePieceVocab.from_file([sentence_piece_vocab_file], 512, 0.9995,
        ...                                           SentencePieceModel.UNIGRAM, {})
        >>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['▁H' 'e' 'l' 'lo' '▁w' 'o' 'r' 'l' 'd']
        >>>
        >>> # Use the transform in eager mode
        >>> data = "Hello world"
        >>> vocab = text.SentencePieceVocab.from_file([sentence_piece_vocab_file], 100, 0.9995,
        ...                                           SentencePieceModel.UNIGRAM, {})
        >>> output = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING)(data)
        >>> print(output)
        ['▁' 'H' 'e' 'l' 'l' 'o' '▁' 'w' 'o' 'r' 'l' 'd']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_sentence_piece_tokenizer
    def __init__(self, mode, out_type):
        super().__init__()
        self.mode = mode
        self.out_type = out_type

    def parse(self):
        """Build the backend SentencePiece tokenizer operation from the stored model and output type."""
        # Resolve the backend vocab handle into a local instead of overwriting `self.mode`, so that
        # building the operation does not replace the object the user passed in.
        model = self.mode.c_sentence_piece_vocab if isinstance(self.mode, SentencePieceVocab) else self.mode
        return cde.SentencePieceTokenizerOperation(model, DE_C_INTER_SENTENCEPIECE_OUTTYPE.get(self.out_type))
622
+
623
+
624
class SlidingWindow(TextTensorOperation):
    """
    Build a tensor of sliding windows over the given data (only 1-D is supported for now): each element
    along the dimension axis is a slice of the data that starts at the corresponding position and has
    the specified width.

    Args:
        width (int): The width of the window. It must be an integer and greater than zero.
        axis (int, optional): The axis along which the sliding window is computed. Default: ``0``.

    Raises:
        TypeError: If `width` is not of type int.
        ValueError: If value of `width` is not positive.
        TypeError: If `axis` is not of type int.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=[[1, 2, 3, 4, 5]], column_names=["col1"])
        >>> # Data before
        >>> # |       col1         |
        >>> # +--------------------+
        >>> # | [[1, 2, 3, 4, 5]]  |
        >>> # +--------------------+
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=text.SlidingWindow(3, 0))
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["col1"])
        [[1 2 3] [2 3 4] [3 4 5]]
        >>> # Data after
        >>> # |     col1     |
        >>> # +--------------+
        >>> # | [[1, 2, 3],  |
        >>> # |  [2, 3, 4],  |
        >>> # |  [3, 4, 5]]  |
        >>> # +--------------+
        >>>
        >>> # Use the transform in eager mode
        >>> data = ["happy", "birthday", "to", "you"]
        >>> output = text.SlidingWindow(2, 0)(data)
        >>> print(output)
        [['happy' 'birthday'] ['birthday' 'to'] ['to' 'you']]

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_slidingwindow
    def __init__(self, width, axis=0):
        super().__init__()
        self.width, self.axis = width, axis

    def parse(self):
        """Build the backend sliding window operation from the stored width and axis."""
        window_width = self.width
        window_axis = self.axis
        return cde.SlidingWindowOperation(window_width, window_axis)
683
+
684
+
685
class ToNumber(TextTensorOperation):
    """
    Tensor operation to convert every element of a string tensor to a number.

    Strings are cast according to the rules specified in the following links, except that any strings which
    represent negative numbers cannot be cast to an unsigned integer type, rules links are as follows:
    https://en.cppreference.com/w/cpp/string/basic_string/stof,
    https://en.cppreference.com/w/cpp/string/basic_string/stoul.

    Args:
        data_type (mindspore.dtype): Type to be cast to. Must be a numeric type in mindspore.dtype.

    Raises:
        TypeError: If `data_type` is not of type mindspore.dtype.
        RuntimeError: If strings are invalid to cast, or are out of range after being cast.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>> from mindspore import dtype as mstype
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=[["1", "2", "3"]], column_names=["text"])
        >>> to_number_op = text.ToNumber(mstype.int8)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=to_number_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        [1 2 3]
        >>>
        >>> # Use the transform in eager mode
        >>> data = ["1", "2", "3"]
        >>> output = text.ToNumber(mstype.uint32)(data)
        >>> print(output)
        [1 2 3]

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_to_number
    def __init__(self, data_type):
        super().__init__()
        # Only the string form of the converted type is kept; `parse` forwards it to the backend as is.
        self.data_type = str(mstype_to_detype(data_type))

    def parse(self):
        """Build the backend to-number operation for the stored target type string."""
        target_type = self.data_type
        return cde.ToNumberOperation(target_type)
736
+
737
+
738
class ToVectors(TextTensorOperation):
    """
    Map each token to its vector according to the input vector table.

    Args:
        vectors (Vectors): A vectors object.
        unk_init (sequence, optional): Sequence used to initialize out-of-vectors (OOV) token.
            Default: ``None``, initialize with zero vectors.
        lower_case_backup (bool, optional): Whether to look up the token in the lower case. If ``False``,
            each token in the original case will be looked up; if ``True``, each token in the original
            case will be looked up first, if not found in the keys of the property stoi, the token in the
            lower case will be looked up. Default: ``False``.

    Raises:
        TypeError: If `unk_init` is not of type sequence.
        TypeError: If elements of `unk_init` is not of type float or int.
        TypeError: If `lower_case_backup` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["happy", "birthday", "to", "you"], column_names=["text"])
        >>> # Load vectors from file
        >>> # The paths to vectors_file can be downloaded directly from the mindspore repository. Refer to
        >>> # https://gitee.com/mindspore/mindspore/blob/master/tests/ut/data/dataset/testVectors/vectors.txt
        >>> vectors_file = "tests/ut/data/dataset/testVectors/vectors.txt"
        >>> vectors = text.Vectors.from_file(vectors_file)
        >>> # Use ToVectors operation to map tokens to vectors
        >>> to_vectors = text.ToVectors(vectors)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=[to_vectors])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ...     break
        [0. 0. 0. 0. 0. 0.]
        >>>
        >>> # Use the transform in eager mode
        >>> data = ["happy"]
        >>> output = text.ToVectors(vectors)(data)
        >>> print(output)
        [0. 0. 0. 0. 0. 0.]

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_to_vectors
    def __init__(self, vectors, unk_init=None, lower_case_backup=False):
        super().__init__()
        self.vectors = vectors
        # A missing `unk_init` is stored as an empty list rather than None.
        if unk_init is None:
            unk_init = []
        self.unk_init = unk_init
        self.lower_case_backup = lower_case_backup

    def parse(self):
        """Build the backend to-vectors operation from the stored configuration."""
        op_args = (self.vectors, self.unk_init, self.lower_case_backup)
        return cde.ToVectorsOperation(*op_args)
798
+
799
+
800
class Truncate(TextTensorOperation):
    """
    Truncate the input sequence so that it does not exceed the maximum length.

    Args:
        max_seq_len (int): Maximum allowable length.

    Raises:
        TypeError: If `max_seq_len` is not of type int.
        ValueError: If value of `max_seq_len` is not greater than or equal to 0.
        RuntimeError: If the input tensor is not of dtype bool, int, float, double or str.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=[['a', 'b', 'c', 'd', 'e']], column_names=["text"],
        ...                                              shuffle=False)
        >>> # Data before
        >>> # |           text            |
        >>> # +---------------------------+
        >>> # | ['a', 'b', 'c', 'd', 'e'] |
        >>> # +---------------------------+
        >>> truncate = text.Truncate(4)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=truncate, input_columns=["text"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['a' 'b' 'c' 'd']
        >>> # Data after
        >>> # |          text          |
        >>> # +------------------------+
        >>> # |  ['a', 'b', 'c', 'd']  |
        >>> # +------------------------+
        >>>
        >>> # Use the transform in eager mode
        >>> data = ["happy", "birthday", "to", "you"]
        >>> output = text.Truncate(2)(data)
        >>> print(output)
        ['happy' 'birthday']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_truncate
    def __init__(self, max_seq_len):
        super().__init__()
        self.max_seq_len = max_seq_len

    def parse(self):
        """Build the backend truncate operation for the stored maximum length."""
        length_limit = self.max_seq_len
        return cde.TruncateOperation(length_limit)
856
+
857
+
858
class TruncateSequencePair(TextTensorOperation):
    """
    Truncate a pair of 1-D string input so that their total length does not exceed the specified length.

    Args:
        max_length (int): The maximum total length of the output strings. If it is no less than the
            total length of the original pair of strings, no truncation is performed; otherwise, the
            longer of the two input strings is truncated until its total length equals this value.

    Raises:
        TypeError: If `max_length` is not of type int.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=([[1, 2, 3]], [[4, 5]]), column_names=["col1", "col2"])
        >>> # Data before
        >>> # |   col1    |   col2    |
        >>> # +-----------+-----------+
        >>> # | [1, 2, 3] |  [4, 5]   |
        >>> # +-----------+-----------+
        >>> truncate_sequence_pair_op = text.TruncateSequencePair(max_length=4)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=truncate_sequence_pair_op,
        ...                                                 input_columns=["col1", "col2"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["col1"], item["col2"])
        [1 2] [4 5]
        >>> # Data after
        >>> # |   col1    |   col2    |
        >>> # +-----------+-----------+
        >>> # |  [1, 2]   |  [4, 5]   |
        >>> # +-----------+-----------+
        >>>
        >>> # Use the transform in eager mode
        >>> data = [["1", "2", "3"], ["4", "5"]]
        >>> output = text.TruncateSequencePair(4)(*data)
        >>> print(output)
        (array(['1', '2'], dtype='<U1'), array(['4', '5'], dtype='<U1'))

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_pair_truncate
    def __init__(self, max_length):
        super().__init__()
        self.max_length = max_length

    def parse(self):
        """Build the backend pair-truncation operation for the stored maximum total length."""
        total_limit = self.max_length
        return cde.TruncateSequencePairOperation(total_limit)
914
+
915
+
916
class UnicodeCharTokenizer(TextTensorOperation):
    """
    Split the input strings into their individual Unicode characters.

    Args:
        with_offsets (bool, optional): Whether to output the start and end offsets of each
            token in the original string. Default: ``False`` .

    Raises:
        TypeError: If `with_offsets` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome     To   BeiJing!'], column_names=["text"])
        >>>
        >>> # If with_offsets=False, default output one column {["text", dtype=str]}
        >>> tokenizer_op = text.UnicodeCharTokenizer(with_offsets=False)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ...     break
        ['W' 'e' 'l' 'c' 'o' 'm' 'e' ' ' ' ' ' ' ' ' ' ' 'T' 'o' ' ' ' ' ' ' 'B' 'e' 'i' 'J' 'i' 'n' 'g' '!']
        >>>
        >>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
        >>> #                                                  ["offsets_limit", dtype=uint32]}
        >>> tokenizer_op = text.UnicodeCharTokenizer(with_offsets=True)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome     To   BeiJing!'], column_names=["text"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op, input_columns=["text"],
        ...                                                 output_columns=["token", "offsets_start", "offsets_limit"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["token"], item["offsets_start"], item["offsets_limit"])
        ['W' 'e' 'l' 'c' 'o' 'm' 'e' ' ' ' ' ' ' ' ' ' ' 'T' 'o' ' ' ' ' ' ' 'B' 'e' 'i' 'J' 'i' 'n' 'g' '!'] [ 0 1 2
        3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24] [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
        16 17 18 19 20 21 22 23 24 25]
        >>>
        >>> # Use the transform in eager mode
        >>> data = 'Welcome     To   BeiJing!'
        >>> output = text.UnicodeCharTokenizer(with_offsets=True)(data)
        >>> print(output)
        (array(['W', 'e', 'l', 'c', 'o', 'm', 'e', ' ', ' ', ' ', ' ', ' ', 'T', 'o', ' ', ' ', ' ', 'B', 'e', 'i', 'J',
        'i', 'n', 'g', '!'], dtype='<U1'), array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24], dtype=uint32), array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], dtype=uint32))

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_with_offsets
    def __init__(self, with_offsets=False):
        super().__init__()
        self.with_offsets = with_offsets

    def parse(self):
        """Build the backend Unicode character tokenizer operation with the stored offsets flag."""
        emit_offsets = self.with_offsets
        return cde.UnicodeCharTokenizerOperation(emit_offsets)
978
+
979
+
980
class WordpieceTokenizer(TextTensorOperation):
    """
    Split the input text into subword tokens.

    Args:
        vocab (Vocab): Vocabulary used to look up words.
        suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: ``'##'``.
        max_bytes_per_token (int, optional): The maximum length of tokenization, words exceeding this length will
            not be split. Default: ``100``.
        unknown_token (str, optional): The output for unknown words. When set to an empty string, the corresponding
            unknown word will be directly returned as the output. Otherwise, the set string will be returned as the
            output. Default: ``'[UNK]'``.
        with_offsets (bool, optional): Whether to output the start and end offsets of each
            token in the original string. Default: ``False`` .

    Raises:
        TypeError: If `vocab` is not of type :class:`mindspore.dataset.text.Vocab` .
        TypeError: If `suffix_indicator` is not of type str.
        TypeError: If `max_bytes_per_token` is not of type int.
        TypeError: If `unknown_token` is not of type str.
        TypeError: If `with_offsets` is not of type bool.
        ValueError: If `max_bytes_per_token` is negative.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> seed = ds.config.get_seed()
        >>> ds.config.set_seed(12345)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["happy", "birthday", "to", "you"], column_names=["text"])
        >>>
        >>> vocab_list = ["book", "cholera", "era", "favor", "##ite", "my", "is", "love", "dur", "##ing", "the"]
        >>> vocab = text.Vocab.from_list(vocab_list)
        >>>
        >>> # If with_offsets=False, default output one column {["text", dtype=str]}
        >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
        ...                                        max_bytes_per_token=100, with_offsets=False)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ...     break
        ['[UNK]']
        >>>
        >>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
        >>> #                                                  ["offsets_limit", dtype=uint32]}
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["happy", "birthday", "to", "you"], column_names=["text"])
        >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]',
        ...                                        max_bytes_per_token=100, with_offsets=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op, input_columns=["text"],
        ...                                                 output_columns=["token", "offsets_start", "offsets_limit"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["token"], item["offsets_start"], item["offsets_limit"])
        ...     break
        ['[UNK]'] [0] [5]
        >>>
        >>> # Use the transform in eager mode
        >>> data = ["happy", "birthday", "to", "you"]
        >>> vocab_list = ["book", "cholera", "era", "favor", "**ite", "my", "is", "love", "dur", "**ing", "the"]
        >>> vocab = text.Vocab.from_list(vocab_list)
        >>> output = text.WordpieceTokenizer(vocab=vocab, suffix_indicator="y", unknown_token='[UNK]')(data)
        >>> print(output)
        ['[UNK]' '[UNK]' '[UNK]' '[UNK]']
        >>> ds.config.set_seed(seed)

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_wordpiece_tokenizer
    def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]',
                 with_offsets=False):
        super().__init__()
        self.vocab, self.suffix_indicator = vocab, suffix_indicator
        self.max_bytes_per_token, self.unknown_token = max_bytes_per_token, unknown_token
        self.with_offsets = with_offsets

    def parse(self):
        """Build the backend wordpiece tokenizer operation from the stored configuration."""
        op_args = (
            self.vocab.c_vocab,
            self.suffix_indicator,
            self.max_bytes_per_token,
            self.unknown_token,
            self.with_offsets,
        )
        return cde.WordpieceTokenizerOperation(*op_args)
1066
+
1067
+
1068
+ if platform.system().lower() != 'windows':
1069
+ DE_C_INTER_NORMALIZE_FORM = {
1070
+ NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE,
1071
+ NormalizeForm.NFC: cde.NormalizeForm.DE_NORMALIZE_NFC,
1072
+ NormalizeForm.NFKC: cde.NormalizeForm.DE_NORMALIZE_NFKC,
1073
+ NormalizeForm.NFD: cde.NormalizeForm.DE_NORMALIZE_NFD,
1074
+ NormalizeForm.NFKD: cde.NormalizeForm.DE_NORMALIZE_NFKD
1075
+ }
1076
+
1077
+
1078
class BasicTokenizer(TextTensorOperation):
    """
    Tokenize the input UTF-8 encoded string by specific rules.

    Note:
        `BasicTokenizer` is not supported on Windows platform yet.

    Args:
        lower_case (bool, optional): Whether to perform lowercase processing on the text. If ``True``, will fold the
            text to lower case and strip accented characters. If ``False``, will only perform normalization on the
            text, with mode specified by `normalization_form` . Default: ``False``.
        keep_whitespace (bool, optional): If ``True``, the whitespace will be kept in the output.
            Default: ``False``.
        normalization_form (NormalizeForm, optional): The desired normalization form.
            See :class:`~.text.NormalizeForm` for details on optional values.
            Default: ``NormalizeForm.NONE`` .
        preserve_unused_token (bool, optional): Whether to preserve special tokens. If ``True``,
            will not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'.
            Default: ``True``.
        with_offsets (bool, optional): Whether to output the start and end offsets of each
            token in the original string. Default: ``False`` .

    Raises:
        TypeError: If `lower_case` is not of type bool.
        TypeError: If `keep_whitespace` is not of type bool.
        TypeError: If `normalization_form` is not of type :class:`~.text.NormalizeForm` .
        TypeError: If `preserve_unused_token` is not of type bool.
        TypeError: If `with_offsets` is not of type bool.
        RuntimeError: If dtype of input Tensor is not str.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>> from mindspore.dataset.text import NormalizeForm
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome     To   BeiJing!'], column_names=["text"])
        >>>
        >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
        >>> tokenizer_op = text.BasicTokenizer(lower_case=False,
        ...                                    keep_whitespace=False,
        ...                                    normalization_form=NormalizeForm.NONE,
        ...                                    preserve_unused_token=True,
        ...                                    with_offsets=False)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['Welcome' 'To' 'BeiJing' '!']
        >>>
        >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
        >>> #                                                     ["offsets_start", dtype=uint32],
        >>> #                                                     ["offsets_limit", dtype=uint32]}
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome     To   BeiJing!'], column_names=["text"])
        >>> tokenizer_op = text.BasicTokenizer(lower_case=False,
        ...                                    keep_whitespace=False,
        ...                                    normalization_form=NormalizeForm.NONE,
        ...                                    preserve_unused_token=True,
        ...                                    with_offsets=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=tokenizer_op, input_columns=["text"],
        ...     output_columns=["token", "offsets_start", "offsets_limit"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["token"], item["offsets_start"], item["offsets_limit"])
        ['Welcome' 'To' 'BeiJing' '!'] [ 0 12 17 24] [ 7 14 24 25]
        >>>
        >>> # Use the transform in eager mode
        >>> data = 'Welcome     To   BeiJing!'
        >>> output = text.BasicTokenizer()(data)
        >>> print(output)
        ['Welcome' 'To' 'BeiJing' '!']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_basic_tokenizer
    def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE,
                 preserve_unused_token=True, with_offsets=False):
        super().__init__()
        # Reject anything that is not a NormalizeForm member before it is used as a lookup key below.
        if not isinstance(normalization_form, NormalizeForm):
            raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.")

        self.lower_case = lower_case
        self.keep_whitespace = keep_whitespace
        # Store the matching `cde.NormalizeForm` value rather than the Python enum member.
        self.normalization_form = DE_C_INTER_NORMALIZE_FORM.get(normalization_form)
        self.preserve_unused_token = preserve_unused_token
        self.with_offsets = with_offsets

    def parse(self):
        """Return a `cde.BasicTokenizerOperation` built from the stored constructor arguments."""
        return cde.BasicTokenizerOperation(self.lower_case, self.keep_whitespace, self.normalization_form,
                                           self.preserve_unused_token, self.with_offsets)
1171
+
1172
+
1173
class BertTokenizer(TextTensorOperation):
    """
    Tokenizer used for Bert text process.

    Note:
        `BertTokenizer` is not supported on Windows platform yet.

    Args:
        vocab (Vocab): Vocabulary used to look up words.
        suffix_indicator (str, optional): Prefix flags used to indicate subword suffixes. Default: ``'##'``.
        max_bytes_per_token (int, optional): The maximum length of tokenization, words exceeding this length will
            not be split. Default: ``100``.
        unknown_token (str, optional): The output for unknown words. When set to an empty string, the corresponding
            unknown word will be directly returned as the output. Otherwise, the set string will be returned as the
            output. Default: ``'[UNK]'``.
        lower_case (bool, optional): Whether to perform lowercase processing on the text. If ``True``, will fold the
            text to lower case and strip accented characters. If ``False``, will only perform normalization on the
            text, with mode specified by `normalization_form` . Default: ``False``.
        keep_whitespace (bool, optional): If ``True``, the whitespace will be kept in the output.
            Default: ``False``.
        normalization_form (NormalizeForm, optional): The desired normalization form.
            See :class:`~.text.NormalizeForm` for details on optional values.
            Default: ``NormalizeForm.NONE`` .
        preserve_unused_token (bool, optional): Whether to preserve special tokens. If ``True``,
            will not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'.
            Default: ``True``.
        with_offsets (bool, optional): Whether to output the start and end offsets of each
            token in the original string. Default: ``False`` .

    Raises:
        TypeError: If `vocab` is not of type :class:`mindspore.dataset.text.Vocab` .
        TypeError: If `suffix_indicator` is not of type str.
        TypeError: If `max_bytes_per_token` is not of type int.
        ValueError: If `max_bytes_per_token` is negative.
        TypeError: If `unknown_token` is not of type str.
        TypeError: If `lower_case` is not of type bool.
        TypeError: If `keep_whitespace` is not of type bool.
        TypeError: If `normalization_form` is not of type :class:`~.text.NormalizeForm` .
        TypeError: If `preserve_unused_token` is not of type bool.
        TypeError: If `with_offsets` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>> from mindspore.dataset.text import NormalizeForm
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["床前明月光"], column_names=["text"])
        >>>
        >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
        >>> vocab_list = ["床", "前", "明", "月", "光", "疑", "是", "地", "上", "霜", "举", "头", "望", "低",
        ...               "思", "故", "乡", "繁", "體", "字", "嘿", "哈", "大", "笑", "嘻", "i", "am", "mak",
        ...               "make", "small", "mistake", "##s", "during", "work", "##ing", "hour", "+", "/",
        ...               "-", "=", "12", "28", "40", "16", " ", "I", "[CLS]", "[SEP]", "[UNK]", "[PAD]", "[MASK]"]
        >>> vocab = text.Vocab.from_list(vocab_list)
        >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100,
        ...                                   unknown_token='[UNK]', lower_case=False, keep_whitespace=False,
        ...                                   normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
        ...                                   with_offsets=False)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['床' '前' '明' '月' '光']
        >>>
        >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
        >>> #                                                     ["offsets_start", dtype=uint32],
        >>> #                                                     ["offsets_limit", dtype=uint32]}
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["床前明月光"], column_names=["text"])
        >>> tokenizer_op = text.BertTokenizer(vocab=vocab, suffix_indicator='##', max_bytes_per_token=100,
        ...                                   unknown_token='[UNK]', lower_case=False, keep_whitespace=False,
        ...                                   normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
        ...                                   with_offsets=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=tokenizer_op,
        ...     input_columns=["text"],
        ...     output_columns=["token", "offsets_start", "offsets_limit"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["token"], item["offsets_start"], item["offsets_limit"])
        ['床' '前' '明' '月' '光'] [ 0  3  6  9 12] [ 3  6  9 12 15]
        >>>
        >>> # Use the transform in eager mode
        >>> data = "床前明月光"
        >>> vocab = text.Vocab.from_list(vocab_list)
        >>> tokenizer_op = text.BertTokenizer(vocab=vocab)
        >>> output = tokenizer_op(data)
        >>> print(output)
        ['床' '前' '明' '月' '光']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_bert_tokenizer
    def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]',
                 lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE,
                 preserve_unused_token=True, with_offsets=False):
        super().__init__()
        # Reject anything that is not a NormalizeForm member before it is used as a lookup key below.
        if not isinstance(normalization_form, NormalizeForm):
            raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.")

        self.vocab = vocab
        self.suffix_indicator = suffix_indicator
        self.max_bytes_per_token = max_bytes_per_token
        self.unknown_token = unknown_token
        self.lower_case = lower_case
        self.keep_whitespace = keep_whitespace
        # Store the matching `cde.NormalizeForm` value rather than the Python enum member.
        self.normalization_form = DE_C_INTER_NORMALIZE_FORM.get(normalization_form)
        self.preserve_unused_token = preserve_unused_token
        self.with_offsets = with_offsets

    def parse(self):
        """Return a `cde.BertTokenizerOperation` built from the stored constructor arguments."""
        # The operation receives the vocabulary's `c_vocab` attribute, not the Python `Vocab` wrapper.
        return cde.BertTokenizerOperation(self.vocab.c_vocab, self.suffix_indicator, self.max_bytes_per_token,
                                          self.unknown_token, self.lower_case, self.keep_whitespace,
                                          self.normalization_form, self.preserve_unused_token, self.with_offsets)
1292
+
1293
+
1294
class CaseFold(TextTensorOperation):
    """
    Perform case folding on a UTF-8 string tensor. Case folding is more aggressive than :code:`str.lower` ,
    so it can convert more characters into lower case. For supported normalization forms, please refer to
    `ICU_Normalizer2 <https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1Normalizer2.html>`_ .

    Note:
        CaseFold is not supported on Windows platform yet.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome     To   BeiJing!'], column_names=["text"])
        >>> case_op = text.CaseFold()
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=case_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        welcome     to   beijing!
        >>>
        >>> # Use the transform in eager mode
        >>> data = 'Welcome     To   BeiJing!'
        >>> output = text.CaseFold()(data)
        >>> print(output)
        welcome     to   beijing!

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    def parse(self):
        """Return a new `cde.CaseFoldOperation`; the transform takes no configuration."""
        case_fold_operation = cde.CaseFoldOperation()
        return case_fold_operation
1331
+
1332
+
1333
class FilterWikipediaXML(TextTensorOperation):
    """
    Clean Wikipedia XML dumps so that the resulting text contains only lowercase letters
    (a-z, with A-Z converted to lowercase) and spaces, where spaces are never consecutive.

    Note:
        FilterWikipediaXML is not supported on Windows platform yet.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["Welcome    to    China", "!!!", "ABC"],
        ...                                              column_names=["text"], shuffle=False)
        >>> replace_op = text.FilterWikipediaXML()
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=replace_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ...     break
        welcome to china
        >>>
        >>> # Use the transform in eager mode
        >>> data = "Welcome    to    China"
        >>> output = replace_op(data)
        >>> print(output)
        welcome to china

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    def parse(self):
        """Return a new `cde.FilterWikipediaXMLOperation`; the transform takes no configuration."""
        filter_operation = cde.FilterWikipediaXMLOperation()
        return filter_operation
1371
+
1372
+
1373
class NormalizeUTF8(TextTensorOperation):
    """
    Normalize the input UTF-8 encoded strings.

    Note:
        NormalizeUTF8 is not supported on Windows platform yet.

    Args:
        normalize_form (NormalizeForm, optional): The desired normalization form.
            See :class:`~.text.NormalizeForm` for details on optional values.
            Default: ``NormalizeForm.NFKC`` .

    Raises:
        TypeError: If `normalize_form` is not of type :class:`~.text.NormalizeForm`.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>> from mindspore.dataset.text import NormalizeForm
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["ṩ", "ḍ̇", "q̇", "fi", "2⁵", "ẛ"],
        ...                                              column_names=["text"], shuffle=False)
        >>> normalize_op = text.NormalizeUTF8(normalize_form=NormalizeForm.NFC)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=normalize_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ...     break
        ṩ
        >>>
        >>> # Use the transform in eager mode
        >>> data = ["ṩ", "ḍ̇", "q̇", "fi", "2⁵", "ẛ"]
        >>> output = text.NormalizeUTF8(NormalizeForm.NFKC)(data)
        >>> print(output)
        ['ṩ' 'ḍ̇' 'q̇' 'fi' '25' 'ṡ']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    def __init__(self, normalize_form=NormalizeForm.NFKC):
        super().__init__()
        # This check also rejects None, so no None-to-default substitution is needed afterwards.
        if not isinstance(normalize_form, NormalizeForm):
            raise TypeError("Wrong input type for normalize_form, should be enum of 'NormalizeForm'.")

        # Store the matching `cde.NormalizeForm` value rather than the Python enum member.
        self.normalize_form = DE_C_INTER_NORMALIZE_FORM.get(normalize_form)

    def parse(self):
        """Return a `cde.NormalizeUTF8Operation` for the stored normalization form."""
        return cde.NormalizeUTF8Operation(self.normalize_form)
1427
+
1428
+
1429
class RegexReplace(TextTensorOperation):
    """
    Replace part of the input UTF-8 string with a different text string using regular expressions.

    Note:
        RegexReplace is not supported on Windows platform yet.

    Args:
        pattern (str): The regular expression, i.e. the specific, standard textual syntax used for
            representing patterns for matching text.
        replace (str): The string that replaces the matched elements.
        replace_all (bool, optional): Whether to replace all matched elements. If ``False``, only the
            first matched element will be replaced; otherwise, all matched elements will be replaced.
            Default: ``True``.

    Raises:
        TypeError: If `pattern` is not of type str.
        TypeError: If `replace` is not of type str.
        TypeError: If `replace_all` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['apple orange apple orange apple'],
        ...                                              column_names=["text"])
        >>> regex_replace = text.RegexReplace('apple', 'orange')
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=regex_replace)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        orange orange orange orange orange
        >>>
        >>> # Use the transform in eager mode
        >>> data = 'onetwoonetwoone'
        >>> output = text.RegexReplace(pattern="one", replace="two", replace_all=True)(data)
        >>> print(output)
        twotwotwotwotwo

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_regex_replace
    def __init__(self, pattern, replace, replace_all=True):
        super().__init__()
        # Keep the arguments unchanged; they are forwarded as-is in `parse`.
        self.pattern, self.replace, self.replace_all = pattern, replace, replace_all

    def parse(self):
        """Return a `cde.RegexReplaceOperation` built from the stored pattern, replacement and flag."""
        operation_args = (self.pattern, self.replace, self.replace_all)
        return cde.RegexReplaceOperation(*operation_args)
1485
+
1486
+
1487
class RegexTokenizer(TextTensorOperation):
    """
    Split a scalar tensor of UTF-8 string into tokens using a regular expression pattern.

    See https://unicode-org.github.io/icu/userguide/strings/regexp.html for supported regex pattern.

    Note:
        RegexTokenizer is not supported on Windows platform yet.

    Args:
        delim_pattern (str): The pattern of regex delimiters.
            The original string will be split by matched elements.
        keep_delim_pattern (str, optional): A string matched by 'delim_pattern' is kept as a token
            if it can also be matched by 'keep_delim_pattern'. The default value is an empty str,
            which means that delimiters will not be kept as an output token. Default: ``''``.
        with_offsets (bool, optional): Whether to output the start and end offsets of each
            token in the original string. Default: ``False`` .

    Raises:
        TypeError: If `delim_pattern` is not of type string.
        TypeError: If `keep_delim_pattern` is not of type string.
        TypeError: If `with_offsets` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome  |,  To  |,  BeiJing!'],
        ...                                              column_names=["text"])
        >>>
        >>> # 1) If with_offsets=False, default output is one column {["text", dtype=str]}
        >>> delim_pattern = r"[ |,]"
        >>> tokenizer_op = text.RegexTokenizer(delim_pattern, with_offsets=False)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['Welcome' 'To' 'BeiJing!']
        >>>
        >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
        >>> #                                                     ["offsets_start", dtype=uint32],
        >>> #                                                     ["offsets_limit", dtype=uint32]}
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome  |,  To  |,  BeiJing!'],
        ...                                              column_names=["text"])
        >>> tokenizer_op = text.RegexTokenizer(delim_pattern, with_offsets=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=tokenizer_op,
        ...     input_columns=["text"],
        ...     output_columns=["token", "offsets_start", "offsets_limit"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["token"], item["offsets_start"], item["offsets_limit"])
        ['Welcome' 'To' 'BeiJing!'] [ 0 13 21] [ 7 15 29]
        >>>
        >>> # Use the transform in eager mode
        >>> data = 'Welcome     To   BeiJing!'
        >>> output = text.RegexTokenizer(delim_pattern="To", keep_delim_pattern="To", with_offsets=True)(data)
        >>> print(output)
        (array(['Welcome     ', 'To', '   BeiJing!'], dtype='<U12'),
        array([ 0, 12, 14], dtype=uint32), array([12, 14, 25], dtype=uint32))

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_regex_tokenizer
    def __init__(self, delim_pattern, keep_delim_pattern='', with_offsets=False):
        super().__init__()
        # Keep the arguments unchanged; they are forwarded as-is in `parse`.
        self.delim_pattern, self.keep_delim_pattern = delim_pattern, keep_delim_pattern
        self.with_offsets = with_offsets

    def parse(self):
        """Return a `cde.RegexTokenizerOperation` built from the stored patterns and offsets flag."""
        operation_args = (self.delim_pattern, self.keep_delim_pattern, self.with_offsets)
        return cde.RegexTokenizerOperation(*operation_args)
1564
+
1565
+
1566
class UnicodeScriptTokenizer(TextTensorOperation):
    """
    Split a scalar tensor of UTF-8 string into tokens at Unicode script boundaries.

    Note:
        UnicodeScriptTokenizer is not supported on Windows platform yet.

    Args:
        keep_whitespace (bool, optional): Whether or not to emit whitespace tokens. Default: ``False``.
        with_offsets (bool, optional): Whether to output the start and end offsets of each
            token in the original string. Default: ``False`` .

    Raises:
        TypeError: If `keep_whitespace` is not of type bool.
        TypeError: If `with_offsets` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["北 京", "123", "欢 迎", "你"],
        ...                                              column_names=["text"], shuffle=False)
        >>>
        >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
        >>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=False)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ...     break
        ['北' ' ' '京']
        >>>
        >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
        >>> #                                                     ["offsets_start", dtype=uint32],
        >>> #                                                     ["offsets_limit", dtype=uint32]}
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=["北 京", "123", "欢 迎", "你"],
        ...                                              column_names=["text"], shuffle=False)
        >>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=tokenizer_op,
        ...     input_columns=["text"],
        ...     output_columns=["token", "offsets_start", "offsets_limit"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["token"], item["offsets_start"], item["offsets_limit"])
        ...     break
        ['北' ' ' '京'] [0 3 4] [3 4 7]
        >>>
        >>> # Use the transform in eager mode
        >>> data = "北 京"
        >>> unicode_script_tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=False)
        >>> output = unicode_script_tokenizer_op(data)
        >>> print(output)
        ['北' ' ' '京']

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_

    """

    @check_unicode_script_tokenizer
    def __init__(self, keep_whitespace=False, with_offsets=False):
        super().__init__()
        # NOTE(review): `replace_none` presumably swaps a None argument for the given fallback (False here);
        # confirm against the helper's definition.
        self.keep_whitespace = replace_none(keep_whitespace, False)
        self.with_offsets = replace_none(with_offsets, False)

    def parse(self):
        """Return a `cde.UnicodeScriptTokenizerOperation` built from the stored flags."""
        operation_args = (self.keep_whitespace, self.with_offsets)
        return cde.UnicodeScriptTokenizerOperation(*operation_args)
1639
+
1640
+
1641
class WhitespaceTokenizer(TextTensorOperation):
    """
    Split a scalar tensor of UTF-8 string into tokens on ICU4C defined whitespaces,
    such as: ' ', '\\\\t', '\\\\r', '\\\\n'.

    Note:
        WhitespaceTokenizer is not supported on Windows platform yet.

    Args:
        with_offsets (bool, optional): Whether to output the start and end offsets of each
            token in the original string. Default: ``False`` .

    Raises:
        TypeError: If `with_offsets` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.text as text
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome     To   BeiJing!'], column_names=["text"])
        >>>
        >>> # 1) If with_offsets=False, default output one column {["text", dtype=str]}
        >>> tokenizer_op = text.WhitespaceTokenizer(with_offsets=False)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=tokenizer_op)
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["text"])
        ['Welcome' 'To' 'BeiJing!']
        >>>
        >>> # 2) If with_offsets=True, then output three columns {["token", dtype=str],
        >>> #                                                     ["offsets_start", dtype=uint32],
        >>> #                                                     ["offsets_limit", dtype=uint32]}
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=['Welcome     To   BeiJing!'], column_names=["text"])
        >>> tokenizer_op = text.WhitespaceTokenizer(with_offsets=True)
        >>> numpy_slices_dataset = numpy_slices_dataset.map(
        ...     operations=tokenizer_op,
        ...     input_columns=["text"],
        ...     output_columns=["token", "offsets_start", "offsets_limit"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["token"], item["offsets_start"], item["offsets_limit"])
        ['Welcome' 'To' 'BeiJing!'] [ 0 12 17] [ 7 14 25]
        >>>
        >>> # Use the transform in eager mode
        >>> data = 'Welcome     To   BeiJing!'
        >>> output = text.WhitespaceTokenizer(with_offsets=True)(data)
        >>> print(output)
        (array(['Welcome', 'To', 'BeiJing!'], dtype='<U8'), array([ 0, 12, 17], dtype=uint32),
        array([ 7, 14, 25], dtype=uint32))

    Tutorial Examples:
        - `Illustration of text transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/text_gallery.html>`_
    """

    @check_with_offsets
    def __init__(self, with_offsets=False):
        super().__init__()
        # Keep the flag unchanged; it is forwarded as-is in `parse`.
        self.with_offsets = with_offsets

    def parse(self):
        """Return a `cde.WhitespaceTokenizerOperation` configured with the stored offsets flag."""
        whitespace_operation = cde.WhitespaceTokenizerOperation(self.with_offsets)
        return whitespace_operation