mindspore 1.10.0__cp37-none-any.whl → 2.0.0rc1__cp37-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (944)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +9064 -0
  3. mindspore/__init__.py +9 -4
  4. mindspore/_akg/akg/composite/build_module.py +11 -0
  5. mindspore/_akg/akg/config/repository_cuda.json +11 -0
  6. mindspore/_akg/akg/tvm/contrib/nvcc.py +4 -3
  7. mindspore/_c_dataengine.cpython-37m-aarch64-linux-gnu.so +0 -0
  8. mindspore/_c_expression.cpython-37m-aarch64-linux-gnu.so +0 -0
  9. mindspore/_c_mindrecord.cpython-37m-aarch64-linux-gnu.so +0 -0
  10. mindspore/_check_jit_forbidden_api.py +102 -0
  11. mindspore/_checkparam.py +1066 -1001
  12. mindspore/_extends/builtin_operations.py +32 -4
  13. mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
  14. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
  15. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
  16. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
  17. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
  18. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
  19. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  20. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
  21. mindspore/_extends/parse/__init__.py +5 -3
  22. mindspore/_extends/parse/namespace.py +17 -2
  23. mindspore/_extends/parse/parser.py +193 -34
  24. mindspore/_extends/parse/resources.py +7 -8
  25. mindspore/_extends/parse/standard_method.py +1780 -435
  26. mindspore/_extends/parse/trope.py +3 -1
  27. mindspore/_mindspore_offline_debug.cpython-37m-aarch64-linux-gnu.so +0 -0
  28. mindspore/amp.py +53 -58
  29. mindspore/bin/cache_admin +0 -0
  30. mindspore/bin/cache_server +0 -0
  31. mindspore/boost/adasum.py +3 -2
  32. mindspore/boost/boost.py +2 -2
  33. mindspore/boost/boost_cell_wrapper.py +46 -26
  34. mindspore/boost/dim_reduce.py +6 -5
  35. mindspore/boost/grad_accumulation.py +2 -1
  36. mindspore/boost/group_loss_scale_manager.py +1 -1
  37. mindspore/common/__init__.py +11 -10
  38. mindspore/common/_decorator.py +2 -0
  39. mindspore/common/_register_for_adapter.py +55 -0
  40. mindspore/common/_stub_tensor.py +201 -0
  41. mindspore/common/_utils.py +57 -0
  42. mindspore/common/api.py +582 -297
  43. mindspore/common/dtype.py +66 -18
  44. mindspore/common/dump.py +2 -2
  45. mindspore/common/initializer.py +38 -1
  46. mindspore/common/jit_config.py +25 -13
  47. mindspore/common/mutable.py +53 -24
  48. mindspore/common/parameter.py +60 -37
  49. mindspore/common/seed.py +8 -24
  50. mindspore/common/sparse_tensor.py +927 -0
  51. mindspore/common/tensor.py +1627 -3900
  52. mindspore/communication/__init__.py +10 -5
  53. mindspore/communication/_comm_helper.py +78 -214
  54. mindspore/communication/_hccl_management.py +2 -1
  55. mindspore/communication/management.py +136 -47
  56. mindspore/config/op_info.config +501 -1008
  57. mindspore/config/super_bar_config.json +512 -0
  58. mindspore/context.py +291 -56
  59. mindspore/dataset/__init__.py +12 -8
  60. mindspore/dataset/audio/__init__.py +9 -9
  61. mindspore/dataset/audio/transforms.py +1090 -228
  62. mindspore/dataset/audio/utils.py +87 -39
  63. mindspore/dataset/audio/validators.py +223 -1
  64. mindspore/dataset/callback/ds_callback.py +17 -15
  65. mindspore/dataset/core/config.py +246 -17
  66. mindspore/dataset/core/py_util_helpers.py +4 -3
  67. mindspore/dataset/core/validator_helpers.py +10 -10
  68. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  69. mindspore/dataset/debug/debug_hook.py +65 -0
  70. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  71. mindspore/dataset/engine/__init__.py +7 -3
  72. mindspore/dataset/engine/cache_client.py +9 -9
  73. mindspore/dataset/engine/datasets.py +648 -477
  74. mindspore/dataset/engine/datasets_audio.py +165 -167
  75. mindspore/dataset/engine/datasets_standard_format.py +93 -67
  76. mindspore/dataset/engine/datasets_text.py +492 -342
  77. mindspore/dataset/engine/datasets_user_defined.py +85 -50
  78. mindspore/dataset/engine/datasets_vision.py +1224 -699
  79. mindspore/dataset/engine/graphdata.py +134 -69
  80. mindspore/dataset/engine/iterators.py +50 -9
  81. mindspore/dataset/engine/offload.py +52 -31
  82. mindspore/dataset/engine/samplers.py +27 -24
  83. mindspore/dataset/engine/serializer_deserializer.py +14 -15
  84. mindspore/dataset/engine/validators.py +213 -52
  85. mindspore/dataset/text/__init__.py +10 -8
  86. mindspore/dataset/text/transforms.py +152 -57
  87. mindspore/dataset/text/utils.py +98 -49
  88. mindspore/dataset/text/validators.py +25 -0
  89. mindspore/dataset/transforms/__init__.py +4 -2
  90. mindspore/dataset/transforms/c_transforms.py +11 -13
  91. mindspore/dataset/transforms/py_transforms.py +2 -2
  92. mindspore/dataset/transforms/py_transforms_util.py +10 -0
  93. mindspore/dataset/transforms/transforms.py +13 -15
  94. mindspore/dataset/transforms/validators.py +7 -7
  95. mindspore/dataset/utils/__init__.py +2 -1
  96. mindspore/dataset/utils/browse_dataset.py +13 -13
  97. mindspore/dataset/utils/line_reader.py +121 -0
  98. mindspore/dataset/vision/__init__.py +8 -7
  99. mindspore/dataset/vision/c_transforms.py +125 -126
  100. mindspore/dataset/vision/py_transforms.py +37 -37
  101. mindspore/dataset/vision/py_transforms_util.py +23 -20
  102. mindspore/dataset/vision/transforms.py +316 -315
  103. mindspore/dataset/vision/utils.py +313 -17
  104. mindspore/dataset/vision/validators.py +6 -6
  105. mindspore/default_config.py +0 -1
  106. mindspore/{compression → experimental}/__init__.py +6 -5
  107. mindspore/experimental/map_parameter.py +275 -0
  108. mindspore/include/OWNERS +0 -1
  109. mindspore/include/api/callback/callback.h +9 -13
  110. mindspore/include/api/callback/ckpt_saver.h +2 -2
  111. mindspore/include/api/callback/loss_monitor.h +2 -2
  112. mindspore/include/api/callback/lr_scheduler.h +5 -5
  113. mindspore/include/api/callback/time_monitor.h +2 -2
  114. mindspore/include/api/callback/train_accuracy.h +4 -6
  115. mindspore/include/api/cfg.h +19 -6
  116. mindspore/include/api/context.h +70 -9
  117. mindspore/include/api/delegate.h +8 -1
  118. mindspore/include/api/dual_abi_helper.h +8 -24
  119. mindspore/include/api/metrics/accuracy.h +2 -2
  120. mindspore/include/api/metrics/metrics.h +4 -3
  121. mindspore/include/api/model.h +9 -4
  122. mindspore/include/api/model_group.h +68 -0
  123. mindspore/include/api/model_parallel_runner.h +17 -17
  124. mindspore/include/api/net.h +12 -11
  125. mindspore/include/api/serialization.h +20 -4
  126. mindspore/include/api/status.h +7 -1
  127. mindspore/include/api/types.h +25 -21
  128. mindspore/include/api/visible.h +4 -0
  129. mindspore/include/c_api/model_c.h +5 -0
  130. mindspore/include/c_api/status_c.h +1 -1
  131. mindspore/include/dataset/config.h +1 -1
  132. mindspore/include/dataset/constants.h +14 -0
  133. mindspore/include/dataset/text.h +59 -0
  134. mindspore/include/dataset/vision.h +56 -117
  135. mindspore/include/dataset/vision_lite.h +102 -0
  136. mindspore/include/mindapi/base/type_id.h +42 -3
  137. mindspore/lib/libdnnl.so.2 +0 -0
  138. mindspore/lib/libicudata.so.69 +0 -0
  139. mindspore/lib/libicui18n.so.69 +0 -0
  140. mindspore/lib/libicuuc.so.69 +0 -0
  141. mindspore/lib/libmindspore.so +0 -0
  142. mindspore/lib/libmindspore_backend.so +0 -0
  143. mindspore/lib/libmindspore_common.so +0 -0
  144. mindspore/lib/libmindspore_core.so +0 -0
  145. mindspore/lib/libmindspore_glog.so.0 +0 -0
  146. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  147. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  148. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  149. mindspore/lib/libmindspore_shared_lib.so +0 -0
  150. mindspore/lib/libmpi_adapter.so +0 -0
  151. mindspore/lib/libmpi_collective.so +0 -0
  152. mindspore/lib/libnnacl.so +0 -0
  153. mindspore/lib/libopencv_core.so.4.5 +0 -0
  154. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  155. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  156. mindspore/lib/libps_cache.so +0 -0
  157. mindspore/lib/plugin/ascend/libakg.so +0 -0
  158. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  159. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  160. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  161. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  162. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  163. mindspore/lib/{libakg.so → plugin/cpu/libakg.so} +0 -0
  164. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  165. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  166. mindspore/log.py +28 -28
  167. mindspore/mindrecord/common/exceptions.py +2 -4
  168. mindspore/mindrecord/filereader.py +19 -1
  169. mindspore/mindrecord/filewriter.py +250 -88
  170. mindspore/mindrecord/mindpage.py +13 -13
  171. mindspore/mindrecord/shardheader.py +15 -15
  172. mindspore/mindrecord/shardreader.py +9 -0
  173. mindspore/mindrecord/shardwriter.py +29 -29
  174. mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
  175. mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
  176. mindspore/mindrecord/tools/csv_to_mr.py +4 -4
  177. mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
  178. mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
  179. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  180. mindspore/nn/__init__.py +1 -5
  181. mindspore/nn/cell.py +297 -234
  182. mindspore/nn/dynamic_lr.py +1 -1
  183. mindspore/nn/grad/cell_grad.py +17 -42
  184. mindspore/nn/layer/__init__.py +7 -4
  185. mindspore/nn/layer/activation.py +131 -88
  186. mindspore/nn/layer/basic.py +313 -613
  187. mindspore/nn/layer/channel_shuffle.py +103 -0
  188. mindspore/nn/layer/combined.py +1 -1
  189. mindspore/nn/layer/container.py +52 -6
  190. mindspore/nn/layer/conv.py +112 -43
  191. mindspore/nn/layer/dense.py +10 -9
  192. mindspore/nn/layer/embedding.py +36 -34
  193. mindspore/nn/layer/image.py +123 -27
  194. mindspore/nn/layer/math.py +108 -107
  195. mindspore/nn/layer/normalization.py +212 -366
  196. mindspore/nn/layer/padding.py +370 -42
  197. mindspore/nn/layer/pooling.py +1443 -219
  198. mindspore/nn/layer/rnn_cells.py +11 -16
  199. mindspore/nn/layer/rnns.py +38 -39
  200. mindspore/nn/layer/thor_layer.py +24 -25
  201. mindspore/nn/layer/timedistributed.py +5 -5
  202. mindspore/nn/layer/transformer.py +701 -0
  203. mindspore/nn/learning_rate_schedule.py +8 -8
  204. mindspore/nn/loss/__init__.py +9 -6
  205. mindspore/nn/loss/loss.py +678 -142
  206. mindspore/nn/metrics.py +53 -0
  207. mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
  208. mindspore/nn/optim/ada_grad.py +8 -8
  209. mindspore/nn/optim/adadelta.py +2 -3
  210. mindspore/nn/optim/adafactor.py +18 -14
  211. mindspore/nn/optim/adam.py +429 -87
  212. mindspore/nn/optim/adamax.py +5 -6
  213. mindspore/nn/optim/adasum.py +10 -8
  214. mindspore/nn/optim/asgd.py +7 -7
  215. mindspore/nn/optim/ftrl.py +81 -11
  216. mindspore/nn/optim/lamb.py +7 -8
  217. mindspore/nn/optim/lars.py +4 -4
  218. mindspore/nn/optim/lazyadam.py +82 -7
  219. mindspore/nn/optim/momentum.py +8 -7
  220. mindspore/nn/optim/optimizer.py +19 -10
  221. mindspore/nn/optim/proximal_ada_grad.py +6 -5
  222. mindspore/nn/optim/rmsprop.py +3 -3
  223. mindspore/nn/optim/rprop.py +20 -16
  224. mindspore/nn/optim/sgd.py +21 -15
  225. mindspore/nn/optim/thor.py +23 -21
  226. mindspore/nn/probability/__init__.py +0 -2
  227. mindspore/nn/probability/bijector/bijector.py +7 -6
  228. mindspore/nn/probability/bijector/invert.py +4 -2
  229. mindspore/nn/probability/bijector/softplus.py +2 -2
  230. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  231. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  232. mindspore/nn/probability/distribution/__init__.py +6 -0
  233. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
  234. mindspore/nn/probability/distribution/_utils/utils.py +11 -17
  235. mindspore/nn/probability/distribution/bernoulli.py +6 -6
  236. mindspore/nn/probability/distribution/beta.py +1 -1
  237. mindspore/nn/probability/distribution/categorical.py +9 -9
  238. mindspore/nn/probability/distribution/cauchy.py +8 -8
  239. mindspore/nn/probability/distribution/distribution.py +12 -6
  240. mindspore/nn/probability/distribution/exponential.py +5 -5
  241. mindspore/nn/probability/distribution/gamma.py +3 -3
  242. mindspore/nn/probability/distribution/geometric.py +6 -5
  243. mindspore/nn/probability/distribution/gumbel.py +5 -5
  244. mindspore/nn/probability/distribution/half_normal.py +133 -0
  245. mindspore/nn/probability/distribution/laplace.py +128 -0
  246. mindspore/nn/probability/distribution/log_normal.py +0 -1
  247. mindspore/nn/probability/distribution/logistic.py +4 -5
  248. mindspore/nn/probability/distribution/normal.py +11 -15
  249. mindspore/nn/probability/distribution/poisson.py +6 -2
  250. mindspore/nn/probability/distribution/student_t.py +150 -0
  251. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  252. mindspore/nn/probability/distribution/uniform.py +5 -5
  253. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  254. mindspore/nn/reinforcement/tensor_array.py +2 -2
  255. mindspore/nn/sparse/sparse.py +8 -1
  256. mindspore/nn/wrap/cell_wrapper.py +55 -27
  257. mindspore/nn/wrap/grad_reducer.py +20 -11
  258. mindspore/nn/wrap/loss_scale.py +47 -30
  259. mindspore/numpy/array_creations.py +33 -22
  260. mindspore/numpy/array_ops.py +46 -42
  261. mindspore/numpy/logic_ops.py +6 -27
  262. mindspore/numpy/math_ops.py +26 -19
  263. mindspore/numpy/utils.py +1 -8
  264. mindspore/numpy/utils_const.py +112 -62
  265. mindspore/ops/__init__.py +6 -3
  266. mindspore/ops/_constants.py +0 -6
  267. mindspore/ops/_grad/__init__.py +2 -1
  268. mindspore/ops/_grad/grad_array_ops.py +209 -152
  269. mindspore/ops/_grad/grad_base.py +55 -17
  270. mindspore/ops/_grad/grad_clip_ops.py +11 -3
  271. mindspore/ops/_grad/grad_comm_ops.py +58 -47
  272. mindspore/ops/_grad/grad_implementations.py +21 -61
  273. mindspore/ops/_grad/grad_inner_ops.py +48 -6
  274. mindspore/ops/_grad/grad_math_ops.py +306 -161
  275. mindspore/ops/_grad/grad_nn_ops.py +192 -181
  276. mindspore/ops/_grad/grad_other_ops.py +1 -1
  277. mindspore/ops/_grad/grad_quant_ops.py +5 -5
  278. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  279. mindspore/ops/_grad/grad_sparse.py +15 -9
  280. mindspore/ops/_grad_experimental/__init__.py +1 -0
  281. mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
  282. mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
  283. mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
  284. mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
  285. mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
  286. mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
  287. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  288. mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
  289. mindspore/ops/_op_impl/__init__.py +3 -3
  290. mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
  291. mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
  292. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  293. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
  294. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  295. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  296. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
  297. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  298. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  299. mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
  300. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  301. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
  302. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  303. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  304. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  305. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  306. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  307. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  308. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  309. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  310. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  311. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  312. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  313. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  314. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  315. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  316. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  317. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  318. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  319. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  320. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
  321. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
  322. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  323. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  324. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  325. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  326. mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
  327. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  328. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  329. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  330. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  331. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  332. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  333. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  334. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  335. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  336. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  337. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  338. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  339. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  340. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  341. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  342. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  343. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  344. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  345. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  346. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  347. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
  348. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  349. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
  350. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  351. mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
  352. mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
  353. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  354. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  355. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  356. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  357. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  358. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  359. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  360. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
  361. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  362. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  363. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  364. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  365. mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
  366. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  367. mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
  368. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  369. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  370. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  371. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  372. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  373. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  374. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  375. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  376. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  377. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  378. mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
  379. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  380. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  381. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  382. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  383. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  384. mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
  385. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  386. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  387. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  388. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  389. mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
  390. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  391. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  392. mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
  393. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  394. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  395. mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
  396. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  397. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  398. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  399. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  400. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  401. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  402. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  403. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  404. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  405. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  406. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  407. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  408. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  409. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  410. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  411. mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
  412. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  413. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  414. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  415. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  416. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  417. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  418. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  419. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  420. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  421. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  422. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  423. mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
  424. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  425. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  426. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  427. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  428. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  429. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  430. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  431. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  432. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  433. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  434. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  435. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  436. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  437. mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
  438. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  439. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  440. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  441. mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
  442. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
  443. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
  444. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  445. mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
  446. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  447. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  448. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  449. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  450. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  451. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  452. mindspore/ops/_op_impl/cpu/__init__.py +1 -2
  453. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  454. mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
  455. mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
  456. mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
  457. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  458. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  459. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  460. mindspore/ops/_op_impl/tbe/__init__.py +27 -608
  461. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
  462. mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
  463. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  464. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  465. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  466. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
  467. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  468. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  469. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
  470. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
  471. mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
  472. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  473. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
  474. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  475. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  476. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  477. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  478. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  479. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
  480. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
  481. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  482. mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
  483. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
  484. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  485. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  486. mindspore/ops/_op_impl/tbe/greater.py +2 -0
  487. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  488. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
  489. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  490. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  491. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
  492. mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
  493. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
  494. mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
  495. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
  496. mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
  497. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
  498. mindspore/ops/_op_impl/tbe/slice.py +26 -15
  499. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  500. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  501. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
  502. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  503. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
  504. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
  505. mindspore/ops/_primitive_cache.py +3 -2
  506. mindspore/ops/_register_for_op.py +11 -0
  507. mindspore/ops/_utils/__init__.py +1 -1
  508. mindspore/ops/_utils/utils.py +20 -41
  509. mindspore/ops/_vmap/__init__.py +2 -2
  510. mindspore/ops/_vmap/vmap_array_ops.py +170 -78
  511. mindspore/ops/_vmap/vmap_base.py +24 -10
  512. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  513. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  514. mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
  515. mindspore/ops/_vmap/vmap_image_ops.py +52 -0
  516. mindspore/ops/_vmap/vmap_math_ops.py +77 -6
  517. mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
  518. mindspore/ops/_vmap/vmap_other_ops.py +3 -1
  519. mindspore/ops/_vmap/vmap_random_ops.py +55 -3
  520. mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
  521. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  522. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  523. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
  524. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
  525. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
  526. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
  527. mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
  528. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  529. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  530. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  531. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
  532. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  533. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
  534. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  535. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  536. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
  537. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
  538. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  539. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  540. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  541. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  542. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  543. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  544. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  545. mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
  546. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  547. mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
  548. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
  549. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  550. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
  551. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  552. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  553. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
  554. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
  555. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  556. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  557. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  558. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  559. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
  560. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  561. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  562. mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
  563. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
  564. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  565. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
  566. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
  567. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
  568. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
  569. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  570. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
  571. mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
  572. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  573. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  574. mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
  575. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  576. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
  577. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
  578. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
  579. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  580. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  581. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  582. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  583. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  584. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
  585. mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
  586. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
  587. mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
  588. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  589. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
  590. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
  591. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
  592. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  593. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  594. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  595. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  596. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  597. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  598. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  599. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  600. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  601. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  602. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  603. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
  604. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
  605. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
  606. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
  607. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  608. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  609. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  610. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  611. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  612. mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
  613. mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
  614. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  615. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  616. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
  617. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
  618. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
  619. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
  620. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  621. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
  622. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
  623. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
  624. mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
  625. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  626. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  627. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
  628. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
  629. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
  630. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  631. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  632. mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
  633. mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
  634. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  635. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  636. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  637. mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
  638. mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
  639. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  640. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  642. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  643. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  644. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
  645. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
  646. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  647. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  648. mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
  649. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
  650. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
  651. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
  652. mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
  653. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  654. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  655. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
  656. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
  657. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
  658. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  659. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  660. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
  661. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
  662. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
  663. mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
  664. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
  665. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  666. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  667. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
  668. mindspore/ops/bprop_mindir/__init__.py +1 -4
  669. mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
  670. mindspore/ops/composite/__init__.py +12 -13
  671. mindspore/ops/composite/base.py +261 -254
  672. mindspore/ops/composite/env_ops.py +41 -0
  673. mindspore/ops/composite/math_ops.py +197 -156
  674. mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
  675. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
  676. mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
  677. mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
  678. mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
  679. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
  680. mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
  681. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  682. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  683. mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
  684. mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
  685. mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
  686. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
  687. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  688. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
  689. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
  690. mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
  691. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  692. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  693. mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
  694. mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
  695. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
  696. mindspore/ops/function/__init__.py +323 -8
  697. mindspore/ops/function/array_func.py +3511 -780
  698. mindspore/ops/function/clip_func.py +329 -0
  699. mindspore/ops/function/debug_func.py +6 -6
  700. mindspore/ops/function/grad/__init__.py +5 -1
  701. mindspore/ops/function/grad/grad_func.py +736 -65
  702. mindspore/ops/function/image_func.py +270 -0
  703. mindspore/ops/function/linalg_func.py +268 -8
  704. mindspore/ops/function/math_func.py +8032 -3164
  705. mindspore/ops/function/nn_func.py +5619 -1855
  706. mindspore/ops/function/other_func.py +115 -0
  707. mindspore/ops/function/parameter_func.py +11 -10
  708. mindspore/ops/function/random_func.py +939 -77
  709. mindspore/ops/function/sparse_func.py +249 -84
  710. mindspore/ops/function/sparse_unary_func.py +2303 -0
  711. mindspore/ops/function/spectral_func.py +146 -0
  712. mindspore/ops/function/vmap_func.py +114 -0
  713. mindspore/ops/functional.py +182 -254
  714. mindspore/ops/op_info_register.py +79 -34
  715. mindspore/ops/operations/__init__.py +210 -118
  716. mindspore/ops/operations/_csr_ops.py +7 -7
  717. mindspore/ops/operations/_embedding_cache_ops.py +25 -15
  718. mindspore/ops/operations/_grad_ops.py +447 -322
  719. mindspore/ops/operations/_inner_ops.py +547 -176
  720. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  721. mindspore/ops/operations/_ms_kernel.py +29 -27
  722. mindspore/ops/operations/_ocr_ops.py +11 -11
  723. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  724. mindspore/ops/operations/_quant_ops.py +186 -101
  725. mindspore/ops/operations/_rl_inner_ops.py +122 -61
  726. mindspore/ops/operations/_scalar_ops.py +466 -0
  727. mindspore/ops/operations/_sequence_ops.py +1047 -0
  728. mindspore/ops/operations/_tensor_array.py +10 -11
  729. mindspore/ops/operations/_thor_ops.py +4 -4
  730. mindspore/ops/operations/array_ops.py +1428 -1226
  731. mindspore/ops/operations/comm_ops.py +180 -117
  732. mindspore/ops/operations/control_ops.py +4 -2
  733. mindspore/ops/operations/custom_ops.py +185 -98
  734. mindspore/ops/operations/debug_ops.py +92 -54
  735. mindspore/ops/operations/image_ops.py +406 -211
  736. mindspore/ops/operations/inner_ops.py +42 -53
  737. mindspore/ops/operations/linalg_ops.py +32 -29
  738. mindspore/ops/operations/math_ops.py +2076 -897
  739. mindspore/ops/operations/nn_ops.py +1282 -1252
  740. mindspore/ops/operations/other_ops.py +124 -278
  741. mindspore/ops/operations/random_ops.py +345 -178
  742. mindspore/ops/operations/rl_ops.py +8 -9
  743. mindspore/ops/operations/sparse_ops.py +502 -157
  744. mindspore/ops/operations/spectral_ops.py +107 -0
  745. mindspore/ops/primitive.py +192 -15
  746. mindspore/ops/vm_impl_registry.py +23 -2
  747. mindspore/parallel/__init__.py +6 -1
  748. mindspore/parallel/_auto_parallel_context.py +199 -92
  749. mindspore/parallel/_cell_wrapper.py +4 -2
  750. mindspore/parallel/_cost_model_context.py +3 -0
  751. mindspore/parallel/_dp_allreduce_fusion.py +2 -1
  752. mindspore/parallel/_offload_context.py +185 -0
  753. mindspore/parallel/_parallel_serialization.py +167 -28
  754. mindspore/parallel/_ps_context.py +9 -5
  755. mindspore/parallel/_recovery_context.py +1 -1
  756. mindspore/parallel/_tensor.py +9 -1
  757. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  758. mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
  759. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  760. mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
  761. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  762. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
  763. mindspore/parallel/_utils.py +47 -7
  764. mindspore/parallel/algo_parameter_config.py +5 -1
  765. mindspore/parallel/checkpoint_transform.py +329 -0
  766. mindspore/parallel/shard.py +229 -0
  767. mindspore/profiler/__init__.py +2 -1
  768. mindspore/profiler/common/util.py +4 -3
  769. mindspore/profiler/common/validator/validate_path.py +2 -2
  770. mindspore/profiler/envprofiling.py +249 -0
  771. mindspore/profiler/parser/aicpu_data_parser.py +38 -39
  772. mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
  773. mindspore/profiler/parser/base_timeline_generator.py +471 -0
  774. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
  775. mindspore/profiler/parser/framework_parser.py +42 -16
  776. mindspore/profiler/parser/hccl_parser.py +158 -158
  777. mindspore/profiler/parser/hwts_log_parser.py +7 -6
  778. mindspore/profiler/parser/integrator.py +18 -1579
  779. mindspore/profiler/parser/minddata_analyzer.py +8 -8
  780. mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
  781. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  782. mindspore/profiler/parser/optime_parser.py +17 -18
  783. mindspore/profiler/parser/profiler_info.py +108 -0
  784. mindspore/profiler/parser/step_trace_parser.py +1 -1
  785. mindspore/profiler/profiling.py +396 -194
  786. mindspore/rewrite/__init__.py +6 -2
  787. mindspore/rewrite/api/node.py +51 -110
  788. mindspore/rewrite/api/node_type.py +10 -6
  789. mindspore/rewrite/api/pattern_engine.py +51 -7
  790. mindspore/rewrite/api/scoped_value.py +64 -53
  791. mindspore/rewrite/api/symbol_tree.py +108 -61
  792. mindspore/rewrite/api/tree_node_helper.py +2 -3
  793. mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
  794. mindspore/rewrite/ast_helpers/__init__.py +6 -3
  795. mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
  796. mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
  797. mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
  798. mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
  799. mindspore/rewrite/ast_transformers/__init__.py +0 -1
  800. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
  801. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
  802. mindspore/rewrite/common/__init__.py +2 -0
  803. mindspore/rewrite/common/event.py +1 -1
  804. mindspore/rewrite/common/observable.py +1 -1
  805. mindspore/rewrite/common/observer.py +1 -1
  806. mindspore/rewrite/common/rewrite_elog.py +35 -0
  807. mindspore/rewrite/namer.py +2 -2
  808. mindspore/rewrite/namespace.py +14 -4
  809. mindspore/rewrite/node.py +161 -13
  810. mindspore/rewrite/parser.py +0 -1
  811. mindspore/rewrite/parser_register.py +0 -1
  812. mindspore/rewrite/parsers/arguments_parser.py +3 -2
  813. mindspore/rewrite/parsers/assign_parser.py +267 -67
  814. mindspore/rewrite/parsers/attribute_parser.py +56 -0
  815. mindspore/rewrite/parsers/class_def_parser.py +191 -108
  816. mindspore/rewrite/parsers/constant_parser.py +101 -0
  817. mindspore/rewrite/parsers/container_parser.py +88 -0
  818. mindspore/rewrite/parsers/for_parser.py +28 -15
  819. mindspore/rewrite/parsers/function_def_parser.py +21 -5
  820. mindspore/rewrite/parsers/if_parser.py +11 -28
  821. mindspore/rewrite/parsers/module_parser.py +9 -6
  822. mindspore/rewrite/parsers/return_parser.py +3 -2
  823. mindspore/rewrite/sparsify/__init__.py +0 -0
  824. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  825. mindspore/rewrite/sparsify/sparsify.py +109 -0
  826. mindspore/rewrite/sparsify/utils.py +173 -0
  827. mindspore/rewrite/symbol_tree.py +322 -109
  828. mindspore/rewrite/symbol_tree_builder.py +45 -8
  829. mindspore/rewrite/symbol_tree_dumper.py +0 -1
  830. mindspore/rewrite/topological_manager.py +1 -2
  831. mindspore/run_check/_check_version.py +209 -112
  832. mindspore/run_check/run_check.py +2 -1
  833. mindspore/scipy/linalg.py +13 -117
  834. mindspore/scipy/ops.py +5 -71
  835. mindspore/scipy/ops_grad.py +1 -25
  836. mindspore/scipy/ops_wrapper.py +1 -1
  837. mindspore/scipy/optimize/_bfgs.py +1 -1
  838. mindspore/scipy/optimize/_lagrange.py +200 -0
  839. mindspore/scipy/optimize/line_search.py +3 -2
  840. mindspore/scipy/optimize/minimize.py +43 -6
  841. mindspore/scipy/sparse/__init__.py +2 -2
  842. mindspore/scipy/sparse/linalg.py +5 -465
  843. mindspore/scipy/utils.py +2 -1
  844. mindspore/scipy/utils_const.py +7 -1
  845. mindspore/train/__init__.py +6 -4
  846. mindspore/train/_utils.py +28 -5
  847. mindspore/train/amp.py +321 -50
  848. mindspore/train/callback/__init__.py +3 -1
  849. mindspore/train/callback/_backup_and_restore.py +120 -0
  850. mindspore/train/callback/_callback.py +8 -8
  851. mindspore/train/callback/_checkpoint.py +12 -9
  852. mindspore/train/callback/_early_stop.py +13 -7
  853. mindspore/train/callback/_history.py +8 -8
  854. mindspore/train/callback/_lambda_callback.py +6 -6
  855. mindspore/train/callback/_landscape.py +36 -38
  856. mindspore/train/callback/_loss_monitor.py +12 -6
  857. mindspore/train/callback/_lr_scheduler_callback.py +2 -4
  858. mindspore/train/callback/_on_request_exit.py +212 -0
  859. mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
  860. mindspore/train/callback/_summary_collector.py +27 -19
  861. mindspore/train/callback/_time_monitor.py +13 -7
  862. mindspore/train/checkpoint_pb2.py +68 -8
  863. mindspore/train/data_sink.py +122 -33
  864. mindspore/train/dataset_helper.py +28 -87
  865. mindspore/train/loss_scale_manager.py +4 -7
  866. mindspore/{nn → train}/metrics/__init__.py +20 -20
  867. mindspore/{nn → train}/metrics/accuracy.py +12 -10
  868. mindspore/{nn → train}/metrics/auc.py +4 -4
  869. mindspore/{nn → train}/metrics/bleu_score.py +4 -4
  870. mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
  871. mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
  872. mindspore/{nn → train}/metrics/dice.py +6 -5
  873. mindspore/{nn → train}/metrics/error.py +7 -5
  874. mindspore/{nn → train}/metrics/fbeta.py +9 -7
  875. mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
  876. mindspore/{nn → train}/metrics/loss.py +4 -3
  877. mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
  878. mindspore/{nn → train}/metrics/metric.py +6 -5
  879. mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
  880. mindspore/{nn → train}/metrics/perplexity.py +5 -4
  881. mindspore/{nn → train}/metrics/precision.py +5 -4
  882. mindspore/{nn → train}/metrics/recall.py +5 -4
  883. mindspore/{nn → train}/metrics/roc.py +7 -6
  884. mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
  885. mindspore/{nn → train}/metrics/topk.py +7 -5
  886. mindspore/train/mind_ir_pb2.py +339 -32
  887. mindspore/train/model.py +113 -84
  888. mindspore/train/serialization.py +547 -167
  889. mindspore/train/summary/_summary_adapter.py +1 -1
  890. mindspore/train/summary/summary_record.py +43 -12
  891. mindspore/train/train_thor/convert_utils.py +7 -1
  892. mindspore/train/train_thor/dataset_helper.py +3 -3
  893. mindspore/train/train_thor/model_thor.py +0 -4
  894. mindspore/version.py +1 -1
  895. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
  896. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +899 -675
  897. mindspore/compression/common/constant.py +0 -124
  898. mindspore/compression/export/__init__.py +0 -19
  899. mindspore/compression/export/quant_export.py +0 -514
  900. mindspore/compression/quant/qat.py +0 -636
  901. mindspore/compression/quant/quant_utils.py +0 -462
  902. mindspore/compression/quant/quantizer.py +0 -68
  903. mindspore/nn/layer/quant.py +0 -1868
  904. mindspore/nn/layer/rnn_utils.py +0 -90
  905. mindspore/nn/probability/dpn/__init__.py +0 -22
  906. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  907. mindspore/nn/probability/dpn/vae/cvae.py +0 -138
  908. mindspore/nn/probability/dpn/vae/vae.py +0 -122
  909. mindspore/nn/probability/infer/__init__.py +0 -22
  910. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  911. mindspore/nn/probability/infer/variational/svi.py +0 -84
  912. mindspore/nn/probability/toolbox/__init__.py +0 -22
  913. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  914. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
  915. mindspore/nn/probability/transforms/__init__.py +0 -22
  916. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  917. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  918. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  919. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  920. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  921. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  922. mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
  923. mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
  924. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
  925. mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
  926. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
  927. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
  928. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
  929. mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
  930. mindspore/ops/composite/array_ops.py +0 -210
  931. mindspore/ops/composite/clip_ops.py +0 -238
  932. mindspore/ops/composite/random_ops.py +0 -426
  933. mindspore/ops/composite/vmap_ops.py +0 -38
  934. mindspore/ops/operations/sponge_ops.py +0 -3531
  935. mindspore/ops/operations/sponge_update_ops.py +0 -2546
  936. mindspore/parallel/nn/__init__.py +0 -42
  937. mindspore/parallel/nn/loss.py +0 -22
  938. mindspore/parallel/nn/moe.py +0 -21
  939. mindspore/parallel/nn/op_parallel_config.py +0 -22
  940. mindspore/parallel/nn/transformer.py +0 -31
  941. mindspore/run_check/_check_deps_version.py +0 -84
  942. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  943. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  944. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -30,43 +30,49 @@ from .validators import check_imdb_dataset, check_iwslt2016_dataset, check_iwslt
30
30
  check_penn_treebank_dataset, check_ag_news_dataset, check_amazon_review_dataset, check_udpos_dataset, \
31
31
  check_wiki_text_dataset, check_conll2000_dataset, check_cluedataset, \
32
32
  check_sogou_news_dataset, check_textfiledataset, check_dbpedia_dataset, check_yelp_review_dataset, \
33
- check_en_wik9_dataset, check_yahoo_answers_dataset, check_multi30k_dataset, check_squad_dataset
33
+ check_en_wik9_dataset, check_yahoo_answers_dataset, check_multi30k_dataset, check_squad_dataset, \
34
+ check_sst2_dataset
34
35
 
35
36
  from ..core.validator_helpers import replace_none
36
37
 
37
38
 
38
39
  class AGNewsDataset(SourceDataset, TextBaseDataset):
39
40
  """
40
- A source dataset that reads and parses AG News datasets.
41
+ AG News dataset.
41
42
 
42
- The generated dataset has three columns: :py:obj:`[index, title, description]`.
43
- The tensor of column :py:obj:`index` is of the string type.
44
- The tensor of column :py:obj:`title` is of the string type.
45
- The tensor of column :py:obj:`description` is of the string type.
43
+ The generated dataset has three columns: :py:obj:`[index, title, description]` ,
44
+ and the data type of three columns is string type.
46
45
 
47
46
  Args:
48
47
  dataset_dir (str): Path to the root directory that contains the dataset.
49
- usage (str, optional): Acceptable usages include 'train', 'test' and 'all' (default=None, all samples).
50
- num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
51
- num_parallel_workers (int, optional): Number of workers to read the data
52
- (default=None, number set in the config).
53
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
54
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
55
- If shuffle is False, no shuffling will be performed.
56
- If shuffle is True, performs global shuffle.
57
- There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
48
+ usage (str, optional): Acceptable usages include 'train', 'test' and 'all'. Default: None, all samples.
49
+ num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
50
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
51
+ Default: None, will use global default workers(8), it can be set
52
+ by `mindspore.dataset.config.set_num_parallel_workers` .
53
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
54
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
55
+ If `shuffle` is False, no shuffling will be performed.
56
+ If `shuffle` is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
57
+ Set the mode of data shuffling by passing in enumeration variables:
58
58
 
59
- - Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
59
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
60
60
 
61
61
  - Shuffle.FILES: Shuffle files only.
62
62
 
63
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
64
- When this argument is specified, 'num_samples' reflects the max sample number of per shard.
65
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
66
- argument can only be specified when `num_shards` is also specified.
63
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
64
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
65
+ shard_id (int, optional): The shard ID within `num_shards` . This
66
+ argument can only be specified when `num_shards` is also specified. Default: None.
67
67
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
68
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
69
- (default=None, which means no cache is used).
68
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
69
+ Default: None, which means no cache is used.
70
+
71
+ Raises:
72
+ RuntimeError: If `dataset_dir` does not contain data files.
73
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
74
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
75
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
70
76
 
71
77
  Examples:
72
78
  >>> ag_news_dataset_dir = "/path/to/ag_news_dataset_file"
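Purely to illustrate the arguments documented above, here is a minimal sketch of constructing and iterating AGNewsDataset. The path is a placeholder, and the `import mindspore.dataset as ds` alias and the `create_dict_iterator` call are assumptions based on common MindSpore usage rather than text taken from this diff:

>>> import mindspore.dataset as ds
>>> ag_news_dir = "/path/to/ag_news_dataset_file"  # placeholder directory
>>> dataset = ds.AGNewsDataset(dataset_dir=ag_news_dir, usage="train",
...                            shuffle=ds.Shuffle.GLOBAL, num_parallel_workers=4)
>>> for row in dataset.create_dict_iterator(output_numpy=True):
...     index, title, description = row["index"], row["title"], row["description"]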
@@ -125,49 +131,48 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
125
131
 
126
132
  class AmazonReviewDataset(SourceDataset, TextBaseDataset):
127
133
  """
128
- A source dataset that reads and parses Amazon Review Polarity and Amazon Review Full datasets.
134
+ Amazon Review Polarity and Amazon Review Full datasets.
129
135
 
130
- The generated dataset has three columns: :py:obj:`[label, title, content]`.
131
- The tensor of column :py:obj:`label` is of the string type.
132
- The tensor of column :py:obj:`title` is of the string type.
133
- The tensor of column :py:obj:`content` is of the string type.
136
+ The generated dataset has three columns: :py:obj:`[label, title, content]` ,
137
+ and the data type of three columns is string.
134
138
 
135
139
  Args:
136
140
  dataset_dir (str): Path to the root directory that contains the Amazon Review Polarity dataset
137
141
  or the Amazon Review Full dataset.
138
- usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' (default= 'all').
142
+ usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
139
143
  For Polarity dataset, 'train' will read from 3,600,000 train samples,
140
144
  'test' will read from 400,000 test samples,
141
145
  'all' will read from all 4,000,000 samples.
142
146
  For Full dataset, 'train' will read from 3,000,000 train samples,
143
147
  'test' will read from 650,000 test samples,
144
- 'all' will read from all 3,650,000 samples (default=None, all samples).
145
- num_samples (int, optional): Number of samples (rows) to be read (default=None, reads the full dataset).
146
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
147
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
148
- If shuffle is False, no shuffling will be performed.
149
- If shuffle is True, performs global shuffle.
150
- There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
148
+ 'all' will read from all 3,650,000 samples. Default: None, all samples.
149
+ num_samples (int, optional): Number of samples (rows) to be read. Default: None, reads the full dataset.
150
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
151
+ Default: None, will use global default workers(8), it can be set
152
+ by `mindspore.dataset.config.set_num_parallel_workers` .
153
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
154
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
155
+ If `shuffle` is False, no shuffling will be performed.
156
+ If `shuffle` is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
157
+ Set the mode of data shuffling by passing in enumeration variables:
151
158
 
152
- - Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
159
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
153
160
 
154
161
  - Shuffle.FILES: Shuffle files only.
155
162
 
156
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
163
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
157
164
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
158
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
165
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
159
166
  argument can only be specified when `num_shards` is also specified.
160
- num_parallel_workers (int, optional): Number of workers to read the data
161
- (default=None, number set in the mindspore.dataset.config).
162
167
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
163
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
164
- (default=None, which means no cache is used).
168
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
169
+ Default: None, which means no cache is used.
165
170
 
166
171
  Raises:
167
172
  RuntimeError: If `dataset_dir` does not contain data files.
168
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
169
173
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
170
174
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
175
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
171
176
 
172
177
  Examples:
173
178
  >>> amazon_review_dataset_dir = "/path/to/amazon_review_dataset_dir"
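A hedged sketch of combining the `usage`, `num_samples` and `num_parallel_workers` arguments described above; the directory is hypothetical, and the `ds` alias and `get_dataset_size` call are assumed from typical MindSpore usage:

>>> import mindspore.dataset as ds
>>> amazon_dir = "/path/to/amazon_review_dataset_dir"  # placeholder directory
>>> # Read only the first 1,000 rows of the test split with 8 worker threads.
>>> dataset = ds.AmazonReviewDataset(dataset_dir=amazon_dir, usage="test",
...                                  num_samples=1000, num_parallel_workers=8)
>>> print(dataset.get_dataset_size())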
@@ -180,7 +185,7 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
180
185
  The dataset is mainly used for text classification, given the content and title, predict the correct star rating.
181
186
 
182
187
  The Amazon reviews polarity dataset is constructed by taking review score 1 and 2 as negative, 4 and 5 as positive.
183
- Samples of score 3 is ignored. In the dataset, class 1 is the negative and class 2 is the positive.
188
+ Samples of score 3 are ignored.
184
189
 
185
190
  The Amazon Reviews Polarity and Amazon Reviews Full datasets have the same directory structures.
186
191
  You can unzip the dataset files into the following structure and read by MindSpore's API:
@@ -222,21 +227,22 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):
222
227
 
223
228
  class CLUEDataset(SourceDataset, TextBaseDataset):
224
229
  """
225
- A source dataset that reads and parses CLUE datasets.
230
+ CLUE(Chinese Language Understanding Evaluation) dataset.
226
231
  Supported CLUE classification tasks: 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' and 'CSL'.
227
232
 
228
233
  Args:
229
234
  dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for
230
235
  a pattern of files. The list will be sorted in a lexicographical order.
231
236
  task (str, optional): The kind of task, one of 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' and 'CSL'.
232
- (default=AFQMC).
233
- usage (str, optional): Specify the 'train', 'test' or 'eval' part of dataset (default='train').
234
- num_samples (int, optional): The number of samples to be included in the dataset
235
- (default=None, will include all images).
236
- num_parallel_workers (int, optional): Number of workers to read the data
237
- (default=None, number set in the config).
238
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
239
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
237
+ Default: 'AFQMC'.
238
+ usage (str, optional): Specify the 'train', 'test' or 'eval' part of dataset. Default: 'train'.
239
+ num_samples (int, optional): The number of samples to be included in the dataset.
240
+ Default: None, will include all images.
241
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
242
+ Default: None, will use global default workers(8), it can be set
243
+ by `mindspore.dataset.config.set_num_parallel_workers` .
244
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
245
+ Default: Shuffle.GLOBAL. Bool type and Shuffle enum are both supported to pass in.
240
246
  If shuffle is False, no shuffling will be performed.
241
247
  If shuffle is True, performs global shuffle.
242
248
  There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
@@ -245,13 +251,13 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
245
251
 
246
252
  - Shuffle.FILES: Shuffle files only.
247
253
 
248
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
254
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
249
255
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
250
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
256
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
251
257
  argument can only be specified when `num_shards` is also specified.
252
258
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
253
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
254
- (default=None, which means no cache is used).
259
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
260
+ Default: None, which means no cache is used.
255
261
 
256
262
  The generated dataset with different task setting has different output columns:
257
263
 
@@ -400,9 +406,9 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
400
406
  ValueError: task is not in 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' or 'CSL'.
401
407
  ValueError: usage is not in 'train', 'test' or 'eval'.
402
408
  ValueError: If `num_parallel_workers` exceeds the max thread numbers.
409
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
403
410
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
404
411
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
405
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
406
412
 
407
413
  Examples:
408
414
  >>> clue_dataset_dir = ["/path/to/clue_dataset_file"] # contains 1 or multiple clue files
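For illustration only, a sketch of selecting a CLUE task and split with the arguments listed above (placeholder file list, assumed `ds` alias):

>>> import mindspore.dataset as ds
>>> clue_files = ["/path/to/clue_dataset_file"]  # placeholder file list
>>> # Hypothetical call: load the AFQMC training split without shuffling.
>>> dataset = ds.CLUEDataset(dataset_files=clue_files, task="AFQMC",
...                          usage="train", shuffle=False)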
@@ -457,10 +463,10 @@ class CLUEDataset(SourceDataset, TextBaseDataset):
457
463
 
458
464
  class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
459
465
  """
460
- A source dataset that reads and parses CoNLL2000 chunking dataset.
466
+ CoNLL-2000(Conference on Computational Natural Language Learning) chunking dataset.
461
467
 
462
- The generated dataset has three columns: :py:obj:`[word, pos_tag, chunk_tag]`.
463
- The tensors of column :py:obj:`word`, column :py:obj:`pos_tag`,
468
+ The generated dataset has three columns: :py:obj:`[word, pos_tag, chunk_tag]` .
469
+ The tensors of column :py:obj:`word` , column :py:obj:`pos_tag` ,
464
470
  and column :py:obj:`chunk_tag` are of the string type.
465
471
 
466
472
  Args:
@@ -471,7 +477,7 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
471
477
  'all' will read from all 10,948 samples. Default: None, read all samples.
472
478
  num_samples (int, optional): Number of samples (rows) to be read. Default: None, read the full dataset.
473
479
  shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
474
- Default: mindspore.dataset.Shuffle.GLOBAL.
480
+ Default: `mindspore.dataset.Shuffle.GLOBAL` .
475
481
  If shuffle is False, no shuffling will be performed.
476
482
  If shuffle is True, performs global shuffle.
477
483
  There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
@@ -481,12 +487,13 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
481
487
 
482
488
  num_shards (int, optional): Number of shards that the dataset will be divided into.
483
489
  When this argument is specified, `num_samples` reflects the max sample number of per shard. Default: None.
484
- shard_id (int, optional): The shard ID within `num_shards`. This
490
+ shard_id (int, optional): The shard ID within `num_shards` . This
485
491
  argument can only be specified when `num_shards` is also specified. Default: None.
486
- num_parallel_workers (int, optional): Number of workers to read the data.
487
- Default: None, number set in the config.
492
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
493
+ Default: None, will use global default workers(8), it can be set
494
+ by `mindspore.dataset.config.set_num_parallel_workers` .
488
495
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
489
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_.
496
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
490
497
  Default: None, which means no cache is used.
491
498
 
492
499
  Raises:
@@ -547,47 +554,46 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
547
554
 
548
555
  class DBpediaDataset(SourceDataset, TextBaseDataset):
549
556
  """
550
- A source dataset that reads and parses the DBpedia dataset.
557
+ DBpedia dataset.
551
558
 
552
- The generated dataset has three columns :py:obj:`[class, title, content]`.
553
- The tensor of column :py:obj:`class` is of the string type.
554
- The tensor of column :py:obj:`title` is of the string type.
555
- The tensor of column :py:obj:`content` is of the string type.
559
+ The generated dataset has three columns :py:obj:`[class, title, content]` ,
560
+ and the data type of three columns is string.
556
561
 
557
562
  Args:
558
563
  dataset_dir (str): Path to the root directory that contains the dataset.
559
564
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
560
565
  'train' will read from 560,000 train samples,
561
566
  'test' will read from 70,000 test samples,
562
- 'all' will read from all 630,000 samples (default=None, all samples).
563
- num_samples (int, optional): The number of samples to be included in the dataset
564
- (default=None, will include all text).
565
- num_parallel_workers (int, optional): Number of workers to read the data
566
- (default=None, number set in the config).
567
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
568
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
567
+ 'all' will read from all 630,000 samples. Default: None, all samples.
568
+ num_samples (int, optional): The number of samples to be included in the dataset.
569
+ Default: None, will include all text.
570
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
571
+ Default: None, will use global default workers(8), it can be set
572
+ by `mindspore.dataset.config.set_num_parallel_workers` .
573
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
574
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
569
575
  If shuffle is False, no shuffling will be performed.
570
- If shuffle is True, performs global shuffle.
571
- There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
576
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
577
+ Set the mode of data shuffling by passing in enumeration variables:
572
578
 
573
- - Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
579
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
574
580
 
575
581
  - Shuffle.FILES: Shuffle files only.
576
582
 
577
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
583
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
578
584
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
579
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
585
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
580
586
  argument can only be specified when `num_shards` is also specified.
581
587
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
582
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
583
- (default=None, which means no cache is used).
588
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
589
+ Default: None, which means no cache is used.
584
590
 
585
591
  Raises:
586
592
  RuntimeError: If `dataset_dir` does not contain data files.
587
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
588
593
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
589
594
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
590
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
595
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
596
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
591
597
 
592
598
  Examples:
593
599
  >>> dbpedia_dataset_dir = "/path/to/dbpedia_dataset_directory"
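A brief, assumed example of the `num_shards`/`shard_id` pair described above, using a placeholder directory and the same assumed `ds` alias:

>>> import mindspore.dataset as ds
>>> dbpedia_dir = "/path/to/dbpedia_dataset_directory"  # placeholder directory
>>> # Hypothetical two-way sharding: this process reads shard 0 of 2.
>>> dataset = ds.DBpediaDataset(dataset_dir=dbpedia_dir, usage="train",
...                             num_shards=2, shard_id=0)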
@@ -646,33 +652,40 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):
646
652
 
647
653
  class EnWik9Dataset(SourceDataset, TextBaseDataset):
648
654
  """
649
- A source dataset that reads and parses EnWik9 dataset.
655
+ EnWik9 dataset.
650
656
 
651
657
  The generated dataset has one column :py:obj:`[text]` with type string.
652
658
 
653
659
  Args:
654
660
  dataset_dir (str): Path to the root directory that contains the dataset.
655
- num_samples (int, optional): The number of samples to be included in the dataset
656
- (default=None, will include all samples).
657
- num_parallel_workers (int, optional): Number of workers to read the data
658
- (default=None, number set in the config).
659
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
660
- (default=True). Bool type and Shuffle enum are both supported to pass in.
661
+ num_samples (int, optional): The number of samples to be included in the dataset.
662
+ Default: None, will include all samples.
663
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
664
+ Default: None, will use global default workers(8), it can be set
665
+ by `mindspore.dataset.config.set_num_parallel_workers` .
666
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
667
+ Bool type and Shuffle enum are both supported to pass in. Default: True.
661
668
  If shuffle is False, no shuffling will be performed.
662
- If shuffle is True, performs global shuffle.
663
- There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
669
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
670
+ Set the mode of data shuffling by passing in enumeration variables:
664
671
 
665
- - Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
672
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
666
673
 
667
674
  - Shuffle.FILES: Shuffle files only.
668
675
 
669
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
676
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
670
677
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
671
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
678
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
672
679
  argument can only be specified when `num_shards` is also specified.
673
680
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
674
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
675
- (default=None, which means no cache is used).
681
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
682
+ Default: None, which means no cache is used.
683
+
684
+ Raises:
685
+ RuntimeError: If `dataset_dir` does not contain data files.
686
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
687
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
688
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
676
689
 
677
690
  Examples:
678
691
  >>> en_wik9_dataset_dir = "/path/to/en_wik9_dataset"
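To show the Shuffle enum from the argument list in context, a minimal sketch (placeholder path, assumed alias):

>>> import mindspore.dataset as ds
>>> en_wik9_dir = "/path/to/en_wik9_dataset"  # placeholder directory
>>> # Shuffle at file level only, per the Shuffle.FILES option documented above.
>>> dataset = ds.EnWik9Dataset(dataset_dir=en_wik9_dir, shuffle=ds.Shuffle.FILES)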
@@ -723,45 +736,46 @@ class EnWik9Dataset(SourceDataset, TextBaseDataset):
723
736
 
724
737
  class IMDBDataset(MappableDataset, TextBaseDataset):
725
738
  """
726
- A source dataset that reads and parses Internet Movie Database (IMDb).
739
+ IMDb(Internet Movie Database) dataset.
727
740
 
728
- The generated dataset has two columns: :py:obj:`[text, label]`.
741
+ The generated dataset has two columns: :py:obj:`[text, label]` .
729
742
  The tensor of column :py:obj:`text` is of the string type.
730
- The tensor of column :py:obj:`label` is of a scalar of uint32 type.
743
+ The column :py:obj:`label` is a scalar of the uint32 type.
731
744
 
732
745
  Args:
733
746
  dataset_dir (str): Path to the root directory that contains the dataset.
734
- usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'
735
- (default=None, will read all samples).
736
- num_samples (int, optional): The number of images to be included in the dataset
737
- (default=None, will read all samples).
738
- num_parallel_workers (int, optional): Number of workers to read the data
739
- (default=None, set in the config).
740
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
741
- (default=None, expected order behavior shown in the table).
742
- sampler (Sampler, optional): Object used to choose samples from the
743
- dataset (default=None, expected order behavior shown in the table).
747
+ usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
748
+ Default: None, will read all samples.
749
+ num_samples (int, optional): The number of images to be included in the dataset.
750
+ Default: None, will include all samples.
751
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
752
+ Default: None, will use global default workers(8), it can be set
753
+ by `mindspore.dataset.config.set_num_parallel_workers` .
754
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
755
+ Default: None, expected order behavior shown in the table below.
756
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
757
+ Default: None, expected order behavior shown in the table below.
744
758
  num_shards (int, optional): Number of shards that the dataset will be divided
745
- into (default=None). When this argument is specified, `num_samples` reflects
759
+ into. Default: None. When this argument is specified, `num_samples` reflects
746
760
  the maximum sample number of per shard.
747
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
761
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
748
762
  argument can only be specified when `num_shards` is also specified.
749
763
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
750
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
751
- (default=None, which means no cache is used).
764
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
765
+ Default: None, which means no cache is used.
752
766
 
753
767
  Raises:
754
768
  RuntimeError: If `dataset_dir` does not contain data files.
755
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
756
769
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
757
770
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
758
771
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
759
772
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
760
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
773
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
774
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
761
775
 
762
776
  Note:
763
777
  - The shape of the test column.
764
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
778
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
765
779
  The table below shows what input arguments are allowed and their expected behavior.
766
780
 
767
781
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -865,49 +879,51 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
865
879
 
866
880
  class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
867
881
  """
868
- A source dataset that reads and parses IWSLT2016 datasets.
882
+ IWSLT2016(International Workshop on Spoken Language Translation) dataset.
869
883
 
870
- The generated dataset has two columns: :py:obj:`[text, translation]`.
884
+ The generated dataset has two columns: :py:obj:`[text, translation]` .
871
885
  The tensor of column :py:obj: `text` is of the string type.
872
- The tensor of column :py:obj: `translation` is of the string type.
886
+ The column :py:obj:`translation` is of the string type.
873
887
 
874
888
  Args:
875
889
  dataset_dir (str): Path to the root directory that contains the dataset.
876
- usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all' (default=None, all samples).
890
+ usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: None, all samples.
877
891
  language_pair (sequence, optional): Sequence containing source and target language, supported values are
878
892
  ('en', 'fr'), ('en', 'de'), ('en', 'cs'), ('en', 'ar'), ('fr', 'en'), ('de', 'en'), ('cs', 'en'),
879
- ('ar', 'en') (default=('de', 'en')).
893
+ ('ar', 'en'). Default: ('de', 'en').
880
894
  valid_set (str, optional): A string to identify validation set, when usage is valid or all, the validation set
881
- of valid_set type will be read, supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013'
882
- and 'tst2014' (default='tst2013').
883
- test_set (str, optional): A string to identify test set, when usage is test or all, the test set of test_set
884
- type will be read, supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013' and 'tst2014'
885
- (default='tst2014').
886
- num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
887
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
888
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
889
- If shuffle is False, no shuffling will be performed.
890
- If shuffle is True, performs global shuffle.
891
- There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
895
+ of `valid_set` type will be read, supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013'
896
+ and 'tst2014'. Default: 'tst2013'.
897
+ test_set (str, optional): A string to identify test set, when usage is test or all, the test set of `test_set`
898
+ type will be read, supported values are 'dev2010', 'tst2010', 'tst2011', 'tst2012', 'tst2013' and 'tst2014'.
899
+ Default: 'tst2014'.
900
+ num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
901
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
902
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
903
+ If `shuffle` is False, no shuffling will be performed.
904
+ If `shuffle` is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
905
+ Set the mode of data shuffling by passing in enumeration variables:
892
906
 
893
- - Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
907
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
894
908
 
895
909
  - Shuffle.FILES: Shuffle files only.
896
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
910
+
911
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
897
912
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
898
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
913
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
899
914
  argument can only be specified when `num_shards` is also specified.
900
- num_parallel_workers (int, optional): Number of workers to read the data
901
- (default=None, number set in the config).
915
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
916
+ Default: None, will use global default workers(8), it can be set
917
+ by `mindspore.dataset.config.set_num_parallel_workers` .
902
918
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
903
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
904
- (default=None, which means no cache is used).
919
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
920
+ Default: None, which means no cache is used.
905
921
 
906
922
  Raises:
907
923
  RuntimeError: If `dataset_dir` does not contain data files.
908
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
909
924
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
910
925
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
926
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
911
927
 
912
928
  Examples:
913
929
  >>> iwslt2016_dataset_dir = "/path/to/iwslt2016_dataset_dir"
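Under the same assumptions as the earlier sketches (placeholder path, assumed `ds` alias), an illustration of combining `language_pair` with `valid_set`:

>>> import mindspore.dataset as ds
>>> iwslt2016_dir = "/path/to/iwslt2016_dataset_dir"  # placeholder directory
>>> # German-to-English pair, reading the tst2013 validation split.
>>> dataset = ds.IWSLT2016Dataset(dataset_dir=iwslt2016_dir, usage="valid",
...                               language_pair=("de", "en"), valid_set="tst2013")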
@@ -918,8 +934,8 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
918
934
 
919
935
  IWSLT is an international oral translation conference, a major annual scientific conference dedicated to all aspects
920
936
  of oral translation. The MT task of the IWSLT evaluation activity constitutes a dataset, which can be publicly
921
- obtained through the WIT3 website wit3.fbk.eu. The IWSLT2016 dataset includes translations from English to Arabic,
922
- Czech, French, and German, and translations from Arabic, Czech, French, and German to English.
937
+ obtained through the WIT3 website `wit3 <https://wit3.fbk.eu>`_ . The IWSLT2016 dataset includes translations from
938
+ English to Arabic, Czech, French, and German, and translations from Arabic, Czech, French, and German to English.
923
939
 
924
940
  You can unzip the original IWSLT2016 dataset files into this directory structure and read by MindSpore's API. After
925
941
  decompression, you also need to decompress the dataset to be read in the specified folder. For example, if you want
@@ -992,44 +1008,45 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
992
1008
 
993
1009
  class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
994
1010
  """
995
- A source dataset that reads and parses IWSLT2017 datasets.
1011
+ IWSLT2017(International Workshop on Spoken Language Translation) dataset.
996
1012
 
997
- The generated dataset has two columns: :py:obj:`[text, translation]`.
998
- The tensor of column :py:obj:`text` is of the string type.
999
- The tensor of column :py:obj:`translation` is of the string type.
1013
+ The generated dataset has two columns: :py:obj:`[text, translation]` .
1014
+ The tensors of columns :py:obj:`text` and :py:obj:`translation` are of the string type.
1000
1015
 
1001
1016
  Args:
1002
1017
  dataset_dir (str): Path to the root directory that contains the dataset.
1003
- usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all' (default=None, all samples).
1018
+ usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: None, all samples.
1004
1019
  language_pair (sequence, optional): List containing src and tgt language, supported values are ('en', 'nl'),
1005
1020
  ('en', 'de'), ('en', 'it'), ('en', 'ro'), ('nl', 'en'), ('nl', 'de'), ('nl', 'it'), ('nl', 'ro'),
1006
1021
  ('de', 'en'), ('de', 'nl'), ('de', 'it'), ('de', 'ro'), ('it', 'en'), ('it', 'nl'), ('it', 'de'),
1007
- ('it', 'ro'), ('ro', 'en'), ('ro', 'nl'), ('ro', 'de'), ('ro', 'it') (default=('de', 'en')).
1008
- num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
1009
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
1010
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
1022
+ ('it', 'ro'), ('ro', 'en'), ('ro', 'nl'), ('ro', 'de'), ('ro', 'it'). Default: ('de', 'en').
1023
+ num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
1024
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1025
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
1011
1026
  If shuffle is False, no shuffling will be performed.
1012
- If shuffle is True, performs global shuffle.
1013
- There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
1027
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
1028
+ Set the mode of data shuffling by passing in enumeration variables:
1014
1029
 
1015
- - Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
1030
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
1016
1031
 
1017
1032
  - Shuffle.FILES: Shuffle files only.
1018
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
1033
+
1034
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
1019
1035
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
1020
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1036
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1021
1037
  argument can only be specified when `num_shards` is also specified.
1022
- num_parallel_workers (int, optional): Number of workers to read the data
1023
- (default=None, number set in the config).
1038
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1039
+ Default: None, will use global default workers(8), it can be set
1040
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1024
1041
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1025
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1026
- (default=None, which means no cache is used).
1042
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1043
+ Default: None, which means no cache is used.
1027
1044
 
1028
1045
  Raises:
1029
1046
  RuntimeError: If `dataset_dir` does not contain data files.
1030
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1031
1047
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
1032
1048
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
1049
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1033
1050
 
1034
1051
  Examples:
1035
1052
  >>> iwslt2017_dataset_dir = "/path/to/iwslt2017_dataset_dir"
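An illustrative sketch of reading one of the documented translation pairs from this dataset (placeholder path; the iteration call is assumed):

>>> import mindspore.dataset as ds
>>> iwslt2017_dir = "/path/to/iwslt2017_dataset_dir"  # placeholder directory
>>> dataset = ds.IWSLT2017Dataset(dataset_dir=iwslt2017_dir, usage="train",
...                               language_pair=("it", "ro"))
>>> for row in dataset.create_dict_iterator(output_numpy=True):
...     source, target = row["text"], row["translation"]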
@@ -1039,8 +1056,8 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
1039
1056
 
1040
1057
  IWSLT is an international oral translation conference, a major annual scientific conference dedicated to all aspects
1041
1058
  of oral translation. The MT task of the IWSLT evaluation activity constitutes a dataset, which can be publicly
1042
- obtained through the WIT3 website wit3.fbk.eu. The IWSLT2017 dataset involves German, English, Italian, Dutch, and
1043
- Romanian. The dataset includes translations in any two different languages.
1059
+ obtained through the WIT3 website `wit3 <https://wit3.fbk.eu>`_ . The IWSLT2017 dataset involves German, English,
1060
+ Italian, Dutch, and Romanian. The dataset includes translations in any two different languages.
1044
1061
 
1045
1062
  You can unzip the original IWSLT2017 dataset files into this directory structure and read by MindSpore's API. You
1046
1063
  need to decompress the dataset package in texts/DeEnItNlRo/DeEnItNlRo directory to get the DeEnItNlRo-DeEnItNlRo
@@ -1089,48 +1106,49 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
1089
1106
 
1090
1107
  class Multi30kDataset(SourceDataset, TextBaseDataset):
1091
1108
  """
1092
- A source dataset that reads and parses Multi30k dataset.
1109
+ Multi30k dataset.
1093
1110
 
1094
- The generated dataset has two columns :py:obj:`[text, translation]`.
1095
- The tensor of column :py:obj:'text' is of the string type.
1096
- The tensor of column :py:obj:'translation' is of the string type.
1111
+ The generated dataset has two columns :py:obj:`[text, translation]` .
1112
+ The tensor of column :py:obj:`text` is of the string type.
1113
+ The tensor of column :py:obj:`translation` is of the string type.
1097
1114
 
1098
- Args:
1115
+ Args:
1099
1116
  dataset_dir (str): Path to the root directory that contains the dataset.
1100
- usage (str, optional): Acceptable usages include 'train', 'test, 'valid' or 'all' (default='all').
1101
- language_pair (str, optional): Acceptable language_pair include ['en', 'de'], ['de', 'en']
1102
- (default=['en', 'de']).
1103
- num_samples (int, optional): The number of images to be included in the dataset
1104
- (default=None, all samples).
1105
- num_parallel_workers (int, optional): Number of workers to read the data
1106
- (default=None, number set in the config).
1107
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
1108
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
1109
- If shuffle is False, no shuffling will be performed;
1110
- If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
1111
- Otherwise, there are two levels of shuffling:
1117
+ usage (str, optional): Acceptable usages include 'train', 'test', 'valid' or 'all'.
1118
+ Default: None, will read all samples.
1119
+ language_pair (Sequence[str, str], optional): Acceptable language_pair include ['en', 'de'], ['de', 'en'].
1120
+ Default: None, means ['en', 'de'].
1121
+ num_samples (int, optional): The number of images to be included in the dataset.
1122
+ Default: None, will read all samples.
1123
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1124
+ Default: None, will use global default workers(8), it can be set
1125
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1126
+ shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: None, means Shuffle.GLOBAL.
1127
+ If False is provided, no shuffling will be performed.
1128
+ If True is provided, it is the same as setting to mindspore.dataset.Shuffle.GLOBAL.
1129
+ If Shuffle is provided, the effect is as follows:
1112
1130
 
1113
1131
  - Shuffle.GLOBAL: Shuffle both the files and samples.
1114
-
1115
1132
  - Shuffle.FILES: Shuffle files only.
1116
1133
 
1117
1134
  num_shards (int, optional): Number of shards that the dataset will be divided
1118
- into (default=None). When this argument is specified, `num_samples` reflects
1135
+ into. Default: None. When this argument is specified, `num_samples` reflects
1119
1136
  the max sample number of per shard.
1120
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1137
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1121
1138
  argument can only be specified when `num_shards` is also specified.
1122
1139
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1123
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1124
- (default=None, which means no cache is used).
1140
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1141
+ Default: None, which means no cache is used.
1125
1142
 
1126
1143
  Raises:
1127
1144
  RuntimeError: If `dataset_dir` does not contain data files.
1128
- RuntimeError: If `usage` is not 'train', 'test', 'valid' or 'all'.
1129
- RuntimeError: If the length of language_pair is not equal to 2.
1130
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1145
+ ValueError: If `usage` is not 'train', 'test', 'valid' or 'all'.
1146
+ TypeError: If `language_pair` is not of type Sequence[str, str].
1147
+ RuntimeError: If num_samples is less than 0.
1148
+ RuntimeError: If `num_parallel_workers` exceeds the max thread numbers.
1131
1149
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
1132
1150
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
1133
- RuntimeError: If num_samples is less than 0.
1151
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1134
1152
 
1135
1153
  Examples:
1136
1154
  >>> multi30k_dataset_dir = "/path/to/multi30k_dataset_directory"
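A small assumed example exercising `language_pair` and `num_samples` as documented above (placeholder directory):

>>> import mindspore.dataset as ds
>>> multi30k_dir = "/path/to/multi30k_dataset_directory"  # placeholder directory
>>> # language_pair defaults to ['en', 'de'] per the docstring; set it explicitly here.
>>> dataset = ds.Multi30kDataset(dataset_dir=multi30k_dir, usage="valid",
...                              language_pair=["de", "en"], num_samples=100)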
@@ -1138,11 +1156,11 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
1138
1156
 
1139
1157
  About Multi30k dataset:
1140
1158
 
1141
- Multi30K is a dataset to stimulate multilingual multimodal research for English-German.
1142
- It is based on the Flickr30k dataset, which contains images sourced from online
1143
- photo-sharing websites. Each image is paired with five English descriptions, which were
1144
- collected from Amazon Mechanical Turk. The Multi30K dataset extends the Flickr30K
1145
- dataset with translated and independent German sentences.
1159
+ Multi30K is a multilingual dataset that features approximately 31,000 standardized images
1160
+ described in multiple languages. The images are sourced from Flickr and each image comes
1161
+ with sentence descriptions in both English and German, as well as descriptions in other
1162
+ languages. Multi30k is used primarily for training and testing in tasks such as image
1163
+ captioning, machine translation, and visual question answering.
1146
1164
 
1147
1165
  You can unzip the dataset files into the following directory structure and read by MindSpore's API.
1148
1166
 
@@ -1190,9 +1208,9 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
1190
1208
 
1191
1209
  class PennTreebankDataset(SourceDataset, TextBaseDataset):
1192
1210
  """
1193
- A source dataset that reads and parses PennTreebank datasets.
1211
+ PennTreebank dataset.
1194
1212
 
1195
- The generated dataset has one column :py:obj:`[text]`.
1213
+ The generated dataset has one column :py:obj:`[text]` .
1196
1214
  The tensor of column :py:obj:`text` is of the string type.
1197
1215
 
1198
1216
  Args:
@@ -1201,27 +1219,34 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
1201
1219
  'train' will read from 42,068 train samples of string type,
1202
1220
  'test' will read from 3,370 test samples of string type,
1203
1221
  'valid' will read from 3,761 test samples of string type,
1204
- 'all' will read from all 49,199 samples of string type (default=None, all samples).
1205
- num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
1206
- num_parallel_workers (int, optional): Number of workers to read the data
1207
- (default=None, number set in the config).
1208
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
1209
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
1222
+ 'all' will read from all 49,199 samples of string type. Default: None, all samples.
1223
+ num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
1224
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1225
+ Default: None, will use global default workers(8), it can be set
1226
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1227
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1228
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
1210
1229
  If shuffle is False, no shuffling will be performed.
1211
- If shuffle is True, performs global shuffle.
1212
- There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
1230
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
1231
+ Set the mode of data shuffling by passing in enumeration variables:
1213
1232
 
1214
- - Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
1233
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
1215
1234
 
1216
1235
  - Shuffle.FILES: Shuffle files only.
1217
1236
 
1218
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
1219
- When this argument is specified, 'num_samples' reflects the max sample number of per shard.
1220
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1237
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
1238
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
1239
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1221
1240
  argument can only be specified when `num_shards` is also specified.
1222
1241
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1223
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1224
- (default=None, which means no cache is used).
1242
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1243
+ Default: None, which means no cache is used.
1244
+
1245
+ Raises:
1246
+ RuntimeError: If `dataset_dir` does not contain data files.
1247
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1248
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1249
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1225
1250
 
1226
1251
  Examples:
1227
1252
  >>> penn_treebank_dataset_dir = "/path/to/penn_treebank_dataset_directory"
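For reference, a minimal sketch of reading the validation split's single `text` column (placeholder path; alias and iterator call assumed):

>>> import mindspore.dataset as ds
>>> ptb_dir = "/path/to/penn_treebank_dataset_directory"  # placeholder directory
>>> dataset = ds.PennTreebankDataset(dataset_dir=ptb_dir, usage="valid")
>>> for row in dataset.create_dict_iterator(output_numpy=True):
...     line = row["text"]  # the single string column documented above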
@@ -1281,43 +1306,42 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):
1281
1306
 
1282
1307
  class SogouNewsDataset(SourceDataset, TextBaseDataset):
1283
1308
  r"""
1284
- A source dataset that reads and parses Sogou News dataset.
1309
+ Sogou News dataset.
1285
1310
 
1286
- The generated dataset has three columns: :py:obj:`[index, title, content]`.
1287
- The tensor of column :py:obj:`index` is of the string type.
1288
- The tensor of column :py:obj:`title` is of the string type.
1289
- The tensor of column :py:obj:`content` is of the string type.
1311
+ The generated dataset has three columns: :py:obj:`[index, title, content]` ,
1312
+ and the data type of three columns is string.
1290
1313
 
1291
1314
  Args:
1292
1315
  dataset_dir (str): Path to the root directory that contains the dataset.
1293
1316
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' .
1294
1317
  'train' will read from 450,000 train samples, 'test' will read from 60,000 test samples,
1295
- 'all' will read from all 510,000 samples (default=None, all samples).
1296
- num_samples (int, optional): Number of samples (rows) to read (default=None, read all samples).
1297
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
1298
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
1318
+ 'all' will read from all 510,000 samples. Default: None, all samples.
1319
+ num_samples (int, optional): Number of samples (rows) to read. Default: None, read all samples.
1320
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1321
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
1299
1322
  If shuffle is False, no shuffling will be performed.
1300
- If shuffle is True, performs global shuffle.
1301
- There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
1323
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
1324
+ Set the mode of data shuffling by passing in enumeration variables:
1302
1325
 
1303
1326
  - Shuffle.GLOBAL: Shuffle both the files and samples, same as setting shuffle to True.
1304
1327
 
1305
1328
  - Shuffle.FILES: Shuffle files only.
1306
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
1329
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
1307
1330
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
1308
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1331
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1309
1332
  argument can only be specified when `num_shards` is also specified.
1310
- num_parallel_workers (int, optional): Number of workers to read the data
1311
- (default=None, number set in the config).
1333
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1334
+ Default: None, will use global default workers(8), it can be set
1335
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1312
1336
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1313
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1314
- (default=None, which means no cache is used).
1337
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1338
+ Default: None, which means no cache is used.
1315
1339
 
1316
1340
  Raises:
1317
1341
  RuntimeError: If `dataset_dir` does not contain data files.
1318
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1319
1342
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
1320
1343
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
1344
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1321
1345
 
1322
1346
  Examples:
1323
1347
  >>> sogou_news_dataset_dir = "/path/to/sogou_news_dataset_dir"
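An assumed example of drawing a small shuffled sample from the training split (placeholder directory, assumed `ds` alias):

>>> import mindspore.dataset as ds
>>> sogou_dir = "/path/to/sogou_news_dataset_dir"  # placeholder directory
>>> dataset = ds.SogouNewsDataset(dataset_dir=sogou_dir, usage="train",
...                               num_samples=500, shuffle=True)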
@@ -1369,46 +1393,46 @@ class SogouNewsDataset(SourceDataset, TextBaseDataset):

  class SQuADDataset(SourceDataset, TextBaseDataset):
  """
- A source dataset that reads and parses SQuAD 1.1 and SQuAD 2.0 datasets.
+ SQuAD 1.1 and SQuAD 2.0 datasets.

  The generated dataset with different versions and usages has the same output columns:
- :py:obj:`[context, question, text, answer_start]`.
+ :py:obj:`[context, question, text, answer_start]` .
  The tensor of column :py:obj:`context` is of the string type.
  The tensor of column :py:obj:`question` is of the string type.
  The tensor of column :py:obj:`text` is the answer in the context of the string type.
  The tensor of column :py:obj:`answer_start` is the start index of answer in context,
- which is of the uint32 type.
+ which is of the uint32 type.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
- usage (str, optional): Specify the `train`, `dev` or `all` part of dataset (default=None, all samples).
- num_samples (int, optional): The number of samples to be included in the dataset
- (default=None, will include all samples).
- num_parallel_workers (int, optional): Number of workers to read the data
- (default=None, number set in the config).
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
- If shuffle is False, no shuffling will be performed;
- If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
- Otherwise, there are two levels of shuffling:
+ usage (str, optional): Specify the 'train', 'dev' or 'all' part of dataset. Default: None, all samples.
+ num_samples (int, optional): The number of samples to be included in the dataset.
+ Default: None, will include all samples.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: Shuffle.GLOBAL.
+ If False is provided, no shuffling will be performed.
+ If True is provided, it is the same as setting to mindspore.dataset.Shuffle.GLOBAL.
+ If Shuffle is provided, the effect is as follows:

  - Shuffle.GLOBAL: Shuffle both the files and samples.
-
  - Shuffle.FILES: Shuffle files only.

- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
- (default=None, which means no cache is used).
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
  ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Examples:
  >>> squad_dataset_dir = "/path/to/squad_dataset_file"
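A sketch of the sharding and shuffle arguments documented in this hunk, assuming an unzipped SQuAD layout at a placeholder path; the shard values are illustrative, not from this diff:

    import mindspore.dataset as ds

    # Hypothetical sketch: split the SQuAD train split into 4 shards and read
    # shard 0, shuffling at file level only.
    dataset = ds.SQuADDataset(dataset_dir="/path/to/squad_dataset_dir",
                              usage="train", num_shards=4, shard_id=0,
                              shuffle=ds.Shuffle.FILES)
    for row in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
        # Columns follow the docstring: context, question, text, answer_start.
        print(row["question"])
        break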
@@ -1416,7 +1440,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):

  About SQuAD dataset:

- Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by
+ SQuAD (Stanford Question Answering Dataset) is a reading comprehension dataset, consisting of questions posed by
  crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span,
  from the corresponding reading passage, or the question might be unanswerable.

@@ -1430,6 +1454,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
  For SQuAD 1.1:

  .. code-block::
+
  .
  └── SQuAD1
  ├── train-v1.1.json
@@ -1438,6 +1463,7 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
  For SQuAD 2.0:

  .. code-block::
+
  .
  └── SQuAD2
  ├── train-v2.0.json
@@ -1479,6 +1505,106 @@ class SQuADDataset(SourceDataset, TextBaseDataset):
  self.num_shards, self.shard_id)


+ class SST2Dataset(SourceDataset, TextBaseDataset):
+ """
+ SST2(Stanford Sentiment Treebank v2) dataset.
+
+ The generated dataset's train.tsv and dev.tsv have two columns :py:obj:`[sentence, label]` .
+ The generated dataset's test.tsv has one column :py:obj:`[sentence]` .
+ The tensor of column :py:obj:`sentence` and :py:obj:`label` are of the string type.
+
+ Args:
+ dataset_dir (str): Path to the root directory that contains the dataset.
+ usage (str, optional): Usage of this dataset, can be `train`, `test` or `dev`. `train` will read
+ from 67,349 train samples, `test` will read from 1,821 test samples, `dev` will read from
+ all 872 samples. Default: None, will read train samples.
+ num_samples (int, optional): The number of samples to be included in the dataset.
+ Default: None, will include all text.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+ If shuffle is False, no shuffling will be performed;
+ If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
+ Set the mode of data shuffling by passing in enumeration variables:
+
+ - Shuffle.GLOBAL: Shuffle the samples.
+
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
+ When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ shard_id (int, optional): The shard ID within num_shards. This argument can only be specified when
+ num_shards is also specified. Default: None.
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.
+
+ Raises:
+ RuntimeError: If `dataset_dir` does not contain data files.
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ RuntimeError: If `num_shards` is specified but shard_id is None.
+ RuntimeError: If `shard_id` is specified but num_shards is None.
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+
+ Examples:
+ >>> sst2_dataset_dir = "/path/to/sst2_dataset_directory"
+ >>>
+ >>> # 1) Read 3 samples from SST2 dataset
+ >>> dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, num_samples=3)
+ >>>
+ >>> # 2) Read train samples from SST2 dataset
+ >>> dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, usage="train")
+
+ About SST2 dataset:
+ The Stanford Sentiment Treebank is a corpus with fully labeled parse trees that allows for a complete
+ analysis of the compositional effects of sentiment in language. The corpus is based on the dataset introduced
+ by Pang and Lee (2005) and consists of 11,855 single sentences extracted from movie reviews. It was parsed
+ with the Stanford parser and includes a total of 215,154 unique phrases from those parse trees, each
+ annotated by 3 human judges.
+
+ Here is the original SST2 dataset structure.
+ You can unzip the dataset files into this directory structure and read by Mindspore's API.
+
+ .. code-block::
+
+ .
+ └── sst2_dataset_dir
+ ├── train.tsv
+ ├── test.tsv
+ ├── dev.tsv
+ └── original
+
+ Citation:
+
+ .. code-block::
+
+ @inproceedings{socher-etal-2013-recursive,
+ title = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
+ author = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning,
+ Christopher D. and Ng, Andrew and Potts, Christopher},
+ booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
+ month = oct,
+ year = {2013},
+ address = {Seattle, Washington, USA},
+ publisher = {Association for Computational Linguistics},
+ url = {https://www.aclweb.org/anthology/D13-1170},
+ pages = {1631--1642},
+ }
+ """
+
+ @check_sst2_dataset
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
+ num_shards=None, shard_id=None, cache=None):
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
+ self.dataset_dir = dataset_dir
+ self.usage = replace_none(usage, "train")
+
+ def parse(self, children=None):
+ return cde.SST2Node(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag,
+ self.num_shards, self.shard_id)
+
+
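A sketch of how the newly added SST2Dataset might be combined with a text transform; the directory is a placeholder and the whitespace tokenizer is only one illustrative choice, not mandated by this diff:

    import mindspore.dataset as ds
    import mindspore.dataset.text as text

    # Hypothetical sketch: read the SST2 train split and tokenize the `sentence`
    # column; the `label` column is carried through unchanged.
    sst2 = ds.SST2Dataset(dataset_dir="/path/to/sst2_dataset_dir", usage="train", shuffle=False)
    sst2 = sst2.map(operations=text.WhitespaceTokenizer(), input_columns=["sentence"])
    for row in sst2.create_dict_iterator(output_numpy=True, num_epochs=1):
        print(row["sentence"], row["label"])
        break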
  class TextFileDataset(SourceDataset, TextBaseDataset):
  """
  A source dataset that reads and parses datasets stored on disk in text format.
@@ -1487,12 +1613,13 @@ class TextFileDataset(SourceDataset, TextBaseDataset):
  Args:
  dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
  pattern of files. The list will be sorted in a lexicographical order.
- num_samples (int, optional): The number of samples to be included in the dataset
- (default=None, will include all images).
- num_parallel_workers (int, optional): Number of workers to read the data
- (default=None, number set in the config).
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
+ num_samples (int, optional): The number of samples to be included in the dataset.
+ Default: None, will include all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+ Default: `Shuffle.GLOBAL` . Bool type and Shuffle enum are both supported to pass in.
  If shuffle is False, no shuffling will be performed.
  If shuffle is True, performs global shuffle.
  There are three levels of shuffling, desired shuffle enum defined by mindspore.dataset.Shuffle.
@@ -1501,20 +1628,20 @@ class TextFileDataset(SourceDataset, TextBaseDataset):

  - Shuffle.FILES: Shuffle files only.

- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
- (default=None, which means no cache is used).
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  ValueError: If dataset_files are not valid or do not exist.
  ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Examples:
  >>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
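A sketch of the glob and shuffle behaviour documented for `dataset_files` above; the pattern and the sample cap are placeholders, not values from this diff:

    import mindspore.dataset as ds

    # Hypothetical sketch: read every .txt file matching the pattern, shuffle at
    # file level only, and cap the read at 1000 rows of the single `text` column.
    dataset = ds.TextFileDataset(dataset_files="/path/to/corpus/*.txt",
                                 num_samples=1000, shuffle=ds.Shuffle.FILES)
    print(dataset.get_dataset_size())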
@@ -1536,48 +1663,66 @@ class TextFileDataset(SourceDataset, TextBaseDataset):

  class UDPOSDataset(SourceDataset, TextBaseDataset):
  """
- A source dataset that reads and parses UDPOS dataset.
+ UDPOS(Universal Dependencies dataset for Part of Speech) dataset.

- The generated dataset has three columns: :py:obj:`[word, universal, stanford]`.
- The tensor of column :py:obj:`word` is of the string type.
- The tensor of column :py:obj:`universal` is of the string type.
- The tensor of column :py:obj:`stanford` is of the string type.
+ The generated dataset has three columns: :py:obj:`[word, universal, stanford]` ,
+ and the data type of three columns is string.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
  usage (str, optional): Usage of this dataset, can be 'train', 'test', 'valid' or 'all'. 'train' will read from
  12,543 train samples, 'test' will read from 2,077 test samples, 'valid' will read from 2,002 test samples,
- 'all' will read from all 16,622 samples (default=None, all samples).
- num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
- If shuffle is False, no shuffling will be performed;
- If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
- Otherwise, there are two levels of shuffling:
+ 'all' will read from all 16,622 samples. Default: None, all samples.
+ num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+ If shuffle is False, no shuffling will be performed.
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+ Set the mode of data shuffling by passing in enumeration variables:

  - Shuffle.GLOBAL: Shuffle both the files and samples.

  - Shuffle.FILES: Shuffle files only.

- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
- num_parallel_workers (int, optional): Number of workers to read the data
- (default=None, number set in the config).
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
- (default=None, which means no cache is used).
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.

  Examples:
  >>> udpos_dataset_dir = "/path/to/udpos_dataset_dir"
  >>> dataset = ds.UDPOSDataset(dataset_dir=udpos_dataset_dir, usage='all')
+
+ About UDPOS dataset:
+
+ Text corpus dataset that clarifies syntactic or semantic sentence structure.
+ The corpus comprises 254,830 words and 16,622 sentences, taken from various web media including
+ weblogs, newsgroups, emails and reviews.
+
+ Citation:
+
+ .. code-block::
+
+ @inproceedings{silveira14gold,
+ year = {2014},
+ author = {Natalia Silveira and Timothy Dozat and Marie-Catherine de Marneffe and Samuel Bowman
+ and Miriam Connor and John Bauer and Christopher D. Manning},
+ title = {A Gold Standard Dependency Corpus for {E}nglish},
+ booktitle = {Proceedings of the Ninth International Conference on Language
+ Resources and Evaluation (LREC-2014)}
+ }
  """

  @check_udpos_dataset
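As a usage sketch of the three string columns listed in the UDPOS hunk above (the path and split choice are placeholders, not from this diff):

    import mindspore.dataset as ds

    # Hypothetical sketch: read the UDPOS validation split and inspect the
    # word / universal / stanford columns of the first row.
    udpos = ds.UDPOSDataset(dataset_dir="/path/to/udpos_dataset_dir", usage="valid", shuffle=False)
    for row in udpos.create_dict_iterator(output_numpy=True, num_epochs=1):
        print(row["word"], row["universal"], row["stanford"])
        break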
@@ -1595,38 +1740,43 @@ class UDPOSDataset(SourceDataset, TextBaseDataset):

  class WikiTextDataset(SourceDataset, TextBaseDataset):
  """
- A source dataset that reads and parses WikiText2 and WikiText103 datasets.
+ WikiText2 and WikiText103 datasets.

- The generated dataset has one column :py:obj:`[text]`.
- The tensor of column :py:obj:`text` is of the string type.
+ The generated dataset has one column :py:obj:`[text]` , and
+ the tensor of column `text` is of the string type.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
- usage (str, optional): Acceptable usages include 'train', 'test', 'valid' and 'all' (default=None, all samples).
- num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
- num_parallel_workers (int, optional): Number of workers to read the data
- (default=None, number set in the config).
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
- If shuffle is False, no shuffling will be performed;
- If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
- Otherwise, there are two levels of shuffling:
+ usage (str, optional): Acceptable usages include 'train', 'test', 'valid' and 'all'. Default: None, all samples.
+ num_samples (int, optional): Number of samples (rows) to read. Default: None, reads the full dataset.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+ If shuffle is False, no shuffling will be performed.
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+ Set the mode of data shuffling by passing in enumeration variables:

  - Shuffle.GLOBAL: Shuffle both the files and samples.

  - Shuffle.FILES: Shuffle files only.

- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
- When this argument is specified, 'num_samples' reflects the max sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
- (default=None, which means no cache is used).
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

- Examples:
- >>> wiki_text_dataset_dir = "/path/to/wiki_text_dataset_directory"
- >>> dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage='all')
+ Raises:
+ RuntimeError: If `dataset_dir` does not contain data files or invalid.
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+ ValueError: If `num_samples` is invalid (< 0).
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.

  About WikiTextDataset dataset:

@@ -1657,6 +1807,10 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):
  journal={arXiv preprint arXiv:1609.07843},
  year={2016}
  }
+
+ Examples:
+ >>> wiki_text_dataset_dir = "/path/to/wiki_text_dataset_directory"
+ >>> dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage='all')
  """

  @check_wiki_text_dataset
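A sketch of the global worker setting that the updated docstrings repeatedly reference ("will use global default workers(8)"); the worker count and path below are placeholders:

    import mindspore.dataset as ds

    # Hypothetical sketch: when num_parallel_workers is left as None, readers such
    # as WikiTextDataset fall back to the value configured globally here.
    ds.config.set_num_parallel_workers(4)
    print(ds.config.get_num_parallel_workers())
    wiki = ds.WikiTextDataset(dataset_dir="/path/to/wiki_text_dataset_directory", usage="all")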
@@ -1674,47 +1828,44 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):

  class YahooAnswersDataset(SourceDataset, TextBaseDataset):
  """
- A source dataset that reads and parses the YahooAnswers dataset.
+ YahooAnswers dataset.

- The generated dataset has three columns :py:obj:`[class, title, content, answer]`.
- The tensor of column :py:obj:`class` is of the string type.
- The tensor of column :py:obj:`title` is of the string type.
- The tensor of column :py:obj:`content` is of the string type.
- The tensor of column :py:obj:`answer` is of the string type.
+ The generated dataset has four columns :py:obj:`[class, title, content, answer]` , whose data type is string.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read
  from 1,400,000 train samples, 'test' will read from 60,000 test samples, 'all' will read from
- all 1,460,000 samples (default=None, all samples).
- num_samples (int, optional): The number of samples to be included in the dataset
- (default=None, will include all text).
- num_parallel_workers (int, optional): Number of workers to read the data
- (default=None, number set in the config).
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
- If shuffle is False, no shuffling will be performed;
- If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
- Otherwise, there are two levels of shuffling:
+ all 1,460,000 samples. Default: None, all samples.
+ num_samples (int, optional): The number of samples to be included in the dataset.
+ Default: None, will include all text.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+ If shuffle is False, no shuffling will be performed.
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+ Set the mode of data shuffling by passing in enumeration variables:

  - Shuffle.GLOBAL: Shuffle both the files and samples.

  - Shuffle.FILES: Shuffle files only.

- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
- (default=None, which means no cache is used).
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.

  Examples:
  >>> yahoo_answers_dataset_dir = "/path/to/yahoo_answers_dataset_directory"
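A sketch of reading the four string columns documented in this hunk; the path and sample count are placeholders, not values from this diff:

    import mindspore.dataset as ds

    # Hypothetical sketch: take 5 rows from the YahooAnswers test split and print
    # two of the string columns (class, title, content, answer are all available).
    yahoo = ds.YahooAnswersDataset(dataset_dir="/path/to/yahoo_answers_dataset_directory",
                                   usage="test", num_samples=5, shuffle=False)
    for row in yahoo.create_dict_iterator(output_numpy=True, num_epochs=1):
        print(row["class"], row["title"])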
@@ -1772,11 +1923,9 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):

  class YelpReviewDataset(SourceDataset, TextBaseDataset):
  """
- A source dataset that reads and parses Yelp Review Polarity and Yelp Review Full dataset.
+ Yelp Review Polarity and Yelp Review Full datasets.

- The generated dataset has two columns: :py:obj:`[label, text]`.
- The tensor of column :py:obj:`label` is of the string type.
- The tensor of column :py:obj:`text` is of the string type.
+ The generated dataset has two columns: :py:obj:`[label, text]` , and the data type of two columns is string.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
@@ -1784,32 +1933,33 @@ class YelpReviewDataset(SourceDataset, TextBaseDataset):
  For Polarity, 'train' will read from 560,000 train samples, 'test' will read from 38,000 test samples,
  'all' will read from all 598,000 samples.
  For Full, 'train' will read from 650,000 train samples, 'test' will read from 50,000 test samples,
- 'all' will read from all 700,000 samples (default=None, all samples).
- num_samples (int, optional): Number of samples (rows) to read (default=None, reads all samples).
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
- If shuffle is False, no shuffling will be performed;
- If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
- Otherwise, there are two levels of shuffling:
+ 'all' will read from all 700,000 samples. Default: None, all samples.
+ num_samples (int, optional): Number of samples (rows) to read. Default: None, reads all samples.
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+ If shuffle is False, no shuffling will be performed.
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+ Set the mode of data shuffling by passing in enumeration variables:

  - Shuffle.GLOBAL: Shuffle both the files and samples.

  - Shuffle.FILES: Shuffle files only.
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
- num_parallel_workers (int, optional): Number of workers to read the data
- (default=None, number set in the config).
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
- (default=None, which means no cache is used).
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.

  Examples:
  >>> yelp_review_dataset_dir = "/path/to/yelp_review_dataset_dir"
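A sketch of a two-way sharded read of Yelp Review, mirroring how two data-parallel workers might each take one shard; the path is a placeholder and the rank value would normally come from the communication library rather than being hard-coded:

    import mindspore.dataset as ds

    # Hypothetical sketch: shard the train split into two pieces; each process
    # would pass its own shard_id (0 or 1).
    rank_id = 0
    yelp = ds.YelpReviewDataset(dataset_dir="/path/to/yelp_review_dataset_dir",
                                usage="train", num_shards=2, shard_id=rank_id)
    print(yelp.get_dataset_size())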