mindspore-2.0.0rc1-cp38-cp38-manylinux1_x86_64.whl → mindspore-2.2.0-cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (884)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +2 -2
  3. mindspore/__init__.py +5 -2
  4. mindspore/_akg/akg/build_module.py +5 -6
  5. mindspore/_akg/akg/composite/build_module.py +49 -16
  6. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  7. mindspore/_akg/akg/config/repository.json +195 -0
  8. mindspore/_akg/akg/global_configs.py +5 -1
  9. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  10. mindspore/_akg/akg/tvm/api.py +4 -3
  11. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  12. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  13. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  14. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  15. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  16. mindspore/_akg/akg/tvm/build_module.py +16 -1
  17. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  18. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  19. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  20. mindspore/_akg/akg/tvm/module.py +1 -2
  21. mindspore/_akg/akg/tvm/stmt.py +2 -2
  22. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  23. mindspore/_akg/akg/utils/kernel_exec.py +58 -260
  24. mindspore/_akg/akg/utils/op_dsl.py +17 -1
  25. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  26. mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
  27. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  28. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  29. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  30. mindspore/_check_jit_forbidden_api.py +5 -1
  31. mindspore/_checkparam.py +79 -62
  32. mindspore/_extends/graph_kernel/__init__.py +0 -1
  33. mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
  34. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  35. mindspore/_extends/graph_kernel/splitter.py +1 -9
  36. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
  37. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
  38. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  39. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
  40. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
  41. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  42. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  43. mindspore/_extends/parse/__init__.py +19 -17
  44. mindspore/_extends/parse/namespace.py +7 -36
  45. mindspore/_extends/parse/parser.py +375 -189
  46. mindspore/_extends/parse/resources.py +36 -41
  47. mindspore/_extends/parse/standard_method.py +350 -245
  48. mindspore/_extends/parse/trope.py +2 -12
  49. mindspore/_extends/remote/kernel_build_server.py +24 -7
  50. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  51. mindspore/_install_custom.py +43 -0
  52. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  53. mindspore/amp.py +85 -19
  54. mindspore/bin/cache_admin +0 -0
  55. mindspore/bin/cache_server +0 -0
  56. mindspore/boost/base.py +2 -2
  57. mindspore/boost/boost.py +27 -32
  58. mindspore/boost/boost_cell_wrapper.py +37 -13
  59. mindspore/boost/grad_accumulation.py +1 -1
  60. mindspore/boost/grad_freeze.py +34 -6
  61. mindspore/boost/group_loss_scale_manager.py +15 -14
  62. mindspore/boost/less_batch_normalization.py +28 -3
  63. mindspore/common/__init__.py +15 -11
  64. mindspore/common/_auto_dynamic.py +68 -0
  65. mindspore/common/_jit_fallback_utils.py +111 -0
  66. mindspore/common/_register_for_adapter.py +17 -5
  67. mindspore/common/_register_for_tensor.py +2 -2
  68. mindspore/common/_stub_tensor.py +18 -15
  69. mindspore/common/_utils.py +31 -7
  70. mindspore/common/api.py +269 -101
  71. mindspore/common/auto_dynamic_shape.py +498 -0
  72. mindspore/common/dtype.py +61 -21
  73. mindspore/common/dump.py +9 -7
  74. mindspore/common/initializer.py +106 -76
  75. mindspore/common/jit_config.py +35 -14
  76. mindspore/common/lazy_inline.py +187 -0
  77. mindspore/common/mindir_util.py +101 -0
  78. mindspore/common/mutable.py +10 -13
  79. mindspore/common/parameter.py +246 -55
  80. mindspore/common/seed.py +13 -7
  81. mindspore/common/sparse_tensor.py +29 -33
  82. mindspore/common/tensor.py +907 -251
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +84 -4
  85. mindspore/communication/management.py +160 -88
  86. mindspore/config/op_info.config +99 -75
  87. mindspore/config/super_bar_config.json +36 -4
  88. mindspore/context.py +526 -219
  89. mindspore/dataset/__init__.py +9 -46
  90. mindspore/dataset/audio/__init__.py +4 -19
  91. mindspore/dataset/audio/transforms.py +545 -233
  92. mindspore/dataset/audio/utils.py +21 -18
  93. mindspore/dataset/callback/ds_callback.py +42 -13
  94. mindspore/dataset/core/config.py +158 -100
  95. mindspore/dataset/core/validator_helpers.py +1 -63
  96. mindspore/dataset/debug/debug_hook.py +45 -13
  97. mindspore/dataset/debug/pre_defined_hook.py +5 -5
  98. mindspore/dataset/engine/__init__.py +0 -5
  99. mindspore/dataset/engine/cache_client.py +38 -15
  100. mindspore/dataset/engine/datasets.py +615 -278
  101. mindspore/dataset/engine/datasets_audio.py +154 -283
  102. mindspore/dataset/engine/datasets_standard_format.py +104 -116
  103. mindspore/dataset/engine/datasets_text.py +443 -326
  104. mindspore/dataset/engine/datasets_user_defined.py +251 -164
  105. mindspore/dataset/engine/datasets_vision.py +839 -1443
  106. mindspore/dataset/engine/iterators.py +11 -4
  107. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
  108. mindspore/dataset/engine/obs/util.py +3 -0
  109. mindspore/dataset/engine/offload.py +6 -6
  110. mindspore/dataset/engine/queue.py +15 -14
  111. mindspore/dataset/engine/samplers.py +39 -23
  112. mindspore/dataset/engine/serializer_deserializer.py +22 -6
  113. mindspore/dataset/engine/validators.py +21 -331
  114. mindspore/dataset/text/__init__.py +5 -33
  115. mindspore/dataset/text/transforms.py +334 -165
  116. mindspore/dataset/text/utils.py +215 -145
  117. mindspore/dataset/transforms/__init__.py +1 -1
  118. mindspore/dataset/transforms/c_transforms.py +3 -2
  119. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  120. mindspore/dataset/transforms/transforms.py +174 -71
  121. mindspore/dataset/utils/browse_dataset.py +25 -17
  122. mindspore/dataset/utils/line_reader.py +24 -21
  123. mindspore/dataset/vision/__init__.py +5 -26
  124. mindspore/dataset/vision/c_transforms.py +177 -165
  125. mindspore/dataset/vision/py_transforms.py +114 -119
  126. mindspore/dataset/vision/py_transforms_util.py +54 -51
  127. mindspore/dataset/vision/transforms.py +1127 -381
  128. mindspore/dataset/vision/utils.py +54 -38
  129. mindspore/dataset/vision/validators.py +12 -2
  130. mindspore/experimental/map_parameter.py +38 -4
  131. mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
  132. mindspore/experimental/optim/adam.py +192 -0
  133. mindspore/experimental/optim/adamw.py +181 -0
  134. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  135. mindspore/experimental/optim/optimizer.py +252 -0
  136. mindspore/experimental/optim/sgd.py +147 -0
  137. mindspore/gen_ops.py +273 -0
  138. mindspore/include/OWNERS +1 -2
  139. mindspore/include/api/context.h +21 -1
  140. mindspore/include/api/data_type.h +2 -1
  141. mindspore/include/api/graph.h +0 -15
  142. mindspore/include/api/kernel.h +2 -0
  143. mindspore/include/api/kernel_api.h +37 -12
  144. mindspore/include/api/model.h +29 -42
  145. mindspore/include/api/model_group.h +14 -3
  146. mindspore/include/api/model_parallel_runner.h +18 -2
  147. mindspore/include/api/serialization.h +26 -0
  148. mindspore/include/api/status.h +1 -0
  149. mindspore/include/api/types.h +38 -4
  150. mindspore/include/c_api/ms/abstract.h +67 -0
  151. mindspore/include/c_api/ms/attribute.h +197 -0
  152. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  153. mindspore/include/c_api/ms/base/macros.h +32 -0
  154. mindspore/include/c_api/ms/base/status.h +33 -0
  155. mindspore/include/c_api/ms/base/types.h +282 -0
  156. mindspore/include/c_api/ms/context.h +102 -0
  157. mindspore/include/c_api/ms/graph.h +160 -0
  158. mindspore/include/c_api/ms/node.h +606 -0
  159. mindspore/include/c_api/ms/tensor.h +161 -0
  160. mindspore/include/c_api/ms/value.h +84 -0
  161. mindspore/include/c_api/status_c.h +3 -0
  162. mindspore/include/dataset/constants.h +6 -12
  163. mindspore/include/dataset/execute.h +23 -13
  164. mindspore/include/dataset/text.h +26 -26
  165. mindspore/include/dataset/transforms.h +25 -31
  166. mindspore/include/dataset/vision.h +60 -60
  167. mindspore/include/dataset/vision_ascend.h +5 -6
  168. mindspore/include/dataset/vision_lite.h +17 -17
  169. mindspore/include/mindapi/base/format.h +0 -1
  170. mindspore/include/mindapi/base/type_id.h +2 -1
  171. mindspore/include/mindapi/base/types.h +5 -1
  172. mindspore/lib/libdnnl.so.2 +0 -0
  173. mindspore/lib/libjemalloc.so.2 +0 -0
  174. mindspore/lib/libmindspore.so +0 -0
  175. mindspore/lib/libmindspore_backend.so +0 -0
  176. mindspore/lib/libmindspore_common.so +0 -0
  177. mindspore/lib/libmindspore_core.so +0 -0
  178. mindspore/lib/libmindspore_glog.so.0 +0 -0
  179. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  180. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  181. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  182. mindspore/lib/libmindspore_shared_lib.so +0 -0
  183. mindspore/lib/libmpi_adapter.so +0 -0
  184. mindspore/lib/libnnacl.so +0 -0
  185. mindspore/lib/libopencv_core.so.4.5 +0 -0
  186. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  187. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  188. mindspore/lib/libps_cache.so +0 -0
  189. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
  192. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  193. mindspore/lib/plugin/ascend/libakg.so +0 -0
  194. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  195. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  196. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  197. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  198. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  199. mindspore/lib/plugin/cpu/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  201. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  202. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  203. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  204. mindspore/lib/plugin/gpu10.1/libnvidia_collective.so +0 -0
  205. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  206. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  207. mindspore/lib/plugin/gpu11.1/libnvidia_collective.so +0 -0
  208. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  209. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  210. mindspore/lib/plugin/gpu11.6/libnvidia_collective.so +0 -0
  211. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  212. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  213. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  214. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  215. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  216. mindspore/log.py +9 -6
  217. mindspore/mindrecord/filereader.py +33 -4
  218. mindspore/mindrecord/filewriter.py +70 -35
  219. mindspore/mindrecord/mindpage.py +40 -34
  220. mindspore/mindrecord/shardreader.py +1 -1
  221. mindspore/mindrecord/shardsegment.py +1 -1
  222. mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
  223. mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
  224. mindspore/mindrecord/tools/csv_to_mr.py +29 -13
  225. mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
  226. mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
  227. mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
  228. mindspore/nn/cell.py +463 -169
  229. mindspore/nn/dynamic_lr.py +47 -43
  230. mindspore/nn/layer/activation.py +225 -82
  231. mindspore/nn/layer/basic.py +121 -79
  232. mindspore/nn/layer/channel_shuffle.py +21 -21
  233. mindspore/nn/layer/combined.py +33 -26
  234. mindspore/nn/layer/container.py +277 -22
  235. mindspore/nn/layer/conv.py +441 -304
  236. mindspore/nn/layer/dense.py +19 -13
  237. mindspore/nn/layer/embedding.py +62 -49
  238. mindspore/nn/layer/flash_attention.py +264 -0
  239. mindspore/nn/layer/image.py +50 -39
  240. mindspore/nn/layer/math.py +62 -51
  241. mindspore/nn/layer/normalization.py +219 -167
  242. mindspore/nn/layer/padding.py +58 -70
  243. mindspore/nn/layer/pooling.py +334 -287
  244. mindspore/nn/layer/rnn_cells.py +53 -38
  245. mindspore/nn/layer/rnns.py +59 -56
  246. mindspore/nn/layer/thor_layer.py +52 -44
  247. mindspore/nn/layer/timedistributed.py +6 -4
  248. mindspore/nn/layer/transformer.py +284 -164
  249. mindspore/nn/learning_rate_schedule.py +34 -25
  250. mindspore/nn/loss/__init__.py +3 -2
  251. mindspore/nn/loss/loss.py +554 -311
  252. mindspore/nn/optim/ada_grad.py +12 -9
  253. mindspore/nn/optim/adadelta.py +14 -11
  254. mindspore/nn/optim/adafactor.py +19 -16
  255. mindspore/nn/optim/adam.py +62 -47
  256. mindspore/nn/optim/adamax.py +13 -10
  257. mindspore/nn/optim/adasum.py +12 -8
  258. mindspore/nn/optim/asgd.py +10 -9
  259. mindspore/nn/optim/ftrl.py +20 -17
  260. mindspore/nn/optim/lamb.py +16 -12
  261. mindspore/nn/optim/lars.py +8 -6
  262. mindspore/nn/optim/lazyadam.py +25 -20
  263. mindspore/nn/optim/momentum.py +10 -7
  264. mindspore/nn/optim/optimizer.py +61 -9
  265. mindspore/nn/optim/proximal_ada_grad.py +14 -13
  266. mindspore/nn/optim/rmsprop.py +17 -13
  267. mindspore/nn/optim/rprop.py +30 -17
  268. mindspore/nn/optim/sgd.py +40 -23
  269. mindspore/nn/optim/thor.py +24 -26
  270. mindspore/nn/probability/bijector/bijector.py +11 -11
  271. mindspore/nn/probability/bijector/exp.py +1 -1
  272. mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
  273. mindspore/nn/probability/bijector/invert.py +1 -1
  274. mindspore/nn/probability/bijector/power_transform.py +29 -29
  275. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  276. mindspore/nn/probability/bijector/softplus.py +5 -5
  277. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
  278. mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
  279. mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
  280. mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
  281. mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
  282. mindspore/nn/probability/distribution/_utils/utils.py +1 -1
  283. mindspore/nn/probability/distribution/bernoulli.py +9 -9
  284. mindspore/nn/probability/distribution/beta.py +8 -8
  285. mindspore/nn/probability/distribution/categorical.py +23 -15
  286. mindspore/nn/probability/distribution/cauchy.py +5 -6
  287. mindspore/nn/probability/distribution/distribution.py +3 -3
  288. mindspore/nn/probability/distribution/exponential.py +4 -4
  289. mindspore/nn/probability/distribution/gamma.py +10 -10
  290. mindspore/nn/probability/distribution/geometric.py +8 -8
  291. mindspore/nn/probability/distribution/gumbel.py +8 -9
  292. mindspore/nn/probability/distribution/half_normal.py +5 -5
  293. mindspore/nn/probability/distribution/laplace.py +5 -5
  294. mindspore/nn/probability/distribution/log_normal.py +12 -11
  295. mindspore/nn/probability/distribution/logistic.py +8 -8
  296. mindspore/nn/probability/distribution/normal.py +6 -5
  297. mindspore/nn/probability/distribution/poisson.py +10 -11
  298. mindspore/nn/probability/distribution/student_t.py +8 -9
  299. mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
  300. mindspore/nn/probability/distribution/uniform.py +11 -11
  301. mindspore/nn/reinforcement/tensor_array.py +2 -2
  302. mindspore/nn/sparse/sparse.py +9 -9
  303. mindspore/nn/wrap/cell_wrapper.py +188 -63
  304. mindspore/nn/wrap/grad_reducer.py +21 -12
  305. mindspore/nn/wrap/loss_scale.py +136 -49
  306. mindspore/numpy/__init__.py +4 -4
  307. mindspore/numpy/array_creations.py +55 -56
  308. mindspore/numpy/array_ops.py +134 -35
  309. mindspore/numpy/logic_ops.py +66 -20
  310. mindspore/numpy/math_ops.py +142 -139
  311. mindspore/numpy/utils_const.py +2 -2
  312. mindspore/offline_debug/convert_async.py +2 -2
  313. mindspore/ops/_grad_experimental/__init__.py +7 -5
  314. mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
  315. mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
  316. mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
  317. mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
  318. mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
  319. mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
  320. mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
  321. mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
  322. mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
  323. mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
  324. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
  325. mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
  326. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  327. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  328. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
  329. mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
  330. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
  331. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
  332. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
  333. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
  334. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  335. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
  336. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
  337. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
  338. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  339. mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
  340. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  341. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  342. mindspore/ops/_op_impl/aicpu/cast.py +52 -0
  343. mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
  344. mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
  345. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  346. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
  347. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  348. mindspore/ops/_op_impl/aicpu/eye.py +4 -4
  349. mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
  350. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
  351. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  352. mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
  353. mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
  354. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  355. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  356. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  357. mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
  358. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
  359. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  360. mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
  361. mindspore/ops/_op_impl/aicpu/median.py +1 -0
  362. mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
  363. mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
  364. mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
  365. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
  366. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  367. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  368. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  369. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  370. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  371. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
  372. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
  373. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
  374. mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
  375. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  376. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  377. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  378. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  379. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
  380. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
  381. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  382. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  383. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  384. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  385. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  386. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
  387. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
  388. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
  389. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
  390. mindspore/ops/_op_impl/tbe/__init__.py +6 -4
  391. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  392. mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
  393. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
  394. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
  395. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
  396. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
  397. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
  398. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  399. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
  400. mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
  401. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
  402. mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
  403. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
  404. mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
  405. mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
  406. mindspore/ops/_op_impl/tbe/im2col.py +4 -4
  407. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  408. mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
  409. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
  410. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
  411. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  412. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
  413. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  414. mindspore/ops/_primitive_cache.py +1 -1
  415. mindspore/ops/_tracefunc.py +241 -0
  416. mindspore/ops/_utils/utils.py +10 -2
  417. mindspore/ops/_vmap/vmap_array_ops.py +5 -3
  418. mindspore/ops/_vmap/vmap_base.py +5 -4
  419. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  420. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  421. mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
  422. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  423. mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
  424. mindspore/ops/arg_dtype_cast.py +54 -0
  425. mindspore/ops/composite/__init__.py +7 -5
  426. mindspore/ops/composite/base.py +78 -34
  427. mindspore/ops/composite/math_ops.py +5 -695
  428. mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
  429. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
  430. mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
  431. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  432. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  433. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
  434. mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
  435. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
  436. mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
  437. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
  438. mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
  439. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
  440. mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
  441. mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
  442. mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
  443. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
  444. mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
  445. mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
  446. mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
  447. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  448. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  449. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
  450. mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
  451. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
  452. mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
  453. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  454. mindspore/ops/deprecated.py +304 -0
  455. mindspore/ops/function/__init__.py +41 -4
  456. mindspore/ops/function/array_func.py +1108 -467
  457. mindspore/ops/function/clip_func.py +94 -27
  458. mindspore/ops/function/debug_func.py +3 -1
  459. mindspore/ops/function/grad/grad_func.py +82 -73
  460. mindspore/ops/function/image_func.py +28 -12
  461. mindspore/ops/function/linalg_func.py +135 -39
  462. mindspore/ops/function/math_func.py +3779 -894
  463. mindspore/ops/function/nn_func.py +1584 -657
  464. mindspore/ops/function/parameter_func.py +13 -3
  465. mindspore/ops/function/random_func.py +247 -153
  466. mindspore/ops/function/sparse_func.py +14 -11
  467. mindspore/ops/function/sparse_unary_func.py +173 -47
  468. mindspore/ops/function/spectral_func.py +8 -4
  469. mindspore/ops/function/vmap_func.py +8 -7
  470. mindspore/ops/functional.py +47 -16
  471. mindspore/ops/op_info_register.py +346 -86
  472. mindspore/ops/operations/__init__.py +38 -22
  473. mindspore/ops/operations/_grad_ops.py +145 -149
  474. mindspore/ops/operations/_inner_ops.py +298 -56
  475. mindspore/ops/operations/_ms_kernel.py +3 -3
  476. mindspore/ops/operations/_quant_ops.py +24 -28
  477. mindspore/ops/operations/_rl_inner_ops.py +9 -7
  478. mindspore/ops/operations/_scalar_ops.py +115 -0
  479. mindspore/ops/operations/_sequence_ops.py +148 -10
  480. mindspore/ops/operations/_tensor_array.py +1 -1
  481. mindspore/ops/operations/_thor_ops.py +2 -2
  482. mindspore/ops/operations/array_ops.py +1239 -561
  483. mindspore/ops/operations/comm_ops.py +166 -90
  484. mindspore/ops/operations/control_ops.py +3 -3
  485. mindspore/ops/operations/custom_ops.py +124 -102
  486. mindspore/ops/operations/debug_ops.py +24 -11
  487. mindspore/ops/operations/image_ops.py +86 -71
  488. mindspore/ops/operations/inner_ops.py +18 -13
  489. mindspore/ops/operations/linalg_ops.py +30 -11
  490. mindspore/ops/operations/math_ops.py +1730 -435
  491. mindspore/ops/operations/nn_ops.py +1953 -943
  492. mindspore/ops/operations/other_ops.py +65 -43
  493. mindspore/ops/operations/random_ops.py +258 -98
  494. mindspore/ops/operations/rl_ops.py +4 -36
  495. mindspore/ops/operations/sparse_ops.py +38 -33
  496. mindspore/ops/operations/spectral_ops.py +8 -4
  497. mindspore/ops/primitive.py +66 -44
  498. mindspore/ops/signature.py +5 -5
  499. mindspore/parallel/_auto_parallel_context.py +80 -19
  500. mindspore/parallel/_cost_model_context.py +42 -0
  501. mindspore/parallel/_offload_context.py +162 -72
  502. mindspore/parallel/_parallel_serialization.py +2 -2
  503. mindspore/parallel/_ps_context.py +16 -4
  504. mindspore/parallel/_recovery_context.py +2 -1
  505. mindspore/parallel/_tensor.py +15 -13
  506. mindspore/parallel/_transformer/layers.py +8 -6
  507. mindspore/parallel/_transformer/loss.py +1 -0
  508. mindspore/parallel/_transformer/moe.py +7 -7
  509. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  510. mindspore/parallel/_transformer/transformer.py +34 -14
  511. mindspore/parallel/_utils.py +36 -14
  512. mindspore/parallel/algo_parameter_config.py +114 -20
  513. mindspore/parallel/checkpoint_transform.py +16 -18
  514. mindspore/parallel/shard.py +16 -13
  515. mindspore/profiler/__init__.py +1 -1
  516. mindspore/profiler/common/struct_type.py +3 -3
  517. mindspore/profiler/common/util.py +3 -2
  518. mindspore/profiler/envprofiling.py +11 -4
  519. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  520. mindspore/profiler/parser/ascend_flops_generator.py +94 -0
  521. mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
  522. mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
  523. mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
  524. mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
  525. mindspore/profiler/parser/ascend_op_generator.py +276 -0
  526. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  527. mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
  528. mindspore/profiler/parser/base_timeline_generator.py +11 -7
  529. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
  530. mindspore/profiler/parser/flops_parser.py +15 -11
  531. mindspore/profiler/parser/framework_parser.py +92 -73
  532. mindspore/profiler/parser/hccl_parser.py +16 -12
  533. mindspore/profiler/parser/integrator.py +22 -11
  534. mindspore/profiler/parser/memory_usage_parser.py +36 -11
  535. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  536. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  537. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  538. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  539. mindspore/profiler/parser/optime_parser.py +1 -1
  540. mindspore/profiler/parser/profiler_info.py +4 -5
  541. mindspore/profiler/parser/step_trace_parser.py +11 -14
  542. mindspore/profiler/profiling.py +678 -377
  543. mindspore/rewrite/api/node.py +211 -54
  544. mindspore/rewrite/api/node_type.py +5 -0
  545. mindspore/rewrite/api/pattern_engine.py +22 -23
  546. mindspore/rewrite/api/scoped_value.py +20 -17
  547. mindspore/rewrite/api/symbol_tree.py +252 -106
  548. mindspore/rewrite/api/tree_node_helper.py +3 -0
  549. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  550. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  551. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  552. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
  553. mindspore/rewrite/common/rewrite_elog.py +5 -1
  554. mindspore/rewrite/namer.py +51 -51
  555. mindspore/rewrite/namespace.py +14 -5
  556. mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
  557. mindspore/rewrite/node/call_function.py +79 -0
  558. mindspore/rewrite/node/cell_container.py +135 -0
  559. mindspore/rewrite/node/control_flow.py +88 -0
  560. mindspore/rewrite/{node.py → node/node.py} +313 -247
  561. mindspore/rewrite/node/node_manager.py +254 -0
  562. mindspore/rewrite/node/node_topological_manager.py +243 -0
  563. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  564. mindspore/rewrite/parsers/assign_parser.py +225 -239
  565. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  566. mindspore/rewrite/parsers/class_def_parser.py +179 -218
  567. mindspore/rewrite/parsers/constant_parser.py +9 -6
  568. mindspore/rewrite/parsers/container_parser.py +9 -7
  569. mindspore/rewrite/parsers/for_parser.py +36 -15
  570. mindspore/rewrite/parsers/function_def_parser.py +23 -20
  571. mindspore/rewrite/parsers/if_parser.py +28 -24
  572. mindspore/rewrite/parsers/module_parser.py +202 -25
  573. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  574. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  575. mindspore/rewrite/parsers/return_parser.py +6 -6
  576. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  577. mindspore/rewrite/sparsify/sparsify.py +4 -1
  578. mindspore/rewrite/sparsify/utils.py +11 -5
  579. mindspore/rewrite/symbol_tree.py +577 -732
  580. mindspore/rewrite/symbol_tree_builder.py +9 -175
  581. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  582. mindspore/run_check/_check_version.py +46 -39
  583. mindspore/run_check/run_check.py +3 -2
  584. mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
  585. mindspore/safeguard/rewrite_obfuscation.py +517 -0
  586. mindspore/scipy/__init__.py +1 -1
  587. mindspore/scipy/linalg.py +67 -61
  588. mindspore/scipy/ops.py +5 -41
  589. mindspore/scipy/ops_grad.py +3 -2
  590. mindspore/scipy/ops_wrapper.py +5 -5
  591. mindspore/scipy/optimize/line_search.py +8 -8
  592. mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
  593. mindspore/scipy/optimize/minimize.py +16 -12
  594. mindspore/scipy/utils.py +1 -52
  595. mindspore/scipy/utils_const.py +4 -4
  596. mindspore/train/__init__.py +4 -4
  597. mindspore/train/_utils.py +13 -5
  598. mindspore/train/amp.py +410 -148
  599. mindspore/train/anf_ir_pb2.py +16 -4
  600. mindspore/train/callback/_backup_and_restore.py +8 -11
  601. mindspore/train/callback/_callback.py +80 -3
  602. mindspore/train/callback/_checkpoint.py +82 -51
  603. mindspore/train/callback/_early_stop.py +12 -15
  604. mindspore/train/callback/_history.py +1 -1
  605. mindspore/train/callback/_lambda_callback.py +13 -13
  606. mindspore/train/callback/_landscape.py +21 -17
  607. mindspore/train/callback/_loss_monitor.py +9 -10
  608. mindspore/train/callback/_on_request_exit.py +16 -33
  609. mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
  610. mindspore/train/callback/_summary_collector.py +44 -30
  611. mindspore/train/callback/_time_monitor.py +62 -12
  612. mindspore/train/data_sink.py +10 -16
  613. mindspore/train/dataset_helper.py +154 -86
  614. mindspore/train/loss_scale_manager.py +14 -9
  615. mindspore/train/metrics/__init__.py +10 -2
  616. mindspore/train/metrics/accuracy.py +1 -1
  617. mindspore/train/metrics/auc.py +1 -1
  618. mindspore/train/metrics/bleu_score.py +2 -2
  619. mindspore/train/metrics/confusion_matrix.py +14 -14
  620. mindspore/train/metrics/cosine_similarity.py +3 -3
  621. mindspore/train/metrics/dice.py +1 -1
  622. mindspore/train/metrics/fbeta.py +1 -1
  623. mindspore/train/metrics/hausdorff_distance.py +8 -6
  624. mindspore/train/metrics/mean_surface_distance.py +5 -4
  625. mindspore/train/metrics/metric.py +49 -17
  626. mindspore/train/metrics/occlusion_sensitivity.py +4 -4
  627. mindspore/train/metrics/perplexity.py +1 -1
  628. mindspore/train/metrics/precision.py +2 -2
  629. mindspore/train/metrics/recall.py +2 -3
  630. mindspore/train/metrics/roc.py +7 -7
  631. mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
  632. mindspore/train/metrics/topk.py +7 -4
  633. mindspore/train/mind_ir_pb2.py +193 -48
  634. mindspore/train/model.py +377 -133
  635. mindspore/train/serialization.py +697 -245
  636. mindspore/train/summary/_summary_adapter.py +5 -2
  637. mindspore/train/summary/_writer_pool.py +4 -3
  638. mindspore/train/summary/summary_record.py +25 -23
  639. mindspore/train/train_thor/convert_utils.py +39 -23
  640. mindspore/train/train_thor/dataset_helper.py +4 -3
  641. mindspore/train/train_thor/model_thor.py +8 -8
  642. mindspore/version.py +1 -1
  643. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
  644. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +647 -818
  645. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
  646. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  647. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  648. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  649. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  650. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  651. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  652. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  653. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  654. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  655. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  656. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  657. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  658. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  659. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  660. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  661. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  662. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  663. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  664. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  665. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  666. mindspore/_extends/graph_kernel/expander.py +0 -80
  667. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
  668. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  669. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  670. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  671. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  672. mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
  673. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  674. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  675. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  676. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  677. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  678. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  679. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  680. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  681. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  682. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  683. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  684. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  685. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  686. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  687. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  688. mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
  689. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  690. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  691. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  692. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  693. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  694. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  695. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  696. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  697. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  698. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  699. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  700. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  701. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  702. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  703. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  704. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  705. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  706. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  707. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  708. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  709. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  710. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  711. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  712. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  713. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  714. mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
  715. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  716. mindspore/_extends/parse/jit_fallback_modules.py +0 -51
  717. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  718. mindspore/dataset/engine/graphdata.py +0 -1586
  719. mindspore/include/api/net.h +0 -142
  720. mindspore/ops/_grad/grad_array_ops.py +0 -1347
  721. mindspore/ops/_grad/grad_clip_ops.py +0 -84
  722. mindspore/ops/_grad/grad_debug_ops.py +0 -68
  723. mindspore/ops/_grad/grad_inner_ops.py +0 -235
  724. mindspore/ops/_grad/grad_math_ops.py +0 -1684
  725. mindspore/ops/_grad/grad_nn_ops.py +0 -1529
  726. mindspore/ops/_grad/grad_other_ops.py +0 -89
  727. mindspore/ops/_grad/grad_sequence_ops.py +0 -296
  728. mindspore/ops/_grad/grad_sparse.py +0 -323
  729. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
  730. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
  731. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  732. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  733. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  734. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
  735. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
  736. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
  737. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
  738. mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
  739. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
  740. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
  741. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  742. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
  743. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  744. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
  745. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  746. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
  747. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
  748. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
  749. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  750. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  751. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
  752. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
  753. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
  754. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
  755. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
  756. mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
  757. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
  758. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
  759. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
  760. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  761. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
  762. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  763. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  764. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
  765. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
  766. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
  767. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  768. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  769. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  770. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
  771. mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
  772. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  773. mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
  774. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
  775. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
  776. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
  777. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
  778. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
  779. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
  780. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  781. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
  782. mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
  783. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
  784. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
  785. mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
  786. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  787. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
  788. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
  789. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
  790. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
  791. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
  792. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
  793. mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
  794. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  795. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
  796. mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
  797. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
  798. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
  799. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
  800. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
  801. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
  802. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
  803. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
  804. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
  805. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
  806. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
  807. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  808. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  809. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  810. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
  811. mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
  812. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  813. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  814. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
  815. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
  816. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
  817. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
  818. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  819. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  820. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  821. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
  822. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
  823. mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
  824. mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
  825. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
  826. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  827. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
  828. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
  829. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
  830. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
  831. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
  832. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
  833. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
  834. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
  835. mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
  836. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  837. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  838. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
  839. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
  840. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
  841. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  842. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
  843. mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
  844. mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
  845. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
  846. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  847. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
  848. mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
  849. mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
  850. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
  851. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  852. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
  853. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
  854. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  855. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
  856. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
  857. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  858. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  859. mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
  860. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
  861. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
  862. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
  863. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
  864. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  865. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
  866. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
  867. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
  868. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
  869. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  870. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  871. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
  872. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
  873. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
  874. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
  875. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
  876. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
  877. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
  878. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
  879. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  880. mindspore/rewrite/node_visitor.py +0 -44
  881. mindspore/rewrite/topological_manager.py +0 -203
  882. mindspore/scipy/sparse/linalg.py +0 -192
  883. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
  884. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
@@ -29,8 +29,10 @@ import atexit
  import glob
  import json
  import os
+ import queue
  import signal
  import stat
+ import subprocess
  import warnings
 
  import gc
@@ -62,7 +64,7 @@ from mindspore.dataset.text.utils import SentencePieceModel, DE_C_INTER_SENTENCE
  from mindspore.parallel._utils import _get_device_num
  from mindspore.dataset.debug import DebugHook
 
- from . import samplers
+ from mindspore.dataset.engine import samplers
  from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
  ITERATORS_LIST, _unset_iterator_cleanup
  from .queue import _SharedQueue, _Queue
@@ -127,8 +129,7 @@ def _reset_training_dataset(global_step, dataset_size):
  """
  dataset = _get_training_dataset()
  if dataset is not None:
- epoch = global_step // dataset_size
- dataset._reset(global_step, epoch) # pylint: disable=protected-access
+ dataset._reset(global_step, dataset_size) # pylint: disable=protected-access
  else:
  raise RuntimeError("Training dataset is not set.")
 
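The hunk above changes the call site so that `_reset` receives `dataset_size` directly rather than a precomputed epoch. A minimal sketch of what that presumably implies on the receiving side, assuming `Dataset._reset` now derives the epoch itself (hypothetical body, not taken from the diff):

```python
# Hypothetical sketch only: assumes Dataset._reset derives the epoch from
# global_step and dataset_size internally instead of receiving epoch directly.
class Dataset:
    def _reset(self, global_step, dataset_size):
        epoch = global_step // dataset_size          # formerly computed by the caller
        step_in_epoch = global_step % dataset_size   # position to resume from within the epoch
        # ... hand (epoch, step_in_epoch) to the underlying iterator/consumer ...
```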
@@ -136,9 +137,9 @@ def _reset_training_dataset(global_step, dataset_size):
  class Shuffle(str, Enum):
  """Specify the shuffle mode.
 
- - Shuffle.GLOBAL: Shuffle both the files and samples.
- - Shuffle.FILES: Shuffle files only.
- - Shuffle.INFILE: Shuffle data within each file.
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
+ - ``Shuffle.FILES`` : Shuffle files only.
+ - ``Shuffle.INFILE`` : Shuffle data within each file.
  """
  GLOBAL: str = "global"
  FILES: str = "files"
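For context, the `Shuffle` enum documented above is passed as the `shuffle` argument of file-based dataset loaders. A short illustrative snippet assuming the public `mindspore.dataset` API (file paths are placeholders):

```python
import mindspore.dataset as ds

# Placeholder file list; Shuffle controls how file order and sample order are randomized.
data_files = ["/path/to/data-0.tfrecord", "/path/to/data-1.tfrecord"]

global_shuffled = ds.TFRecordDataset(data_files, shuffle=ds.Shuffle.GLOBAL)  # shuffle files and samples
files_shuffled = ds.TFRecordDataset(data_files, shuffle=ds.Shuffle.FILES)    # shuffle file order only
```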
@@ -208,7 +209,7 @@ def zip(datasets):
  The number of datasets must be more than 1.
 
  Returns:
- Dataset, dataset zipped.
+ Dataset, a new dataset with the above operation applied.
 
  Raises:
  ValueError: If the number of datasets is 1.
@@ -216,6 +217,10 @@
 
  Examples:
  >>> # Create a dataset which is the combination of dataset_1 and dataset_2
+ >>> import mindspore.dataset as ds
+ >>>
+ >>> dataset_1 = ds.GeneratorDataset([1], "column1")
+ >>> dataset_2 = ds.GeneratorDataset([2], "column2")
  >>> dataset = ds.zip((dataset_1, dataset_2))
  """
  if len(datasets) <= 1:
@@ -316,7 +321,7 @@ class Dataset:
 
  Args:
  num_parallel_workers (int, optional): Number of workers to process the dataset in parallel.
- Default: None.
+ Default: ``None``.
  """
 
  def __init__(self, children=None, num_parallel_workers=None, cache=None):
@@ -346,6 +351,7 @@ class Dataset:
  self._repeat_count = None
  self._class_indexing = None
  self._sync = False
+ self._global_step = None
 
  @staticmethod
  def _get_operator_id(dataset):
@@ -382,36 +388,42 @@ class Dataset:
382
388
  _OP_PROCESS.update(generator_process)
383
389
  return op_name
384
390
 
385
- def create_ir_tree(self):
391
+ def create_ir_tree(self, getter_mode=False):
386
392
  """
387
393
  Internal method to build an IR tree.
388
394
 
395
+ Args:
396
+ getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``False``.
397
+
389
398
  Returns:
390
- DatasetNode, the root node of the IR tree.
391
- Dataset, the root dataset of the IR tree.
399
+ Union[DatasetNode, Dataset], the root node of the IR tree and the root dataset of the IR tree.
392
400
  """
393
401
  parent = self.parent
394
402
  self.parent = []
395
403
  dataset = copy.deepcopy(self)
396
404
  global _OP_NAME
397
405
  _OP_NAME = Dataset._get_operator_id(dataset)
398
- ir_tree = dataset.parse_tree()
406
+ ir_tree = dataset.parse_tree(getter_mode)
399
407
  self.parent = parent
400
408
  _init_device_info()
401
409
  return ir_tree, dataset
402
410
 
403
- def parse_tree(self):
411
+ def parse_tree(self, getter_mode=False):
404
412
  """
405
413
  Internal method to parse the API tree into an IR tree.
406
414
 
415
+ Args:
416
+ getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``False``.
417
+
407
418
  Returns:
408
419
  DatasetNode, the root node of the IR tree.
409
420
  """
410
421
  if len(self.parent) > 1:
411
422
  raise ValueError("The data pipeline is not a tree (i.e., one node has 2 consumers)")
412
- ir_children = [d.parse_tree() for d in self.children]
423
+ ir_children = [d.parse_tree(getter_mode) for d in self.children]
413
424
  # Bootstrap can only be performed on a copy of the original dataset node.
414
425
  # Bootstrap on original dataset node will make all iterators share the same process pool
426
+ self.pre_parse(getter_mode)
415
427
  self.iterator_bootstrap()
416
428
  ir_node = self.parse(ir_children)
417
429
  ir_node = self.post_parse(ir_node)
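A hedged sketch of the pull-mode path introduced by `getter_mode` (internal API, shown only for illustration): getters rebuild the IR tree with `getter_mode=True`, so `pre_parse` later disables multiprocessing and forces a single worker on every node.

    # Assuming `dataset` is any Dataset instance; this mirrors what
    # _init_tree_getters(getter_mode=True) does internally.
    ir_tree, api_tree = dataset.create_ir_tree(getter_mode=True)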
@@ -450,12 +462,15 @@ class Dataset:
450
462
  Serialize a pipeline into JSON string and dump into file if filename is provided.
451
463
 
452
464
  Args:
453
- filename (str): filename of JSON file to be saved as. Default: ''.
465
+ filename (str): filename of JSON file to be saved as. Default: ``""``.
454
466
 
455
467
  Returns:
456
468
  str, JSON string of the pipeline.
457
469
 
458
470
  Examples:
471
+ >>> import mindspore.dataset as ds
472
+ >>> mnist_dataset_dir = "/path/to/mnist_dataset_directory"
473
+ >>> dataset = ds.MnistDataset(dataset_dir=mnist_dataset_dir)
459
474
  >>> dataset_json = dataset.to_json("/path/to/mnist_dataset_pipeline.json")
460
475
  """
461
476
  ir_tree, _ = self.create_ir_tree()
@@ -489,7 +504,7 @@ class Dataset:
489
504
  element_length_function (Callable, optional): A function that takes in
490
505
  M arguments where M = len(column_names) and returns an integer. If no value is
491
506
  provided, then len(column_names) must be 1, and the size of the first
492
- dimension of that column will be taken as the length. Default: None.
507
+ dimension of that column will be taken as the length. Default: ``None``.
493
508
  pad_info (dict, optional): The information about how to batch each column. The key
494
509
  corresponds to the column name, and the value must be a tuple of 2 elements.
495
510
  The first element corresponds to the shape to pad to, and the second
@@ -497,21 +512,22 @@ class Dataset:
497
512
  specified, then that column will be padded to the longest in the current
498
513
  batch, and 0 will be used as the padding value. Any None dimensions will
499
514
  be padded to the longest in the current batch, unless
500
- `pad_to_bucket_boundary` is True. If no padding is wanted, set pad_info
501
- to None. Default: None.
502
- pad_to_bucket_boundary (bool, optional): If True, will pad each None
515
+ `pad_to_bucket_boundary` is ``True``. If no padding is wanted, set `pad_info`
516
+ to ``None``. Default: ``None``.
517
+ pad_to_bucket_boundary (bool, optional): If ``True``, will pad each None
503
518
  dimension in `pad_info` to the bucket_boundary minus 1. If there are any
504
519
  elements that fall into the last bucket, an error will occur.
505
- Default: False.
506
- drop_remainder (bool, optional): If True, will drop the last batch for each
507
- bucket if it is not a full batch. Default: False.
520
+ Default: ``False``.
521
+ drop_remainder (bool, optional): If ``True``, will drop the last batch for each
522
+ bucket if it is not a full batch. Default: ``False``.
508
523
 
509
524
  Returns:
510
- Dataset, dataset bucketized and batched by length.
525
+ Dataset, a new dataset with the above operation applied.
511
526
 
512
527
  Examples:
513
528
  >>> # Create a dataset where a certain number of rows are combined into a batch
514
529
  >>> # and drops the last incomplete batch if there is one.
530
+ >>> import mindspore.dataset as ds
515
531
  >>> import numpy as np
516
532
  >>> def generate_2_columns(n):
517
533
  ... for i in range(n):
@@ -553,15 +569,16 @@ class Dataset:
553
569
  batch_size (Union[int, Callable]): The number of rows each batch is created with. An
554
570
  int or callable object which takes exactly 1 parameter, BatchInfo.
555
571
  drop_remainder (bool, optional): Determines whether or not to drop the last block
556
- whose data row number is less than batch size. Default: False. If True, and if there are less
557
- than batch_size rows available to make the last batch, then those rows will
558
- be dropped and not propagated to the child node.
572
+ whose data row number is less than batch size. Default: ``False`` . If ``True`` ,
573
+ and if there are less than `batch_size` rows available to make the last batch,
574
+ then those rows will be dropped and not propagated to the child node.
559
575
  num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
560
- Default: None.
576
+ Default: ``None`` .
561
577
  **kwargs:
562
578
 
563
579
  - per_batch_map (Callable[[List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo], \
564
- (List[numpy.ndarray], ..., List[numpy.ndarray])], optional): Per batch map callable. Default: None.
580
+ (List[numpy.ndarray], ..., List[numpy.ndarray])], optional): Per batch map callable.
581
+ Default: ``None``.
565
582
  A callable which takes (List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo) as input parameters.
566
583
  Each list[numpy.ndarray] represents a batch of numpy.ndarray on a given column. The number of lists
567
584
  should match with the number of entries in input_columns. The last parameter of the callable should
@@ -570,30 +587,41 @@ class Dataset:
570
587
  as the input. output_columns is required if the number of output lists is different from input.
571
588
 
572
589
  - input_columns (Union[str, list[str]], optional): List of names of the input columns. The size of
573
- the list should match with signature of per_batch_map callable. Default: None.
590
+ the list should match with signature of `per_batch_map` callable. Default: ``None`` .
574
591
 
575
592
  - output_columns (Union[str, list[str]], optional): List of names assigned to the columns
576
593
  outputted by the last operation. This parameter is mandatory if len(input_columns) !=
577
594
  len(output_columns). The size of this list must match the number of output
578
- columns of the last operation. Default: None, output columns will have the same
595
+ columns of the last operation. Default: ``None`` , output columns will have the same
579
596
  name as the input columns, i.e., the columns will be replaced.
580
597
 
581
598
  - python_multiprocessing (bool, optional): Parallelize Python function `per_batch_map` with
582
- multi-processing or multi-threading mode, True means multi-processing, False means multi-threading
583
- If `per_batch_map` is a I/O bound task, use multi-threading mode.
584
- If `per_batch_map` is a CPU bound task, it is recommended to use multi-processing mode.
585
- Default: False, use python multi-threading mode.
586
-
587
- - max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to
588
- copy data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
599
+ multi-processing or multi-threading mode, ``True`` means multi-processing,
600
+ ``False`` means multi-threading. If `per_batch_map` is an I/O bound task, use
601
+ multi-threading mode. If `per_batch_map` is a CPU bound task, it is recommended to use
602
+ multi-processing mode. Default: ``False`` , use python multi-threading mode.
603
+
604
+ - max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
605
+ allocation to copy data between processes. The total occupied shared memory will increase as
606
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
607
+ used if python_multiprocessing is set to True. If it is an int value, both
608
+ ``input_columns`` and ``output_columns`` use this value as the per-row unit to create shared memory.
609
+ If it is a list, the first element is the per-row unit used to create shared memory for
610
+ ``input_columns``, and the second element is the per-row unit used to create shared memory for
611
+ ``output_columns``. Default: 16.
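A hedged sketch of the two forms described above (`per_batch_map_fn` and the column name are hypothetical): a scalar applies one per-row unit in both directions, while a 2-element list sizes the input and output queues separately.

    # Scalar form: 16 MB per row for both the input and output shared-memory queues.
    batched = dataset.batch(32, python_multiprocessing=True, max_rowsize=16)
    # List form: 16 MB per input row, 64 MB per (larger) mapped output row.
    batched = dataset.batch(32, per_batch_map=per_batch_map_fn, input_columns=["data"],
                            python_multiprocessing=True, max_rowsize=[16, 64])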
589
612
 
590
613
  Returns:
591
- BatchDataset, dataset batched.
614
+ Dataset, a new dataset with the above operation applied.
592
615
 
593
616
  Examples:
594
- >>> # 1) Create a dataset where every 100 rows are combined into a batch
617
+ >>> # 1) Create a dataset where every 5 rows are combined into a batch
595
618
  >>> # and drops the last incomplete batch if there is one.
596
- >>> dataset = dataset.batch(100, True)
619
+ >>> import mindspore.dataset as ds
620
+ >>> from PIL import Image
621
+ >>>
622
+ >>> cifar10_dataset_dir = "/path/to/cifar10_dataset_directory"
623
+ >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, num_samples=10)
624
+ >>> dataset = dataset.batch(5, True)
597
625
  >>>
598
626
  >>> # 2) resize image according to its batch number, if it's 5-th batch, resize to (5^2, 5^2) = (25, 25)
599
627
  >>> def np_resize(col, BatchInfo):
@@ -633,11 +661,11 @@ class Dataset:
633
661
  batch_size (Union[int, Callable]): The number of rows each batch is created with. An
634
662
  int or callable object which takes exactly 1 parameter, BatchInfo.
635
663
  drop_remainder (bool, optional): Determines whether or not to drop the last block
636
- whose data row number is less than batch size. Default: False. If True, and if there are less
637
- than batch_size rows available to make the last batch, then those rows will
664
+ whose data row number is less than batch size. Default: ``False``. If ``True``, and if there
665
+ are less than batch_size rows available to make the last batch, then those rows will
638
666
  be dropped and not propagated to the child node.
639
667
  num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
640
- Default: None.
668
+ Default: ``None``.
641
669
  pad_info (dict, optional): The information about how to batch each column. The key
642
670
  corresponds to the column name, and the value must be a tuple of 2 elements.
643
671
  The first element corresponds to the shape to pad to, and the second
@@ -645,19 +673,22 @@ class Dataset:
645
673
  specified, then that column will be padded to the longest in the current
646
674
  batch, and 0 will be used as the padding value. Any None dimensions will
647
675
  be padded to the longest in the current batch, unless
648
- pad_to_bucket_boundary is True. If no padding is wanted, set pad_info
649
- to None. Default: None.
676
+ pad_to_bucket_boundary is True. If no padding is wanted, set `pad_info`
677
+ to ``None``. Default: ``None``.
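A hedged sketch of an explicit `pad_info` (shapes and values are illustrative): each entry maps a column name to a tuple of the target shape and the pad value.

    import mindspore.dataset as ds

    dataset = ds.NumpySlicesDataset([[1], [1, 2], [1, 2, 3], [1, 2, 3, 4]], "column1")
    # Pad "column1" to shape [4] with 0, instead of padding to the longest row in each batch.
    dataset = dataset.padded_batch(2, drop_remainder=True, pad_info={"column1": ([4], 0)})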
650
678
 
651
679
  Returns:
652
- PaddedBatchDataset, dataset batched.
680
+ Dataset, a new dataset with the above operation applied.
653
681
 
654
682
  Examples:
655
683
  >>> # 1) Pad every sample to the largest sample's shape and batch the samples
656
- >>> dataset = dataset.padded_batch(100, True, pad_info={})
684
+ >>> import mindspore.dataset as ds
685
+ >>> dataset = ds.NumpySlicesDataset([[1], [1, 2], [1, 2, 3], [1, 2, 3, 4]], "column1")
686
+ >>> dataset = dataset.padded_batch(2, True, pad_info={})
657
687
  >>>
658
- >>> # 2) Create a dataset where every 100 rows are combined into a batch
688
+ >>> # 2) Create a dataset where every 3 rows are combined into a batch
659
689
  >>> # and drops the last incomplete batch if there is one.
660
- >>> dataset = dataset.padded_batch(100, True)
690
+ >>> dataset = ds.NumpySlicesDataset([i for i in range(10)], "column1")
691
+ >>> dataset = dataset.padded_batch(3, True)
661
692
  >>>
662
693
  >>> # 3) Create a dataset where its batch size is dynamic
663
694
  >>> # Define a callable batch size function and let batch size increase 1 each time.
@@ -674,16 +705,19 @@ class Dataset:
674
705
 
675
706
  Args:
676
707
  condition_name (str): The condition name that is used to toggle sending next row.
677
- num_batch (int): the number of batches without blocking at the start of each epoch. Default: 1.
678
- callback (function): The callback function that will be invoked when sync_update is called. Default: None.
708
+ num_batch (int): the number of batches without blocking at the start of each epoch.
709
+ Default: ``1``.
710
+ callback (function): The callback function that will be invoked when sync_update is called.
711
+ Default: ``None``.
679
712
 
680
713
  Returns:
681
- SyncWaitDataset, dataset added a blocking condition.
714
+ Dataset, a new dataset with the above operation applied.
682
715
 
683
716
  Raises:
684
717
  RuntimeError: If condition name already exists.
685
718
 
686
719
  Examples:
720
+ >>> import mindspore.dataset as ds
687
721
  >>> import numpy as np
688
722
  >>> def gen():
689
723
  ... for i in range(100):
@@ -735,15 +769,18 @@ class Dataset:
735
769
  dataset will result in a global shuffle.
736
770
 
737
771
  Returns:
738
- Dataset, dataset shuffled.
772
+ Dataset, a new dataset with the above operation applied.
739
773
 
740
774
  Raises:
741
775
  RuntimeError: If sync operations exist before shuffle.
742
776
 
743
777
  Examples:
744
- >>> # dataset is an instance object of Dataset
745
- >>> # Optionally set the seed for the first epoch
778
+ >>> import mindspore.dataset as ds
779
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
780
+ >>>
781
+ >>> # Optionally set the seed for fixed randomness
746
782
  >>> ds.config.set_seed(58)
783
+ >>>
747
784
  >>> # Create a shuffled dataset using a shuffle buffer of size 4
748
785
  >>> dataset = dataset.shuffle(4)
749
786
  """
@@ -758,9 +795,10 @@ class Dataset:
758
795
  return a `Dataset` .
759
796
 
760
797
  Returns:
761
- Dataset, dataset applied by the function.
798
+ Dataset, a new dataset with the above operation applied.
762
799
 
763
800
  Examples:
801
+ >>> import mindspore.dataset as ds
764
802
  >>> # 1) flat_map on one column dataset
765
803
  >>> dataset = ds.NumpySlicesDataset([[0, 1], [2, 3]], shuffle=False)
766
804
  >>>
@@ -820,11 +858,11 @@ class Dataset:
820
858
  `output_columns` , and if not specified, the column name of output column is same as that of `input_columns` .
821
859
 
822
860
  - If you use transformations (
823
- `vision transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
861
+ `vision transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
824
862
  dataset.transforms.html#module-mindspore.dataset.vision>`_ ,
825
- `nlp transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
863
+ `nlp transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
826
864
  dataset.transforms.html#module-mindspore.dataset.text>`_ ,
827
- `audio transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
865
+ `audio transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
828
866
  dataset.transforms.html#module-mindspore.dataset.audio>`_ )
829
867
  provided by mindspore dataset, please use the following parameters:
830
868
 
@@ -839,31 +877,37 @@ class Dataset:
839
877
  applied on the dataset. Operations are applied in the order they appear in this list.
840
878
  input_columns (Union[str, list[str]], optional): List of the names of the columns that will be passed to
841
879
  the first operation as input. The size of this list must match the number of
842
- input columns expected by the first operation. Default: None, the first
880
+ input columns expected by the first operation. Default: ``None``, the first
843
881
  operation will be passed however many columns that are required, starting from
844
882
  the first column.
845
883
  output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
846
884
  the last operation. This parameter is mandatory if len(input_columns) !=
847
885
  len(output_columns). The size of this list must match the number of output
848
- columns of the last operation. Default: None, output columns will have the same
886
+ columns of the last operation. Default: ``None``, output columns will have the same
849
887
  name as the input columns, i.e., the columns will be replaced.
850
888
  num_parallel_workers (int, optional): Number of threads used to process the dataset in
851
- parallel. Default: None, the value from the configuration will be used.
889
+ parallel. Default: ``None``, the value from the configuration will be used.
852
890
  **kwargs:
853
891
 
854
892
  - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes.
855
- This option could be beneficial if the Python operation is computational heavy. Default: False.
893
+ This option could be beneficial if the Python operation is computational heavy. Default: ``False``.
856
894
 
857
- - max_rowsize (int, optional): Maximum size of row in MB that is used for shared memory allocation to
858
- copy data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
895
+ - max_rowsize (Union[int, list[int]], optional): Maximum size of row in MB that is used for shared
896
+ memory allocation to copy data between processes. The total occupied shared memory will increase as
897
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
898
+ used if python_multiprocessing is set to True. If it is an int value, both
899
+ ``input_columns`` and ``output_columns`` use this value as the per-row unit to create shared memory.
900
+ If it is a list, the first element is the per-row unit used to create shared memory for
901
+ ``input_columns``, and the second element is the per-row unit used to create shared memory for
902
+ ``output_columns``. Default: 16.
859
903
 
860
904
  - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
861
- Default: None, which means no cache is used.
905
+ Default: ``None``, which means no cache is used.
862
906
 
863
907
  - callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called.
864
- Default: None.
908
+ Default: ``None``.
865
909
 
866
- - offload (bool, optional): Flag to indicate whether offload is used. Default: None.
910
+ - offload (bool, optional): Flag to indicate whether offload is used. Default: ``None``.
867
911
 
868
912
  Note:
869
913
  - Input `operations` accepts TensorOperations defined in mindspore.dataset part, plus user-defined
@@ -872,17 +916,21 @@ class Dataset:
872
916
  `operations` .
873
917
 
874
918
  Returns:
875
- Dataset, dataset after mapping operation.
919
+ Dataset, a new dataset with the above operation applied.
876
920
 
877
921
  Examples:
922
+ >>> import mindspore.dataset as ds
923
+ >>> import mindspore.dataset.vision as vision
878
924
  >>> # dataset is an instance of Dataset which has 2 columns, "image" and "label".
879
925
  >>> # image is of type bytes type which can be decoded to RGB
880
926
  >>> # label is of type int32
927
+ >>> cifar10_dataset_dir = "/path/to/cifar10_dataset_directory"
928
+ >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir)
881
929
  >>>
882
930
  >>> # Define two operations, where each operation accepts 1 input column and outputs 1 column.
883
- >>> decode_op = c_vision.Decode(rgb=True)
884
- >>> random_jitter_op = c_vision.RandomColorAdjust(brightness=(0.8, 0.8), contrast=(1, 1),
885
- ... saturation=(1, 1), hue=(0, 0))
931
+ >>> decode_op = vision.Decode(to_pil=False)
932
+ >>> random_jitter_op = vision.RandomColorAdjust(brightness=(0.8, 0.8), contrast=(1, 1),
933
+ ... saturation=(1, 1), hue=(0, 0))
886
934
  >>>
887
935
  >>> # 1) Simple map example.
888
936
  >>>
@@ -948,16 +996,19 @@ class Dataset:
948
996
  Args:
949
997
  predicate (callable): Python callable which returns a boolean value. If False then filter the element.
950
998
  input_columns (Union[str, list[str]], optional): List of names of the input columns. If not provided
951
- or provided with None, the predicate will be applied on all columns in the dataset. Default: None.
999
+ or provided with ``None``, the predicate will be applied on all columns in the dataset.
1000
+ Default: ``None``.
952
1001
  num_parallel_workers (int, optional): Number of workers to process the dataset
953
- in parallel. Default: None.
1002
+ in parallel. Default: ``None``.
954
1003
 
955
1004
  Returns:
956
- Dataset, dataset filtered.
1005
+ Dataset, a new dataset with the above operation applied.
957
1006
 
958
1007
  Examples:
959
- >>> # generator data(0 ~ 63)
1008
+ >>> # generator data(0 ~ 19)
960
1009
  >>> # filter out the data that is greater than or equal to 11
1010
+ >>> import mindspore.dataset as ds
1011
+ >>> dataset = ds.GeneratorDataset([i for i in range(20)], "data")
961
1012
  >>> dataset = dataset.filter(predicate=lambda data: data < 11, input_columns = ["data"])
962
1013
  """
963
1014
  return FilterDataset(self, predicate, input_columns, num_parallel_workers)
@@ -965,20 +1016,21 @@ class Dataset:
965
1016
  @check_repeat
966
1017
  def repeat(self, count=None):
967
1018
  """
968
- Repeat this dataset `count` times. Repeat infinitely if the count is None or -1.
1019
+ Repeat this dataset `count` times. Repeat infinitely if the `count` is ``None`` or ``-1``.
969
1020
 
970
1021
  Note:
971
1022
  The order of using repeat and batch reflects the number of batches. It is recommended that
972
1023
  the repeat operation is used after the batch operation.
973
1024
 
974
1025
  Args:
975
- count (int): Number of times the dataset is going to be repeated. Default: None.
1026
+ count (int): Number of times the dataset is going to be repeated. Default: ``None``.
976
1027
 
977
1028
  Returns:
978
- Dataset, dataset repeated.
1029
+ Dataset, a new dataset with the above operation applied.
979
1030
 
980
1031
  Examples:
981
- >>> # dataset is an instance object of Dataset
1032
+ >>> import mindspore.dataset as ds
1033
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
982
1034
  >>>
983
1035
  >>> # Create a dataset where the dataset is repeated for 50 epochs
984
1036
  >>> dataset = dataset.repeat(50)
@@ -1004,11 +1056,12 @@ class Dataset:
1004
1056
  count (int): Number of elements in the dataset to be skipped.
1005
1057
 
1006
1058
  Returns:
1007
- Dataset, dataset that containing rows like origin rows subtract skipped rows.
1059
+ Dataset, a new dataset with the above operation applied.
1008
1060
 
1009
1061
  Examples:
1010
- >>> # dataset is an instance object of Dataset
1011
- >>> # Create a dataset which skips first 3 elements from data
1062
+ >>> import mindspore.dataset as ds
1063
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1064
+ >>> # Skip first 3 elements of dataset and retain 7 elements.
1012
1065
  >>> dataset = dataset.skip(3)
1013
1066
  """
1014
1067
  return SkipDataset(self, count)
@@ -1016,23 +1069,28 @@ class Dataset:
1016
1069
  @check_take
1017
1070
  def take(self, count=-1):
1018
1071
  """
1019
- Takes at most given numbers of elements from the dataset.
1020
-
1021
- Note:
1022
- 1. If count is greater than the number of elements in the dataset or equal to -1,
1023
- all the elements in dataset will be taken.
1024
- 2. The order of using take and batch matters. If take is before batch operation,
1025
- then take the given number of rows; otherwise take the given number of batches.
1072
+ Take the first specified number of samples from the dataset.
1026
1073
 
1027
1074
  Args:
1028
- count (int, optional): Number of elements to be taken from the dataset. Default: -1.
1075
+ count (int, optional): The desired number of samples to take. If the value exceeds
1076
+ the total number of samples in the dataset, all data will be returned.
1077
+ Default: ``-1`` , will return all data.
1078
+
1079
+ Note:
1080
+ When there are operations that will change the number of samples of the dataset in
1081
+ the data pipeline, the location of the `take` operation can change its effect.
1082
+ For example, `batch` operation will combine the successive samples of the specified
1083
+ `batch_size` into 1 sample, so `.batch(batch_size).take(1)` will be equivalent to
1084
+ `.take(batch_size).batch(batch_size)`.
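A small check of the equivalence stated above (a sketch; shuffle is disabled so the row order is fixed):

    import mindspore.dataset as ds

    # Both pipelines yield a single batch holding the first 4 samples.
    d1 = ds.GeneratorDataset([i for i in range(10)], "column1", shuffle=False)
    d1 = d1.batch(4).take(1)      # batch first, then take 1 batch
    d2 = ds.GeneratorDataset([i for i in range(10)], "column1", shuffle=False)
    d2 = d2.take(4).batch(4)      # take 4 samples first, then batch them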
1029
1085
 
1030
1086
  Returns:
1031
- Dataset, dataset taken.
1087
+ Dataset, a new dataset with the above operation applied.
1032
1088
 
1033
1089
  Examples:
1034
- >>> # dataset is an instance object of Dataset
1035
- >>> # Create a dataset where the dataset includes 50 elements.
1090
+ >>> import mindspore.dataset as ds
1091
+ >>> mnist_dataset_dir = "/path/to/mnist_dataset_directory"
1092
+ >>> dataset = ds.MnistDataset(dataset_dir=mnist_dataset_dir)
1093
+ >>> # Take 50 samples from MNIST dataset.
1036
1094
  >>> dataset = dataset.take(50)
1037
1095
  """
1038
1096
  return TakeDataset(self, count)
@@ -1113,7 +1171,7 @@ class Dataset:
1113
1171
  - The sum of split sizes > K, the difference of sigma(round(fi * K)) - K will be removed from the first
1114
1172
  large enough split such that it will have at least 1 row after removing the difference.
1115
1173
 
1116
- randomize (bool, optional): Determines whether or not to split the data randomly. Default: True.
1174
+ randomize (bool, optional): Determines whether or not to split the data randomly. Default: ``True``.
1117
1175
  If True, the data will be randomly split. Otherwise, each split will be created with
1118
1176
  consecutive rows from the dataset.
1119
1177
 
@@ -1124,7 +1182,7 @@ class Dataset:
1124
1182
  will be different in each epoch.
1125
1183
 
1126
1184
  Returns:
1127
- tuple(Dataset), a tuple of datasets that have been split.
1185
+ Tuple[Dataset], a tuple of new datasets split from the original one.
1128
1186
 
1129
1187
  Raises:
1130
1188
  RuntimeError: If get_dataset_size returns None or is not supported for this dataset.
@@ -1136,9 +1194,9 @@ class Dataset:
1136
1194
  floats don't sum to 1.
1137
1195
 
1138
1196
  Examples:
1139
- >>> # TextFileDataset is not a mappable dataset, so this non-optimized split will be called.
1140
- >>> # Since many datasets have shuffle on by default, set shuffle to False if split will be called!
1141
- >>> dataset = ds.TextFileDataset(text_file_dataset_dir, shuffle=False)
1197
+ >>> # Split the data into train part and test part.
1198
+ >>> import mindspore.dataset as ds
1199
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1142
1200
  >>> train_dataset, test_dataset = dataset.split([0.9, 0.1])
1143
1201
  """
1144
1202
  if self.is_shuffled():
@@ -1179,14 +1237,17 @@ class Dataset:
1179
1237
  to be zipped together with this dataset.
1180
1238
 
1181
1239
  Returns:
1182
- Dataset, dataset zipped.
1240
+ Dataset, a new dataset with the above operation applied.
1183
1241
 
1184
1242
  Raises:
1185
1243
  TypeError: The parameter is not dataset object or tuple of dataset objects.
1186
1244
 
1187
1245
  Examples:
1188
- >>> # Create a dataset which is the combination of dataset and dataset_1
1189
- >>> dataset = dataset.zip(dataset_1)
1246
+ >>> # Create a dataset which is the combination of dataset_1 and dataset_2
1247
+ >>> import mindspore.dataset as ds
1248
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1")
1249
+ >>> dataset_2 = ds.GeneratorDataset([1, 2, 3], "column2")
1250
+ >>> dataset = dataset_1.zip(dataset_2)
1190
1251
  """
1191
1252
  if isinstance(datasets, tuple):
1192
1253
  datasets = (self, *datasets)
@@ -1202,6 +1263,12 @@ class Dataset:
1202
1263
  Concatenate the dataset objects in the input list.
1203
1264
  Performing "+" operation on dataset objects can achieve the same effect.
1204
1265
 
1266
+ For a dataset concatenated from multiple dataset objects, data is returned in the order of the
1267
+ datasets passed in. If you want to change the data order (such as random selection from each dataset
1268
+ instead of in sequence), apply the `use_sampler` method on the concatenated dataset object.
1269
+ Currently `use_sampler` supports `dataset.DistributedSampler` for sharding selection from each dataset
1270
+ or `dataset.RandomSampler` for random selection from each dataset, see examples below.
1271
+
1205
1272
  Note:
1206
1273
  The column name, and rank and type of the column data must be the same in the input datasets.
1207
1274
 
@@ -1210,13 +1277,45 @@ class Dataset:
1210
1277
  to be concatenated together with this dataset.
1211
1278
 
1212
1279
  Returns:
1213
- Dataset, dataset concatenated.
1280
+ Dataset, a new dataset with the above operation applied.
1214
1281
 
1215
1282
  Examples:
1283
+ >>> import mindspore.dataset as ds
1284
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
1285
+ >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
1286
+ >>>
1216
1287
  >>> # Create a dataset by concatenating dataset_1 and dataset_2 with "+" operator
1217
1288
  >>> dataset = dataset_1 + dataset_2
1218
1289
  >>> # Create a dataset by concatenating dataset_1 and dataset_2 with concat operation
1219
1290
  >>> dataset = dataset_1.concat(dataset_2)
1291
+ >>>
1292
+ >>> # Check the data order of dataset
1293
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
1294
+ >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
1295
+ >>> dataset = dataset_1 + dataset_2
1296
+ >>> result = list(dataset)
1297
+ >>> # [[Tensor(shape=[], dtype=Int64, value= 1)], [Tensor(shape=[], dtype=Int64, value= 2)],
1298
+ >>> # [Tensor(shape=[], dtype=Int64, value= 3)], [Tensor(shape=[], dtype=Int64, value= 4)],
1299
+ >>> # [Tensor(shape=[], dtype=Int64, value= 5)], [Tensor(shape=[], dtype=Int64, value= 6)]]
1300
+ >>>
1301
+ >>> # Change the data order of concatenated dataset with sharding selection
1302
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
1303
+ >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
1304
+ >>> dataset = dataset_1.concat(dataset_2)
1305
+ >>> dataset.use_sampler(ds.DistributedSampler(num_shards=2, shard_id=1, shuffle=False))
1306
+ >>> result = list(dataset)
1307
+ >>> # [[Tensor(shape=[], dtype=Int64, value= 2)], [Tensor(shape=[], dtype=Int64, value= 4)],
1308
+ >>> # [Tensor(shape=[], dtype=Int64, value= 6)]]
1309
+ >>>
1310
+ >>> # Change the data order of concatenated dataset with random selection
1311
+ >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
1312
+ >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
1313
+ >>> dataset = dataset_1.concat(dataset_2)
1314
+ >>> dataset.use_sampler(ds.RandomSampler())
1315
+ >>> result = list(dataset)
1316
+ >>> # [[Tensor(shape=[], dtype=Int64, value= 1)], [Tensor(shape=[], dtype=Int64, value= 4)],
1317
+ >>> # [Tensor(shape=[], dtype=Int64, value= 2)], [Tensor(shape=[], dtype=Int64, value= 5)],
1318
+ >>> # [Tensor(shape=[], dtype=Int64, value= 6)], [Tensor(shape=[], dtype=Int64, value= 3)]]
1220
1319
  """
1221
1320
  if isinstance(datasets, Dataset):
1222
1321
  datasets = [self] + [datasets]
@@ -1236,16 +1335,17 @@ class Dataset:
1236
1335
  output_columns (Union[str, list[str]]): List of names of the output columns.
1237
1336
 
1238
1337
  Returns:
1239
- Dataset, dataset renamed.
1338
+ Dataset, a new dataset with the above operation applied.
1240
1339
 
1241
1340
  Examples:
1242
- >>> # dataset is an instance object of Dataset
1341
+ >>> import mindspore.dataset as ds
1243
1342
  >>> input_columns = ["input_col1", "input_col2", "input_col3"]
1244
1343
  >>> output_columns = ["output_col1", "output_col2", "output_col3"]
1245
1344
  >>>
1246
- >>> # Create a dataset where input_col1 is renamed to output_col1, and
1247
- >>> # input_col2 is renamed to output_col2, and input_col3 is renamed
1248
- >>> # to output_col3.
1345
+ >>> # Create a dataset with 3 columns
1346
+ >>> dataset = ds.GeneratorDataset([(1, 2, 3), (3, 4, 5), (5, 6, 7)], column_names=input_columns)
1347
+ >>>
1348
+ >>> # Rename "input_col1" to "output_col1", "input_col2" to "output_col2", "input_col3" to "output_col3"
1249
1349
  >>> dataset = dataset.rename(input_columns=input_columns, output_columns=output_columns)
1250
1350
  """
1251
1351
 
@@ -1261,13 +1361,15 @@ class Dataset:
1261
1361
  columns(Union[str, list[str]]): List of names of the columns to project.
1262
1362
 
1263
1363
  Returns:
1264
- Dataset, dataset projected.
1364
+ Dataset, a new dataset with the above operation applied.
1265
1365
 
1266
1366
  Examples:
1267
- >>> # dataset is an instance object of Dataset
1268
- >>> columns_to_project = ["column3", "column1", "column2"]
1367
+ >>> import mindspore.dataset as ds
1368
+ >>> # Create a dataset with 3 columns
1369
+ >>> input_columns = ["column1", "column2", "column3"]
1370
+ >>> dataset = ds.GeneratorDataset([(1, 2, 3), (3, 4, 5), (5, 6, 7)], column_names=input_columns)
1269
1371
  >>>
1270
- >>> # Create a dataset that consists of column3, column1, column2
1372
+ >>> columns_to_project = ["column3", "column1", "column2"]
1271
1373
  >>> # in that order, regardless of the original order of columns.
1272
1374
  >>> dataset = dataset.project(columns=columns_to_project)
1273
1375
  """
@@ -1283,10 +1385,11 @@ class Dataset:
1283
1385
  return a preprocessed `Dataset` .
1284
1386
 
1285
1387
  Returns:
1286
- Dataset, dataset applied by the function.
1388
+ Dataset, a new dataset with the above operation applied.
1287
1389
 
1288
1390
  Examples:
1289
- >>> # dataset is an instance object of Dataset
1391
+ >>> import mindspore.dataset as ds
1392
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1290
1393
  >>>
1291
1394
  >>> # Declare an apply_func function which returns a Dataset object
1292
1395
  >>> def apply_func(data):
@@ -1310,41 +1413,45 @@ class Dataset:
1310
1413
  return dataset
1311
1414
 
1312
1415
  @check_device_send
1313
- def device_que(self, send_epoch_end=True, create_data_info_queue=False):
1416
+ def device_que(self, send_epoch_end=True, create_data_info_queue=False, queue_name=""):
1314
1417
  """
1315
1418
  Return a transferred Dataset that transfers data through a device.
1316
1419
 
1317
1420
  Args:
1318
- send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True.
1421
+ send_epoch_end (bool, optional): Whether to send end of sequence to device or not.
1422
+ Default: ``True``.
1319
1423
  create_data_info_queue (bool, optional): Whether to create queue which stores
1320
- types and shapes of data or not. Default: False.
1424
+ types and shapes of data or not. Default: ``False``.
1425
+ queue_name (str, optional): Name of queue which connects dataset processing and model
1426
+ computing. Default: ``""``.
1321
1427
 
1322
1428
  Note:
1323
1429
  If device is Ascend, features of data will be transferred one by one. The limitation
1324
1430
  of data transmission per time is 256M.
1325
1431
 
1326
1432
  Returns:
1327
- Dataset, dataset for transferring.
1433
+ Dataset, a new dataset with the above operation applied.
1328
1434
 
1329
1435
  Examples:
1436
+ >>> import mindspore.dataset as ds
1330
1437
  >>> import time
1331
1438
  >>>
1332
1439
  >>> data = ds.TFRecordDataset('/path/to/TF_FILES', '/path/to/TF_SCHEMA_FILE', shuffle=ds.Shuffle.FILES)
1333
- >>>
1334
1440
  >>> data = data.device_que()
1335
1441
  >>> data.send()
1336
1442
  >>> time.sleep(0.1)
1337
1443
  >>> data.stop_send()
1338
1444
  """
1339
- return TransferDataset(self, send_epoch_end, create_data_info_queue)
1445
+ return TransferDataset(self, send_epoch_end, create_data_info_queue, queue_name)
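A minimal sketch of the new `queue_name` parameter (the queue name is an assumed value):

    import mindspore.dataset as ds

    data = ds.TFRecordDataset('/path/to/TF_FILES', '/path/to/TF_SCHEMA_FILE', shuffle=ds.Shuffle.FILES)
    # Name the transfer queue explicitly instead of relying on the default "".
    data = data.device_que(queue_name="dataset_queue")
    data.send()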
1340
1446
 
1341
1447
  @check_save
1342
1448
  def save(self, file_name, num_files=1, file_type='mindrecord'):
1343
1449
  """
1344
1450
  Save the dynamic data processed by the dataset pipeline in common dataset format.
1345
- Supported dataset formats: `mindrecord` only. And you can use `MindDataset` API to read the saved file(s).
1451
+ Supported dataset formats: ``'mindrecord'`` only. And you can use
1452
+ :class:`mindspore.dataset.MindDataset` API to read the saved file(s).
1346
1453
 
1347
- Implicit type casting exists when saving data as `mindrecord` . The transform table shows how to do
1454
+ Implicit type casting exists when saving data as ``'mindrecord'`` . The transform table shows how to do
1348
1455
  type casting.
1349
1456
 
1350
1457
  .. list-table:: Implicit Type Casting when Saving as `mindrecord`
@@ -1395,27 +1502,27 @@ class Dataset:
1395
1502
  - Multi-dimensional string not supported
1396
1503
 
1397
1504
  Note:
1398
- 1. To save the samples in order, set dataset's shuffle to False and num_files to 1.
1505
+ 1. To save the samples in order, set dataset's `shuffle` to ``False`` and `num_files` to ``1``.
1399
1506
  2. Before calling the function, do not use batch operation, repeat operation or data augmentation operations
1400
1507
  with random attribute in map operation.
1401
1508
  3. When array dimension is variable, one-dimensional arrays or
1402
1509
  multi-dimensional arrays with variable dimension 0 are supported.
1403
- 4. Mindrecord does not support uint64, multi-dimensional uint8(drop dimension) nor
1510
+ 4. MindRecord does not support uint64, multi-dimensional uint8(drop dimension) nor
1404
1511
  multi-dimensional string.
1405
1512
 
1406
1513
  Args:
1407
1514
  file_name (str): Path to dataset file.
1408
- num_files (int, optional): Number of dataset files. Default: 1.
1409
- file_type (str, optional): Dataset format. Default: 'mindrecord'.
1515
+ num_files (int, optional): Number of dataset files. Default: ``1`` .
1516
+ file_type (str, optional): Dataset format. Default: ``'mindrecord'`` .
1410
1517
 
1411
1518
  Examples:
1519
+ >>> import mindspore.dataset as ds
1412
1520
  >>> import numpy as np
1413
1521
  >>>
1414
1522
  >>> def generator_1d():
1415
1523
  ... for i in range(10):
1416
1524
  ... yield (np.array([i]),)
1417
1525
  >>>
1418
- >>>
1419
1526
  >>> # apply dataset operations
1420
1527
  >>> d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
1421
1528
  >>> d1.save('/path/to/save_file')
@@ -1442,19 +1549,21 @@ class Dataset:
1442
1549
 
1443
1550
  Args:
1444
1551
  columns (list[str], optional): List of columns to be used to specify the order of columns.
1445
- Default: None, means all columns.
1552
+ Default: ``None``, means all columns.
1446
1553
  num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
1447
- Default: -1, iterator can be iterated infinite number of epochs.
1554
+ Default: ``-1``, iterator can be iterated infinite number of epochs.
1448
1555
  output_numpy (bool, optional): Whether or not to output NumPy datatype.
1449
- If output_numpy=False, iterator will output MSTensor. Default: False.
1450
- do_copy (bool, optional): When output data type is mindspore.Tensor,
1451
- use this param to select the conversion method, only take False for better performance. Default: True.
1556
+ If `output_numpy` is ``False``, iterator will output MSTensor. Default: ``False``.
1557
+ do_copy (bool, optional): When output data type is :class:`mindspore.Tensor`,
1558
+ use this parameter to select the conversion method; setting it to False gives better performance.
1559
+ Default: ``True``.
1452
1560
 
1453
1561
  Returns:
1454
- Iterator, tuple iterator over the dataset.
1562
+ Iterator, a dataset iterator that returns data of type Tuple.
1455
1563
 
1456
1564
  Examples:
1457
- >>> # dataset is an instance object of Dataset
1565
+ >>> import mindspore.dataset as ds
1566
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1458
1567
  >>> iterator = dataset.create_tuple_iterator()
1459
1568
  >>> for item in iterator:
1460
1569
  ... # item is a list
@@ -1476,17 +1585,19 @@ class Dataset:
1476
1585
 
1477
1586
  Args:
1478
1587
  num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
1479
- Default: -1, iterator can be iterated infinite number of epochs.
1588
+ Default: ``-1`` , iterator can be iterated infinite number of epochs.
1480
1589
  output_numpy (bool, optional): Whether or not to output NumPy datatype,
1481
- if output_numpy=False, iterator will output MSTensor. Default: False.
1482
- do_copy (bool, optional): When output data type is mindspore.Tensor,
1483
- use this param to select the conversion method, only take False for better performance. Default: True.
1590
+ if `output_numpy` is ``False``, iterator will output MSTensor. Default: ``False`` .
1591
+ do_copy (bool, optional): When output data type is :class:`mindspore.Tensor`,
1592
+ use this parameter to select the conversion method; setting it to False gives better performance.
1593
+ Default: ``True`` .
1484
1594
 
1485
1595
  Returns:
1486
- Iterator, dictionary iterator over the dataset.
1596
+ Iterator, a dataset iterator that returns data of type Dict.
1487
1597
 
1488
1598
  Examples:
1489
- >>> # dataset is an instance object of Dataset
1599
+ >>> import mindspore.dataset as ds
1600
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1490
1601
  >>> iterator = dataset.create_dict_iterator()
1491
1602
  >>> for item in iterator:
1492
1603
  ... # item is a dict
@@ -1515,7 +1626,8 @@ class Dataset:
1515
1626
  int, tuple of the input index information.
1516
1627
 
1517
1628
  Examples:
1518
- >>> # dataset is an instance object of Dataset
1629
+ >>> import mindspore.dataset as ds
1630
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1519
1631
  >>> # set input_indexs
1520
1632
  >>> dataset.input_indexs = 10
1521
1633
  >>> print(dataset.input_indexs)
@@ -1542,11 +1654,14 @@ class Dataset:
1542
1654
  def copy_batch_size(self, value):
1543
1655
  self._batch_size = value
1544
1656
 
1545
- def _init_tree_getters(self):
1657
+ def _init_tree_getters(self, getter_mode=True):
1546
1658
  """
1547
1659
  Get pipeline information.
1660
+
1661
+ Args:
1662
+ getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``True``.
1548
1663
  """
1549
- ir_tree, api_tree = self.create_ir_tree()
1664
+ ir_tree, api_tree = self.create_ir_tree(getter_mode)
1550
1665
 
1551
1666
  runtime_context = cde.PythonRuntimeContext()
1552
1667
  runtime_context.Init()
@@ -1576,8 +1691,12 @@ class Dataset:
1576
1691
  list, list of column names in the dataset.
1577
1692
 
1578
1693
  Examples:
1579
- >>> # dataset is an instance object of Dataset
1694
+ >>> import mindspore.dataset as ds
1695
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1580
1696
  >>> col_names = dataset.get_col_names()
1697
+ >>> print(col_names)
1698
+ ['column1']
1699
+
1581
1700
  """
1582
1701
  if self._col_names is None:
1583
1702
  runtime_getter = self._init_tree_getters()
@@ -1591,22 +1710,26 @@ class Dataset:
1591
1710
  Get the shapes of output data.
1592
1711
 
1593
1712
  Args:
1594
- estimate (bool): If `estimate` is False, will return the shapes of first data row.
1713
+ estimate (bool): If `estimate` is ``False`` , will return the shapes of first data row.
1595
1714
  Otherwise, will iterate the whole dataset and return the estimated shapes of data row,
1596
- where dynamic shape is marked as None (used in dynamic data shapes scenario). Default: False.
1715
+ where dynamic shape is marked as None (used in dynamic data shapes scenario).
1716
+ Default: ``False`` .
1597
1717
 
1598
1718
  Returns:
1599
1719
  list, list of shapes of each column.
1600
1720
 
1601
1721
  Examples:
1722
+ >>> import mindspore.dataset as ds
1602
1723
  >>> import numpy as np
1603
1724
  >>>
1604
1725
  >>> def generator1():
1605
1726
  ... for i in range(1, 100):
1606
- ... yield np.ones((16, i, 83)), np.array(i)
1727
+ ... yield np.ones((16, 83, 83)), np.array([i])
1607
1728
  >>>
1608
1729
  >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
1609
1730
  >>> output_shapes = dataset.output_shapes()
1731
+ >>> print(output_shapes)
1732
+ [[16, 83, 83], [1]]
1610
1733
  """
1611
1734
  # cache single shape
1612
1735
  if not estimate and self.saved_output_shapes is not None:
@@ -1641,8 +1764,17 @@ class Dataset:
1641
1764
  list, list of data types.
1642
1765
 
1643
1766
  Examples:
1644
- >>> # dataset is an instance object of Dataset
1767
+ >>> import mindspore.dataset as ds
1768
+ >>> import numpy as np
1769
+ >>>
1770
+ >>> def generator1():
1771
+ ... for i in range(1, 100):
1772
+ ... yield np.ones((16, 83, 83)).astype(np.float32), np.array([i]).astype(np.int32)
1773
+ >>>
1774
+ >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
1645
1775
  >>> output_types = dataset.output_types()
1776
+ >>> print(output_types)
1777
+ [dtype('float32'), dtype('int32')]
1646
1778
  """
1647
1779
  if self.saved_output_types is None:
1648
1780
  runtime_getter = self._init_tree_getters()
@@ -1666,8 +1798,18 @@ class Dataset:
1666
1798
  int, number of batches.
1667
1799
 
1668
1800
  Examples:
1669
- >>> # dataset is an instance object of Dataset
1801
+ >>> import mindspore.dataset as ds
1802
+ >>> import numpy as np
1803
+ >>>
1804
+ >>> # A generator return 66 samples
1805
+ >>> def generator1():
1806
+ ... for i in range(66):
1807
+ ... yield np.ones((16, 83, 83)), np.array([i])
1808
+ >>>
1809
+ >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
1670
1810
  >>> dataset_size = dataset.get_dataset_size()
1811
+ >>> print(dataset_size)
1812
+ 66
1671
1813
  """
1672
1814
  if self.dataset_size is None:
1673
1815
  runtime_getter = self.__init_size_getter()
@@ -1685,7 +1827,11 @@ class Dataset:
1685
1827
  int, number of classes.
1686
1828
 
1687
1829
  Examples:
1688
- >>> # dataset is an instance object of Dataset
1830
+ >>> import mindspore.dataset as ds
1831
+ >>> # Read image files
1832
+ >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
1833
+ >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
1834
+ >>> # Check how many classes exist in image folder
1689
1835
  >>> num_classes = dataset.num_classes()
1690
1836
  """
1691
1837
  if self._num_classes is None:
@@ -1718,19 +1864,18 @@ class Dataset:
1718
1864
  Args:
1719
1865
  condition_name (str): The condition name that is used to toggle sending next row.
1720
1866
  num_batch (Union[int, None]): The number of batches (rows) that are released.
1721
- When num_batch is None, it will default to the number specified by the
1722
- sync_wait operation. Default: None.
1723
- data (Any): The data passed to the callback, user defined. Default: None.
1867
+ When `num_batch` is ``None``, it will default to the number specified by the
1868
+ `sync_wait` operation. Default: ``None``.
1869
+ data (Any): The data passed to the callback, user defined. Default: ``None``.
1724
1870
 
1725
1871
  Examples:
1726
1872
  >>> import numpy as np
1727
- >>>
1873
+ >>> import mindspore.dataset as ds
1728
1874
  >>>
1729
1875
  >>> def gen():
1730
1876
  ... for i in range(100):
1731
1877
  ... yield (np.array(i),)
1732
1878
  >>>
1733
- >>>
1734
1879
  >>> class Augment:
1735
1880
  ... def __init__(self, loss):
1736
1881
  ... self.loss = loss
@@ -1741,7 +1886,6 @@ class Dataset:
1741
1886
  ... def update(self, data):
1742
1887
  ... self.loss = data["loss"]
1743
1888
  >>>
1744
- >>>
1745
1889
  >>> batch_size = 10
1746
1890
  >>> dataset = ds.GeneratorDataset(gen, column_names=["input"])
1747
1891
  >>> aug = Augment(0)
@@ -1780,8 +1924,12 @@ class Dataset:
1780
1924
  int, the batch size of data.
1781
1925
 
1782
1926
  Examples:
1783
- >>> # dataset is an instance object of Dataset
1927
+ >>> import mindspore.dataset as ds
1928
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1929
+ >>> dataset = dataset.batch(2)
1784
1930
  >>> batch_size = dataset.get_batch_size()
1931
+ >>> print(batch_size)
1932
+ 2
1785
1933
  """
1786
1934
  if self._batch_size is None:
1787
1935
  runtime_getter = self._init_tree_getters()
@@ -1792,14 +1940,18 @@ class Dataset:
1792
1940
 
1793
1941
  def get_repeat_count(self):
1794
1942
  """
1795
- Get the replication times in RepeatDataset. Default: 1.
1943
+ Get the replication times in RepeatDataset. Default: ``1`` .
1796
1944
 
1797
1945
  Returns:
1798
1946
  int, the count of repeat.
1799
1947
 
1800
1948
  Examples:
1801
- >>> # dataset is an instance object of Dataset
1949
+ >>> import mindspore.dataset as ds
1950
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
1951
+ >>> dataset = dataset.repeat(5)
1802
1952
  >>> repeat_count = dataset.get_repeat_count()
1953
+ >>> print(repeat_count)
1954
+ 5
1803
1955
  """
1804
1956
  if self._repeat_count is None:
1805
1957
  runtime_getter = self._init_tree_getters()
@@ -1810,15 +1962,19 @@ class Dataset:
1810
1962
 
1811
1963
  def get_class_indexing(self):
1812
1964
  """
1813
- Return the class index.
1965
+ Get the mapping dictionary from category names to category indexes.
1966
+
1967
+ This dictionary can be used to look up which category index corresponds to a particular category name.
1814
1968
 
1815
1969
  Returns:
1816
- dict, a str-to-int mapping from label name to index.
1817
- dict, a str-to-list<int> mapping from label name to index for Coco ONLY. The second number
1818
- in the list is used to indicate the super category.
1970
+ Dict[str, int], the mappings from category names to category indexes.
1819
1971
 
1820
1972
  Examples:
1821
- >>> # dataset is an instance object of Dataset
1973
+ >>> import mindspore.dataset as ds
1974
+ >>> # Read image files
1975
+ >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
1976
+ >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
1977
+ >>> # Get the mapping from category names to category indexes
1822
1978
  >>> class_indexing = dataset.get_class_indexing()
1823
1979
  """
1824
1980
  if self.children:
@@ -1830,6 +1986,7 @@ class Dataset:
1830
1986
  Reset the dataset for next epoch.
1831
1987
 
1832
1988
  Examples:
1989
+ >>> import mindspore.dataset as ds
1833
1990
  >>> mind_dataset_dir = ["/path/to/mind_dataset_file"]
1834
1991
  >>> dataset = ds.MindDataset(dataset_files=mind_dataset_dir)
1835
1992
  >>> for _ in range(5):
@@ -1882,6 +2039,13 @@ class Dataset:
1882
2039
  shard_id = 0
1883
2040
  return num_shards, shard_id
1884
2041
 
2042
+ def pre_parse(self, getter_mode):
2043
+ if getter_mode:
2044
+ if hasattr(self, "python_multiprocessing"):
2045
+ self.python_multiprocessing = False
2046
+ if hasattr(self, "num_parallel_workers"):
2047
+ self.num_parallel_workers = 1
2048
+
1885
2049
  def post_parse(self, ir_node):
1886
2050
  if self.cache:
1887
2051
  ir_node = ir_node.set_cache_client(self.cache.cache_client)
@@ -1890,6 +2054,18 @@ class Dataset:
1890
2054
 
1891
2055
  return ir_node
1892
2056
 
2057
+ def set_init_step(self, init_step):
2058
+ self._global_step = init_step
2059
+
2060
+ def get_init_step(self):
2061
+ if self._global_step is not None:
2062
+ return self._global_step
2063
+ if len(self.children) == 1:
2064
+ return self.children[0].get_init_step()
2065
+ # When there are multiple children, we cannot tell from which child to get the initial step,
2066
+ # so we initialize from the beginning
2067
+ return 0
2068
+
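A hedged sketch of how the initial step propagates (these are internal helpers; values are illustrative): a step recorded on a source node is found by walking single-child links, and a node with several children falls back to 0.

    import mindspore.dataset as ds

    dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
    dataset.set_init_step(5)          # record the step to resume from on the source node
    dataset = dataset.batch(2)        # BatchDataset has exactly one child
    print(dataset.get_init_step())    # 5, found through the single-child chain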
1893
2069
 
1894
2070
  class VisionBaseDataset(Dataset):
1895
2071
  """
@@ -1968,9 +2144,9 @@ class TextBaseDataset(Dataset):
1968
2144
  Returns:
1969
2145
  SentencePieceVocab, vocab built from the dataset.
1970
2146
  """
1971
- warnings.warn("mindspore.dataset.Dataset.build_vocab is deprecated from version 2.0 "
2147
+ warnings.warn("mindspore.dataset.Dataset.build_sentencepiece_vocab is deprecated from version 2.0 "
1972
2148
  "and will be removed in a future version. "
1973
- "Use mindspore.dataset.text.Vocab.from_dataset instead.", DeprecationWarning)
2149
+ "Use mindspore.dataset.text.SentencePieceVocab.from_dataset instead.", DeprecationWarning)
1974
2150
 
1975
2151
  def _build_vocab(self, columns, freq_range, top_k, special_tokens, special_first):
1976
2152
  """
@@ -2193,8 +2369,11 @@ class MappableDataset(SourceDataset):
2193
2369
  new_sampler (Sampler): The child sampler to be added.
2194
2370
 
2195
2371
  Examples:
2372
+ >>> import mindspore.dataset as ds
2373
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
2374
+ >>>
2196
2375
  >>> new_sampler = ds.DistributedSampler(10, 2)
2197
- >>> dataset.add_sampler(new_sampler) # dataset is an instance of Dataset
2376
+ >>> dataset.add_sampler(new_sampler)
2198
2377
  """
2199
2378
  # Note: By adding a sampler, the sampled IDs will flow to the new_sampler
2200
2379
  # after first passing through the current samplers attached to this dataset.
@@ -2210,7 +2389,9 @@ class MappableDataset(SourceDataset):
2210
2389
  new_sampler (Sampler): The new sampler to replace with.
2211
2390
 
2212
2391
  Examples:
2213
- >>> # dataset is an instance object of Dataset
2392
+ >>> import mindspore.dataset as ds
2393
+ >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
2394
+ >>>
2214
2395
  >>> # use a DistributedSampler instead
2215
2396
  >>> new_sampler = ds.DistributedSampler(10, 2)
2216
2397
  >>> dataset.use_sampler(new_sampler)
@@ -2251,24 +2432,25 @@ class MappableDataset(SourceDataset):
2251
2432
  - The sum of split sizes > K, the difference will be removed from the first large
2252
2433
  enough split such that it will have at least 1 row after removing the difference.
2253
2434
 
2254
- randomize (bool, optional): Determines whether or not to split the data randomly. Default: True.
2255
- If True, the data will be randomly split. Otherwise, each split will be created with
2435
+ randomize (bool, optional): Determines whether or not to split the data randomly. Default: ``True``.
2436
+ If ``True``, the data will be randomly split. Otherwise, each split will be created with
2256
2437
  consecutive rows from the dataset.
2257
2438
 
2258
2439
  Note:
2259
2440
  1. There is an optimized split function, which will be called automatically when the dataset
2260
2441
  that calls this function is a MappableDataset.
2261
2442
  2. Dataset should not be sharded if split is going to be called. Instead, create a
2262
- DistributedSampler and specify a split to shard after splitting. If the dataset is
2263
- sharded after a split, it is strongly recommended setting the same seed in each instance
2264
- of execution, otherwise each shard may not be part of the same split (see Examples).
2265
- 3. It is strongly recommended to not shuffle the dataset, but use randomize=True instead.
2443
+ :class:`mindspore.dataset.DistributedSampler` and specify a split to shard after splitting.
2444
+ If the dataset is sharded after a split, it is strongly recommended setting the same
2445
+ seed in each instance of execution, otherwise each shard may not be part of the same
2446
+ split (see Examples).
2447
+ 3. It is strongly recommended to not shuffle the dataset, but set `randomize` to ``True`` instead.
2266
2448
  Shuffling the dataset may not be deterministic, which means the data in each split
2267
2449
  will be different in each epoch. Furthermore, if sharding occurs after split, each
2268
2450
  shard may not be part of the same split.
2269
2451
 
2270
2452
  Returns:
2271
- tuple(Dataset), a tuple of datasets that have been split.
2453
+ Tuple[Dataset], a tuple of new datasets split from the original one.
2272
2454
 
2273
2455
  Raises:
2274
2456
  RuntimeError: If get_dataset_size returns None or is not supported for this dataset.
@@ -2280,7 +2462,9 @@ class MappableDataset(SourceDataset):
2280
2462
  floats don't sum to 1.
2281
2463
 
2282
2464
  Examples:
2465
+ >>> import mindspore.dataset as ds
2283
2466
  >>> # Since many datasets have shuffle on by default, set shuffle to False if split will be called!
2467
+ >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
2284
2468
  >>> dataset = ds.ImageFolderDataset(image_folder_dataset_dir, shuffle=False)
2285
2469
  >>>
2286
2470
  >>> # Set the seed, and tell split to use this seed when randomizing.
@@ -2348,7 +2532,7 @@ class BucketBatchByLengthDataset(UnionBaseDataset):
2348
2532
  self.pad_to_bucket_boundary, self.drop_remainder)
2349
2533
 
2350
2534
 
2351
- def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
2535
+ def _check_shm_usage(num_worker, queue_size, in_rowsize, out_rowsize):
2352
2536
  """
2353
2537
  Check sufficient shared memory is available for shared memory queues
2354
2538
  when training in parallel mode.
@@ -2358,10 +2542,10 @@ def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
2358
2542
  device_num = _get_device_num()
2359
2543
  # In the cluster, _get_device_num indicates the number of the entire cluster. The maximum number of cards
2360
2544
  # on the ascend server is 8.
2361
- if device_num > 1 and context.get_context("device_target") == "Ascend":
2545
+ if device_num > 1:
2362
2546
  device_num = min(device_num, 8)
2363
- shm_estimate_usage = device_num * num_worker * num_queues * \
2364
- (queue_size + 2) * max_rowsize * 1024 * 1024
2547
+ shm_estimate_usage = device_num * num_worker * \
2548
+ (queue_size + 2) * (in_rowsize + out_rowsize) * 1024 * 1024
2365
2549
  try:
2366
2550
  shm_available = psutil.disk_usage('/dev/shm').free
2367
2551
  if shm_estimate_usage >= threshold_ratio * shm_available:
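To make the estimate above concrete, a small stand-alone sketch under assumed values (8 devices, 8 workers, queue size 1, 16 MB units for input and output rows; none of these numbers come from the diff):

import psutil

# Assumed, illustrative values: 8 devices, 8 workers, queue size 1, 16 MB in/out row units.
device_num, num_worker, queue_size = 8, 8, 1
in_rowsize, out_rowsize = 16, 16

# Mirrors the estimate in _check_shm_usage: shared memory the queues may need, in bytes.
shm_estimate_usage = device_num * num_worker * (queue_size + 2) * (in_rowsize + out_rowsize) * 1024 * 1024

# Compare against the free space of /dev/shm, as the function above does.
shm_available = psutil.disk_usage('/dev/shm').free
print(f"estimated {shm_estimate_usage / 2**30:.1f} GiB, available {shm_available / 2**30:.1f} GiB")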
@@ -2385,10 +2569,10 @@ class BatchDataset(UnionBaseDataset):
2385
2569
  batch_size (Union[int, function]): The number of rows each batch is created with. An
2386
2570
  int or callable which takes exactly 1 parameter, BatchInfo.
2387
2571
  drop_remainder (bool, optional): Determines whether or not to drop the last
2388
- possibly incomplete batch. Default: False. If True, and if there are less
2572
+ possibly incomplete batch. Default: ``False``. If ``True``, and if there are fewer
2389
2573
  than batch_size rows available to make the last batch, then those rows will
2390
2574
  be dropped and not propagated to the child node.
2391
- num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None.
2575
+ num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: ``None``.
2392
2576
  per_batch_map (callable, optional): Per batch map callable. A callable which takes
2393
2577
  (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch of
2394
2578
  Tensors on a given column. The number of lists should match with number of entries in input_columns. The
@@ -2398,10 +2582,16 @@ class BatchDataset(UnionBaseDataset):
2398
2582
  output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
2399
2583
  the last operation. This parameter is mandatory if len(input_columns) !=
2400
2584
  len(output_columns). The size of this list must match the number of output
2401
- columns of the last operation. Default: None, output columns will have the same
2585
+ columns of the last operation. Default: ``None``, output columns will have the same
2402
2586
  name as the input columns, i.e., the columns will be replaced.
2403
- max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
2404
- data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
2587
+ max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
2588
+ allocation to copy data between processes. The total occupied shared memory will increase as
2589
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
2590
+ used if python_multiprocessing is set to True. If it is an int, both ``input_columns`` and
2591
+ ``output_columns`` use this value as the unit to create shared memory.
2592
+ If it is a list, the first element is the unit used to create shared memory for ``input_columns``,
2593
+ and the second element is the unit used to create shared memory for ``output_columns``.
2594
+ Default: 16.
2405
2595
 
2406
2596
  """
2407
2597
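A minimal usage sketch of the list form of `max_rowsize` documented above, assuming the 2.2.0 `Dataset.batch` keyword arguments; the data, transform, and sizes are illustrative:

import numpy as np
import mindspore.dataset as ds

def copy_batch(col1, batch_info):
    # Illustrative per_batch_map executed in worker processes; returns the column unchanged.
    return (col1,)

data = np.random.rand(16, 32).astype(np.float32)
dataset = ds.NumpySlicesDataset(data, column_names=["col1"], shuffle=False)
# 16 MB shared-memory unit for input rows, 32 MB for output rows.
dataset = dataset.batch(batch_size=4, per_batch_map=copy_batch, input_columns=["col1"],
                        python_multiprocessing=True, num_parallel_workers=2, max_rowsize=[16, 32])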
 
@@ -2427,7 +2617,10 @@ class BatchDataset(UnionBaseDataset):
2427
2617
 
2428
2618
  self.python_multiprocessing = python_multiprocessing
2429
2619
  self.process_pool = None
2430
- self.max_rowsize = max_rowsize
2620
+ if isinstance(max_rowsize, int):
2621
+ self.max_rowsize = [max_rowsize * self.batch_size] * 2
2622
+ else:
2623
+ self.max_rowsize = [max_rowsize[0] * self.batch_size, max_rowsize[1] * self.batch_size]
2431
2624
 
2432
2625
  def __del__(self):
2433
2626
  if hasattr(self, "process_pool") and self.process_pool is not None:
@@ -2497,7 +2690,7 @@ class BatchDataset(UnionBaseDataset):
2497
2690
  self.num_parallel_workers = get_num_parallel_workers()
2498
2691
 
2499
2692
  self.process_pool = _PythonMultiprocessing(str(self), self.num_parallel_workers, [self.per_batch_map],
2500
- self.max_rowsize * self.batch_size)
2693
+ self.max_rowsize)
2501
2694
  # Wrap per_batch_map into _PythonCallable
2502
2695
  self.per_batch_map = _PythonCallable(self.per_batch_map, 0, self.process_pool)
2503
2696
  else:
@@ -2507,19 +2700,53 @@ class BatchDataset(UnionBaseDataset):
2507
2700
 
2508
2701
  class BatchInfo(cde.CBatchInfo):
2509
2702
  """
2510
- Only the batch size function and per_batch_map of the batch operation can dynamically adjust parameters
2511
- based on the number of batches and epochs during training.
2703
+ This class provides dataset information dynamically when the `batch_size` or `per_batch_map`
2704
+ argument of the `batch` operation is a callable object.
2512
2705
  """
2513
2706
 
2514
2707
  def get_batch_num(self):
2515
2708
  """
2516
- Return the batch number of the current batch.
2709
+ Return the number of the batch being processed in the current epoch, starting from 0.
2710
+
2711
+ Examples:
2712
+ >>> # Create a dataset whose batch size is dynamic
2713
+ >>> # Define a callable batch size function that increases the batch size by 1 for each batch.
2714
+ >>> import mindspore.dataset as ds
2715
+ >>> from mindspore.dataset import BatchInfo
2716
+ >>>
2717
+ >>> dataset = ds.GeneratorDataset([i for i in range(3)], "column1", shuffle=False)
2718
+ >>> def add_one(BatchInfo):
2719
+ ... return BatchInfo.get_batch_num() + 1
2720
+ >>> dataset = dataset.batch(batch_size=add_one)
2721
+ >>> print(list(dataset))
2722
+ [[Tensor(shape=[1], dtype=Int64, value= [0])], [Tensor(shape=[2], dtype=Int64, value= [1, 2])]]
2517
2723
  """
2518
2724
  return
2519
2725
 
2520
2726
  def get_epoch_num(self):
2521
2727
  """
2522
- Return the epoch number of the current batch.
2728
+ Return the current epoch number, starting from 0.
2729
+
2730
+ Examples:
2731
+ >>> # Create a dataset whose batch size is dynamic
2732
+ >>> # Define a callable batch size function that increases the batch size by 1 each epoch.
2733
+ >>> import mindspore.dataset as ds
2734
+ >>> from mindspore.dataset import BatchInfo
2735
+ >>>
2736
+ >>> dataset = ds.GeneratorDataset([i for i in range(4)], "column1", shuffle=False)
2737
+ >>> def add_one_by_epoch(BatchInfo):
2738
+ ... return BatchInfo.get_epoch_num() + 1
2739
+ >>> dataset = dataset.batch(batch_size=add_one_by_epoch)
2740
+ >>>
2741
+ >>> result = []
2742
+ >>> epoch = 2
2743
+ >>> iterator = dataset.create_tuple_iterator(num_epochs=epoch)
2744
+ >>> for i in range(epoch):
2745
+ ... result.extend(list(iterator))
2746
+ >>> # result:
2747
+ >>> # [[Tensor(shape=[1], dtype=Int64, value= [0])], [Tensor(shape=[1], dtype=Int64, value= [1])],
2748
+ >>> # [Tensor(shape=[1], dtype=Int64, value= [2])], [Tensor(shape=[1], dtype=Int64, value= [3])],
2749
+ >>> # [Tensor(shape=[2], dtype=Int64, value= [0, 1])], [Tensor(shape=[2], dtype=Int64, value= [2, 3])]]
2523
2750
  """
2524
2751
  return
2525
2752
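`per_batch_map` receives the same `BatchInfo` object as its last argument, so the two hooks above can also drive per-batch transforms; a brief sketch with illustrative data:

import numpy as np
import mindspore.dataset as ds

def scale_by_epoch(col1, batch_info):
    # Multiply every row in the batch by (epoch number + 1); purely illustrative.
    factor = batch_info.get_epoch_num() + 1
    return ([np.array(x) * factor for x in col1],)

dataset = ds.GeneratorDataset([np.array(i) for i in range(4)], "column1", shuffle=False)
dataset = dataset.batch(batch_size=2, per_batch_map=scale_by_epoch, input_columns=["column1"])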
 
@@ -2530,7 +2757,7 @@ class BlockReleasePair:
2530
2757
 
2531
2758
  Args:
2532
2759
  init_release_rows (int): Number of lines to allow through the pipeline.
2533
- callback (function): The callback function that will be called when release is called. Default: None.
2760
+ callback (function): The callback function that will be called when release is called. Default: ``None``.
2534
2761
  """
2535
2762
 
2536
2763
  def __init__(self, init_release_rows, callback=None):
@@ -2602,10 +2829,10 @@ class PaddedBatchDataset(UnionBaseDataset):
2602
2829
  batch_size (Union[int, function]): The number of rows each batch is created with. An
2603
2830
  int or callable which takes exactly 1 parameter, BatchInfo.
2604
2831
  drop_remainder (bool, optional): Determines whether or not to drop the last
2605
- possibly incomplete batch. Default: False. If True, and if there are less
2832
+ possibly incomplete batch. Default: ``False``. If True, and if there are less
2606
2833
  than batch_size rows available to make the last batch, then those rows will
2607
2834
  be dropped and not propagated to the child node.
2608
- num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None.
2835
+ num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: ``None``.
2609
2836
  pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
2610
2837
  will pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
2611
2838
  """
@@ -2675,7 +2902,7 @@ class SyncWaitDataset(UnionBaseDataset):
2675
2902
  input_dataset (Dataset): Input dataset to apply flow control.
2676
2903
  num_batch (int): Number of batches without blocking at the start of each epoch.
2677
2904
  condition_name (str): Condition name that is used to toggle sending next row.
2678
- callback (function): Callback function that will be invoked when sync_update is called. Default: None.
2905
+ callback (function): Callback function that will be invoked when sync_update is called. Default: ``None``.
2679
2906
 
2680
2907
  Raises:
2681
2908
  RuntimeError: If condition name already exists.
@@ -2782,24 +3009,11 @@ class _PythonCallable:
2782
3009
  self.pool = pool
2783
3010
  # Python callable index
2784
3011
  self.idx = idx
2785
- self.check_interval = get_multiprocessing_timeout_interval()
2786
3012
 
2787
3013
  def __call__(self, *args):
2788
3014
  result = None
2789
- start_time = time.time()
2790
- count = 1
2791
3015
  get_data_from_worker_process = False
2792
3016
  while get_data_from_worker_process is False:
2793
- cost_time = time.time() - start_time
2794
- if cost_time > (self.check_interval * count):
2795
- logger.warning("It has been waiting for " + str(cost_time) + "s because the multi "
2796
- "workers of map operation cost long time to process next data. "
2797
- "Worker process list are: " + str(self.pool.get_pids()) + ", you can use "
2798
- "\"py-spy dump -p {PID} -l -s \""
2799
- "to dump the worker process stack. You can also set the timeout interval by "
2800
- "ds.config.set_multiprocessing_interval to adjust the output frequency of this "
2801
- "log.")
2802
- count += 1
2803
3017
  if self.pool.is_running() and check_iterator_cleanup() is False:
2804
3018
  try:
2805
3019
  result = self.pool.execute(self.idx, *args)
@@ -2820,6 +3034,7 @@ class _PythonCallable:
2820
3034
  return self.py_callable.to_json()
2821
3035
 
2822
3036
 
3037
+ # used when python_multiprocessing=True in map
2823
3038
  class Pipe:
2824
3039
  """
2825
3040
  Class to handle communication between the master process and the worker processes.
@@ -2829,29 +3044,34 @@ class Pipe:
2829
3044
  self.shared_memory = shared_memory
2830
3045
  self.eof = multiprocessing.Event()
2831
3046
  if self.shared_memory:
2832
- self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize)
2833
- self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize)
3047
+ self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[0])
3048
+ self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[1])
2834
3049
  else:
2835
3050
  self.in_queue = _Queue(1)
2836
3051
  self.res_queue = _Queue(1)
2837
- self.in_queue._joincancelled = True # pylint: disable=W0212
2838
- self.res_queue._joincancelled = True # pylint: disable=W0212
3052
+ self.in_queue.cancel_join_thread() # Ensure that the process does not hang when exiting
2839
3053
 
2840
3054
  def master_send(self, func_index, data):
2841
3055
  self.in_queue.put_nowait((func_index, *data))
2842
3056
 
2843
3057
  def master_receive(self):
2844
- return self.res_queue.get_until(timeout=1, exit_signal=self.eof)
3058
+ if self.eof is None:
3059
+ raise RuntimeError("EOF is none when get data from worker.")
3060
+ if self.eof.is_set():
3061
+ return None
3062
+ return self.res_queue.get(timeout=1)
2845
3063
 
2846
3064
  def master_close(self):
2847
3065
  self.eof.set()
3066
+ self.send_finish_signal_to_worker()
2848
3067
  self.send_finish_signal()
2849
- self.res_queue.cancel_join_thread()
2850
- self.in_queue.cancel_join_thread()
2851
3068
 
2852
3069
  def send_finish_signal(self):
2853
3070
  self.worker_send(None)
2854
3071
 
3072
+ def send_finish_signal_to_worker(self):
3073
+ self.master_send(0, "QUIT")
3074
+
2855
3075
  def worker_send(self, data):
2856
3076
  self.res_queue.put_until(data, timeout=1, exit_signal=self.eof)
2857
3077
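The master/worker hand-off implemented above can be pictured with a stripped-down stand-in that uses plain multiprocessing queues and the same "QUIT" sentinel; everything here is illustrative rather than the actual Pipe class:

import multiprocessing as mp

def toy_worker(in_queue, res_queue):
    # Worker loop: pull (func_index, data), stop on the "QUIT" sentinel, send results back.
    while True:
        func_index, data = in_queue.get()
        if data == "QUIT":
            break
        res_queue.put(data * 2)

if __name__ == "__main__":
    in_queue, res_queue = mp.Queue(1), mp.Queue(1)
    worker = mp.Process(target=toy_worker, args=(in_queue, res_queue), daemon=True)
    worker.start()
    in_queue.put((0, 21))               # master_send: (func_index, *data)
    print(res_queue.get(timeout=5))     # master_receive -> 42
    in_queue.put((0, "QUIT"))           # send_finish_signal_to_worker
    worker.join()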
 
@@ -2864,10 +3084,6 @@ class Pipe:
2864
3084
  func_index, *data = result
2865
3085
  return func_index, tuple(data)
2866
3086
 
2867
- def worker_close(self):
2868
- self.res_queue.cancel_join_thread()
2869
- self.in_queue.cancel_join_thread()
2870
-
2871
3087
 
2872
3088
  def _main_process_already_exit():
2873
3089
  """
@@ -2885,6 +3101,8 @@ def _worker_loop(operations, pipe, seed=get_seed()):
2885
3101
  """
2886
3102
  Multiprocess worker process loop.
2887
3103
  """
3104
+ # Ensure that the process does not hang when exiting
3105
+ pipe.res_queue.cancel_join_thread()
2888
3106
 
2889
3107
  def _ignore_sigint():
2890
3108
  """
@@ -2900,9 +3118,10 @@ def _worker_loop(operations, pipe, seed=get_seed()):
2900
3118
 
2901
3119
  result = pipe.worker_receive()
2902
3120
  if result is None:
2903
- pipe.worker_close()
2904
3121
  return
2905
3122
  (idx, input_tensors) = result
3123
+ if input_tensors == "QUIT":
3124
+ break
2906
3125
  try:
2907
3126
  output_tensors = operations[idx](*input_tensors)
2908
3127
 
@@ -2911,6 +3130,10 @@ def _worker_loop(operations, pipe, seed=get_seed()):
2911
3130
  pipe.worker_send(ExceptionHandler(where="in map(or batch) worker and execute Python function"))
2912
3131
  # Do not return
2913
3132
 
3133
+ # release the queues when the worker is stopped by the master
3134
+ del pipe.in_queue
3135
+ del pipe.res_queue
3136
+
2914
3137
 
2915
3138
  def worker_target(operations, seed=get_seed()):
2916
3139
  return lambda pipe: _worker_loop(operations, pipe, seed)
@@ -2924,20 +3147,54 @@ class _MPWorker(multiprocessing.Process):
2924
3147
  def __init__(self, operations, warning_ctl, max_rowsize=16, seed=get_seed()):
2925
3148
  shared_memory = get_enable_shared_mem()
2926
3149
  self.pipe = Pipe(warning_ctl, shared_memory=shared_memory, max_rowsize=max_rowsize)
3150
+ self.check_interval = get_multiprocessing_timeout_interval()
2927
3151
  super().__init__(target=worker_target(operations, seed), args=(self.pipe,), daemon=True)
2928
3152
 
2929
3153
  def execute(self, idx, *args):
3154
+ """Acquiring data from a worker in an infinite loop"""
2930
3155
  self.pipe.master_send(idx, args)
2931
- res = self.pipe.master_receive()
2932
- if isinstance(res, ExceptionHandler):
2933
- res.reraise()
2934
- return res
3156
+ time_s = time.time()
3157
+ wait_count = 1
3158
+ while True:
3159
+ cost_time = time.time() - time_s
3160
+ if cost_time / self.check_interval >= wait_count:
3161
+ wait_count += 1
3162
+ logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
3163
+ "worker of the map operation is hanging. "
3164
+ "Check whether the user defined data transform is too slow or the "
3165
+ "output data is too large. You can also set the timeout interval by "
3166
+ "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
3167
+ "of this log.")
3168
+ pid = self.pid
3169
+ logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
3170
+ install_status, _ = subprocess.getstatusoutput("py-spy --version")
3171
+ if install_status == 0:
3172
+ stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
3173
+ logger.warning("Map worker subprocess stack:\n{}".format(stack))
3174
+ else:
3175
+ logger.warning("Please `pip install py-spy` to get the stacks of the stuck process.")
3176
+ try:
3177
+ res = self.pipe.master_receive()
3178
+ except queue.Empty:
3179
+ continue
3180
+ if res is None:
3181
+ # receive finish signal
3182
+ return None
3183
+ if isinstance(res, ExceptionHandler):
3184
+ res.reraise()
3185
+ return res
2935
3186
 
2936
3187
  def close(self):
2937
3188
  try:
2938
3189
  if self.is_alive():
3190
+ # release the eager executor used by the current process
3191
+ transforms.transforms.clean_unused_executors()
3192
+
2939
3193
  logger.info(f"Closing worker with PID: {self.pid}")
2940
3194
  self.pipe.master_close()
3195
+ # delete the queue handles held by the master
3196
+ del self.pipe.in_queue
3197
+ del self.pipe.res_queue
2941
3198
  super().terminate()
2942
3199
  super().join()
2943
3200
  super().close()
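When a map worker does hang, the two knobs used above can also be exercised by hand; a sketch with a placeholder PID and an assumed 60-second interval:

import subprocess
import mindspore.dataset as ds

# Emit the "worker is hanging" warning every 60 seconds instead of the default interval.
ds.config.set_multiprocessing_timeout_interval(60)

# Manually dump the stack of a stuck worker, as the code above does when py-spy is installed.
pid = 12345  # placeholder: PID of the stuck map worker subprocess
status, _ = subprocess.getstatusoutput("py-spy --version")
if status == 0:
    print(subprocess.getoutput("py-spy dump -p {} -l".format(pid)))
else:
    print("Please `pip install py-spy` to dump the worker stack.")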
@@ -2965,6 +3222,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
2965
3222
  """
2966
3223
 
2967
3224
  def __init__(self):
3225
+ self.origin_hook = sys.excepthook
2968
3226
  sys.excepthook = self.__handler_exception
2969
3227
 
2970
3228
  @staticmethod
@@ -2976,15 +3234,15 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
2976
3234
  time.sleep(3)
2977
3235
 
2978
3236
  def __handler_exception(self, ex_type, value, tb):
2979
- logger.critical("Uncaught exception: ", exc_info=(ex_type, value, tb))
3237
+ self.origin_hook(ex_type, value, tb)
2980
3238
  self.mp_pool_exit_preprocess()
2981
3239
 
2982
- def __init__(self, op_name, num_parallel_workers, operations, max_row_size=16):
3240
+ def __init__(self, op_name, num_parallel_workers, operations, max_rowsize=16):
2983
3241
  super(_PythonMultiprocessing, self).__init__()
2984
3242
  self.op_name = op_name
2985
3243
  self.num_parallel_workers = num_parallel_workers
2986
3244
  self.operations = operations
2987
- self.max_row_size = max_row_size
3245
+ self.max_rowsize = max_rowsize
2988
3246
 
2989
3247
  self.workers = None
2990
3248
  self.pids = None
@@ -3056,6 +3314,9 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3056
3314
  "ds.config.set_enable_watchdog(False) to block this error.")
3057
3315
  os.kill(os.getpid(), signal.SIGTERM)
3058
3316
 
3317
+ # release the workers
3318
+ del workers
3319
+
3059
3320
  @staticmethod
3060
3321
  def _terminate_processes(processes):
3061
3322
  """Terminate subprocesses"""
@@ -3141,6 +3402,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3141
3402
  time.sleep(0.1)
3142
3403
 
3143
3404
  _PythonMultiprocessing._terminate_processes(workers)
3405
+ del workers
3144
3406
  os.kill(os.getpid(), signal.SIGTERM)
3145
3407
 
3146
3408
  def launch(self, op_id=-1):
@@ -3171,7 +3433,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3171
3433
 
3172
3434
  """
3173
3435
  if get_enable_shared_mem():
3174
- self.check_shared_memory()
3436
+ _check_shm_usage(self.num_parallel_workers, 1, self.max_rowsize[0], self.max_rowsize[1])
3175
3437
 
3176
3438
  if self.workers is not None:
3177
3439
  raise Exception("Pool was already created, close it first.")
@@ -3183,7 +3445,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3183
3445
  self.workers = []
3184
3446
  self.warning_ctl = multiprocessing.Value('i', 0)
3185
3447
  for i in range(self.num_parallel_workers):
3186
- worker = _MPWorker(self.operations, self.warning_ctl, self.max_row_size, i + get_seed())
3448
+ worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, i + get_seed())
3187
3449
  worker.start()
3188
3450
  self.workers.append(worker)
3189
3451
 
@@ -3197,8 +3459,11 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3197
3459
  atexit.register(self.terminate)
3198
3460
 
3199
3461
  def terminate(self):
3200
- self.close_all_workers()
3462
+ # close the watchdog first and then close all the workers
3201
3463
  self.abort_watchdog()
3464
+ self.close_all_workers()
3465
+ if hasattr(self, "warning_ctl"):
3466
+ del self.warning_ctl
3202
3467
 
3203
3468
  def get_pids(self):
3204
3469
  """
@@ -3242,12 +3507,6 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3242
3507
  def is_mp_enabled(self):
3243
3508
  return self.workers is not None
3244
3509
 
3245
- def check_shared_memory(self):
3246
- """
3247
- Check if there is enough shared memory in the system.
3248
- """
3249
- _check_shm_usage(self.num_parallel_workers, 1, self.max_row_size, 2)
3250
-
3251
3510
  def execute(self, idx, *args):
3252
3511
  """
3253
3512
  Execute
@@ -3294,6 +3553,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3294
3553
  self._abort_watchdog()
3295
3554
  if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
3296
3555
  _PythonMultiprocessing._terminate_processes([self.cleaning_process])
3556
+ del self.cleaning_process
3297
3557
 
3298
3558
  def is_running(self):
3299
3559
  if hasattr(self, 'workers') and self.workers is not None:
@@ -3301,9 +3561,34 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3301
3561
  return False
3302
3562
 
3303
3563
  def close_all_workers(self):
3564
+ """Close all the subprocess workers"""
3304
3565
  if hasattr(self, 'workers') and self.workers is not None:
3305
3566
  for w in self.workers:
3306
3567
  w.close()
3568
+ check_interval = get_multiprocessing_timeout_interval()
3569
+ for w in self.workers:
3570
+ try:
3571
+ subprocess_file_descriptor = w.sentinel
3572
+ st = time.time()
3573
+ while _PythonMultiprocessing.is_process_alive(w.pid):
3574
+ time.sleep(0.01) # sleep 10ms, waiting for the subprocess to exit
3575
+ if time.time() - st > check_interval:
3576
+ logger.warning("Waiting for the subprocess worker [{}] to exit.".format(w.pid))
3577
+ st += check_interval
3578
+ except ValueError as e:
3579
+ if "process object is closed" in str(e):
3580
+ continue
3581
+ raise e
3582
+ try:
3583
+ if w.is_alive():
3584
+ os.close(subprocess_file_descriptor)
3585
+ except OSError as e:
3586
+ # The file descriptor may have already been released, so ignore 'Bad file descriptor'
3587
+ if "Bad file descriptor" not in str(e):
3588
+ raise e
3589
+
3590
+ # use clear() to release the handles, which is better than assigning self.workers = None
3591
+ self.workers.clear()
3307
3592
  self.workers = None
3308
3593
  self.pids = None
3309
3594
 
@@ -3315,24 +3600,29 @@ class MapDataset(UnionBaseDataset):
3315
3600
  Args:
3316
3601
  input_dataset (Dataset): Input Dataset to be mapped.
3317
3602
  operations (Union[list[TensorOperation], list[functions]]): A function mapping a nested structure of tensors
3318
- to another nested structure of tensor. Default: None.
3603
+ to another nested structure of tensor. Default: ``None``.
3319
3604
  input_columns (Union[str, list[str]]): List of names of the input columns.
3320
- Default: None, the operations will be applied on the first columns in the dataset.
3605
+ Default: ``None``, the operations will be applied on the first columns in the dataset.
3321
3606
  The size of the list should match the number of inputs of the first operation.
3322
3607
  output_columns (Union[str, list[str]], optional): List of names of the output columns.
3323
3608
  The size of the list should match the number of outputs of the last operation.
3324
- Default: None, output columns will be the input columns, i.e., the columns will
3609
+ Default: ``None``, output columns will be the input columns, i.e., the columns will
3325
3610
  be replaced.
3326
3611
  num_parallel_workers (int, optional): Number of workers to process the dataset
3327
- in parallel. Default: None.
3612
+ in parallel. Default: ``None``.
3328
3613
  python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
3329
- option could be beneficial if the Python operation is computational heavy. Default: False.
3614
+ option could be beneficial if the Python operation is computational heavy. Default: ``False``.
3330
3615
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
3331
- Default: None, which means no cache is used.
3332
- callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: None.
3333
- max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
3334
- data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
3335
- offload (bool, optional): Flag to indicate whether offload is used. Default: None.
3616
+ Default: ``None``, which means no cache is used.
3617
+ callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: ``None``.
3618
+ max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
3619
+ allocation to copy data between processes. The total occupied shared memory will increase as
3620
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
3621
+ used if python_multiprocessing is set to True. If it is an int, both ``input_columns`` and
3622
+ ``output_columns`` use this value as the unit to create shared memory. If it is a list, the first element
3623
+ is the unit used to create shared memory for ``input_columns``, and the second element
3624
+ is the unit used to create shared memory for ``output_columns``. Default: 16.
3625
+ offload (bool, optional): Flag to indicate whether offload is used. Default: ``None``.
3336
3626
  """
3337
3627
 
3338
3628
  def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None,
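The same list form of `max_rowsize` applies to `map`; a short sketch mirroring the batch example, with an illustrative Python transform:

import numpy as np
import mindspore.dataset as ds

def heavy_transform(x):
    # Illustrative Python transform that benefits from worker processes.
    return x.astype(np.float32) * 2

data = np.random.rand(8, 64)
dataset = ds.NumpySlicesDataset(data, column_names=["col1"], shuffle=False)
# 16 MB shared-memory unit for inputs, 32 MB for the (possibly larger) outputs.
dataset = dataset.map(operations=heavy_transform, input_columns=["col1"],
                      python_multiprocessing=True, num_parallel_workers=2, max_rowsize=[16, 32])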
@@ -3362,7 +3652,10 @@ class MapDataset(UnionBaseDataset):
3362
3652
  self.process_pool = None
3363
3653
 
3364
3654
  self.callbacks = to_list(callbacks)
3365
- self.max_rowsize = max_rowsize
3655
+ if isinstance(max_rowsize, int):
3656
+ self.max_rowsize = [max_rowsize] * 2
3657
+ else:
3658
+ self.max_rowsize = max_rowsize
3366
3659
  self.offload = offload
3367
3660
 
3368
3661
  def parse(self, children=None):
@@ -3400,7 +3693,7 @@ class MapDataset(UnionBaseDataset):
3400
3693
 
3401
3694
  callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
3402
3695
  return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
3403
- callbacks, self.max_rowsize, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
3696
+ callbacks, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
3404
3697
 
3405
3698
  def __deepcopy__(self, memodict):
3406
3699
  return self.__safe_deepcopy__(memodict, exclude=("operations", "callbacks", "__transfer_dataset__"))
@@ -3426,7 +3719,7 @@ class MapDataset(UnionBaseDataset):
3426
3719
  return op_name
3427
3720
 
3428
3721
  @staticmethod
3429
- def __construct_debug_hook(previous_op_name=None):
3722
+ def __construct_debug_hook(previous_op_name=None, is_first_op=False):
3430
3723
  """
3431
3724
  Wrap debug hook into FuncWrapper.
3432
3725
  """
@@ -3437,6 +3730,7 @@ class MapDataset(UnionBaseDataset):
3437
3730
  # making deep copy to allow each debug hook instance hold unique variables
3438
3731
  new_fn = copy.deepcopy(fn)
3439
3732
  new_fn.set_previous_op_name(previous_op_name)
3733
+ new_fn.set_is_first(is_first_op)
3440
3734
  inserted_func = transforms.py_transforms_util.FuncWrapper(new_fn)
3441
3735
  inserted_func.implementation = Implementation.PY
3442
3736
  inserted_functions.append(inserted_func)
@@ -3558,7 +3852,8 @@ class MapDataset(UnionBaseDataset):
3558
3852
  """
3559
3853
  if not get_debug_mode():
3560
3854
  return operations
3561
- inserted_operations = self.__construct_debug_hook()
3855
+ first_op_name = self.__parse_op_name(operations[0])
3856
+ inserted_operations = self.__construct_debug_hook(first_op_name, is_first_op=True)
3562
3857
  for op in operations:
3563
3858
  inserted_operations.append(op)
3564
3859
  op_name = self.__parse_op_name(op)
@@ -3588,9 +3883,9 @@ class FilterDataset(UnionBaseDataset):
3588
3883
  input_dataset (Dataset): Input Dataset to be mapped.
3589
3884
  predicate (callable): Python callable which returns a boolean value. If False then filter the element.
3590
3885
  input_columns (Union[str, list[str]], optional): List of names of the input columns.
3591
- Default: None, the predicate will be applied to all columns in the dataset.
3886
+ Default: ``None``, the predicate will be applied to all columns in the dataset.
3592
3887
  num_parallel_workers (int, optional): Number of workers to process the dataset
3593
- in parallel. Default: None.
3888
+ in parallel. Default: ``None``.
3594
3889
  """
3595
3890
 
3596
3891
  def __init__(self, input_dataset, predicate, input_columns=None, num_parallel_workers=None):
@@ -3702,6 +3997,8 @@ class ConcatDataset(UnionBaseDataset):
3702
3997
  "valid samples in the dataset." % child_index)
3703
3998
  child_index += 1
3704
3999
 
4000
+ self._children_sizes = self.children_sizes_.copy()
4001
+
3705
4002
  # _children_flag_and_nums: A list of pair<int ,int>.The first element of pair is flag that characterizes
3706
4003
  # whether the dataset is mappable. The second element of pair is length of the dataset
3707
4004
  self._children_flag_and_nums = []
@@ -3725,7 +4022,8 @@ class ConcatDataset(UnionBaseDataset):
3725
4022
  self._children_flag_and_nums.append((1, dataset_len))
3726
4023
 
3727
4024
  def parse(self, children=None):
3728
- return cde.ConcatNode(children, self._sampler, self._children_flag_and_nums, self._children_start_end_index_)
4025
+ return cde.ConcatNode(children, self._sampler, self._children_flag_and_nums, self._children_start_end_index_,
4026
+ self._children_sizes)
3729
4027
 
3730
4028
  def use_sampler(self, sampler):
3731
4029
  """
@@ -3741,8 +4039,19 @@ class ConcatDataset(UnionBaseDataset):
3741
4039
  ValueError: If the parameter NumSamples of sampler is not None.
3742
4040
  ValueError: If num_shards <=0.
3743
4041
  """
3744
- if not isinstance(sampler, samplers.DistributedSampler):
3745
- raise TypeError("The parameter %s of concat must be DistributedSampler!" % sampler)
4042
+ if not isinstance(sampler, (samplers.DistributedSampler, samplers.RandomSampler)):
4043
+ raise TypeError("The parameter %s of concat must be DistributedSampler or RandomSampler!" % sampler)
4044
+
4045
+ if isinstance(sampler, samplers.RandomSampler):
4046
+ if sampler.replacement:
4047
+ raise ValueError("The parameter replacement of RandomSampler must be False!")
4048
+
4049
+ if sampler.get_num_samples() is not None:
4050
+ raise ValueError("The parameter num_samples of RandomSampler is not support to be set!")
4051
+
4052
+ self._sampler = sampler
4053
+ self._children_sizes = [c.get_dataset_size() for c in self.children]
4054
+ return
3746
4055
 
3747
4056
  if sampler.is_shuffled():
3748
4057
  raise ValueError("The parameter shuffle of DistributedSampler must be False!")
@@ -3845,7 +4154,12 @@ class _ToDevice:
3845
4154
  self._runtime_context = cde.PythonRuntimeContext()
3846
4155
  self._runtime_context.Init()
3847
4156
  self._to_device = cde.ToDevice(num_epochs)
3848
- self._to_device.Init(ir_tree)
4157
+ if dataset.get_init_step() != 0:
4158
+ init_step = dataset.get_init_step()
4159
+ dataset_size = dataset.get_dataset_size()
4160
+ self._to_device.Init(ir_tree, init_step, dataset_size)
4161
+ else:
4162
+ self._to_device.Init(ir_tree, 0, -1)
3849
4163
  self._runtime_context.AssignConsumer(self._to_device)
3850
4164
 
3851
4165
  ITERATORS_LIST.append(weakref.ref(self))
@@ -3872,6 +4186,14 @@ class _ToDevice:
3872
4186
  """
3873
4187
  return self._to_device.GetDataInfo()
3874
4188
 
4189
+ def get_send_info(self):
4190
+ """
4191
+ In sink mode, it returns the send information of dataset at this moment.
4192
+ Send information includes number of send batches, time summary of fetching data on host
4193
+ and time summary of sending data.
4194
+ """
4195
+ return self._to_device.GetSendInfo()
4196
+
3875
4197
  def release(self):
3876
4198
  """
3877
4199
  Manually terminate Device Queue instead of relying on out of scope destruction.
@@ -3892,8 +4214,8 @@ class _ToDevice:
3892
4214
  offload_model = GetOffloadModel(self._to_device, col_names)
3893
4215
  return offload_model
3894
4216
 
3895
- def _reset(self, step, epoch):
3896
- self._to_device.Reset(step, epoch)
4217
+ def _reset(self, step, dataset_size):
4218
+ self._to_device.Reset(step, dataset_size)
3897
4219
 
3898
4220
 
3899
4221
  class TransferDataset(Dataset):
@@ -3902,9 +4224,9 @@ class TransferDataset(Dataset):
3902
4224
 
3903
4225
  Args:
3904
4226
  input_dataset (Dataset): Input Dataset to be transferred.
3905
- send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True.
4227
+ send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: ``True``.
3906
4228
  create_data_info_queue (bool, optional): Whether to create queue which stores
3907
- types and shapes of data or not. Default: False.
4229
+ types and shapes of data or not. Default: ``False``.
3908
4230
 
3909
4231
  Raises:
3910
4232
  TypeError: If device_type is empty.
@@ -3912,9 +4234,14 @@ class TransferDataset(Dataset):
3912
4234
  RuntimeError: If dataset is unknown.
3913
4235
  """
3914
4236
 
3915
- def __init__(self, input_dataset, send_epoch_end=True, create_data_info_queue=False):
4237
+ def __init__(self, input_dataset, send_epoch_end=True, create_data_info_queue=False, queue_name=""):
3916
4238
  super().__init__(children=input_dataset)
3917
- self.queue_name = str(uuid.uuid1())
4239
+ if queue_name == "":
4240
+ self.queue_name = str(uuid.uuid1())
4241
+ logger.info(f"queue_name is newly generated. value is {self.queue_name}")
4242
+ else:
4243
+ self.queue_name = queue_name
4244
+ logger.info(f"queue_name is read from compile cache. value is {self.queue_name}")
3918
4245
  self.device_type = context.get_context("device_target") if context else "CPU"
3919
4246
  self.device_id = context.get_context("device_id") if context else 0
3920
4247
 
@@ -3973,6 +4300,16 @@ class TransferDataset(Dataset):
3973
4300
  return self._to_device.get_data_info()
3974
4301
  raise RuntimeError("Calling get_data_info with bad state.")
3975
4302
 
4303
+ def get_send_info(self):
4304
+ """
4305
+ In sink mode, return the send information of the dataset at this moment.
4306
+ The send information includes the number of batches sent, a time summary of fetching data on the host,
4307
+ and a time summary of sending data.
4308
+ """
4309
+ if self._to_device is not None:
4310
+ return self._to_device.get_send_info()
4311
+ raise RuntimeError("Calling get_send_info with bad state, data queue is not initialized.")
4312
+
3976
4313
  def get_offload_model(self):
3977
4314
  if self._to_device is not None:
3978
4315
  return self._to_device.get_offload_model(self.column_name)
@@ -3986,10 +4323,10 @@ class TransferDataset(Dataset):
3986
4323
  if self._to_device is not None:
3987
4324
  self._to_device.release()
3988
4325
 
3989
- def _reset(self, step, epoch):
4326
+ def _reset(self, step, dataset_size):
3990
4327
  if self._to_device is not None:
3991
- logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(epoch))
3992
- self._to_device._reset(step, epoch) # pylint: disable=protected-access
4328
+ logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(step // dataset_size))
4329
+ self._to_device._reset(step, dataset_size) # pylint: disable=protected-access
3993
4330
 
3994
4331
 
3995
4332
  class Schema:
@@ -3997,15 +4334,13 @@ class Schema:
3997
4334
  Class to represent a schema of a dataset.
3998
4335
 
3999
4336
  Args:
4000
- schema_file(str): Path of the schema file. Default: None.
4001
-
4002
- Returns:
4003
- Schema object, schema info about dataset.
4337
+ schema_file (str): Path of the schema file. Default: ``None``.
4004
4338
 
4005
4339
  Raises:
4006
4340
  RuntimeError: If schema file failed to load.
4007
4341
 
4008
4342
  Examples:
4343
+ >>> import mindspore.dataset as ds
4009
4344
  >>> from mindspore import dtype as mstype
4010
4345
  >>>
4011
4346
  >>> # Create schema; specify column name, mindspore.dtype and shape of the column
@@ -4027,16 +4362,17 @@ class Schema:
4027
4362
  name (str): The new name of the column.
4028
4363
  de_type (str): Data type of the column.
4029
4364
  shape (list[int], optional): Shape of the column.
4030
- Default: None, [-1] which is an unknown shape of rank 1.
4365
+ Default: ``None``, [-1] which is an unknown shape of rank 1.
4031
4366
 
4032
4367
  Raises:
4033
4368
  ValueError: If column type is unknown.
4034
4369
 
4035
4370
  Examples:
4036
- >>> from mindspore import dtype as mstype
4037
- >>>
4038
- >>> schema = ds.Schema()
4039
- >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
4371
+ >>> import mindspore.dataset as ds
4372
+ >>> from mindspore import dtype as mstype
4373
+ >>>
4374
+ >>> schema = ds.Schema()
4375
+ >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
4040
4376
  """
4041
4377
  if isinstance(de_type, typing.Type):
4042
4378
  de_type = mstype_to_detype(de_type)
@@ -4084,9 +4420,11 @@ class Schema:
4084
4420
 
4085
4421
  Examples:
4086
4422
  >>> from mindspore.dataset import Schema
4423
+ >>> from mindspore import dtype as mstype
4087
4424
  >>>
4088
- >>> schema1 = ds.Schema()
4089
- >>> schema2 = schema1.to_json()
4425
+ >>> schema = Schema()
4426
+ >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
4427
+ >>> json = schema.to_json()
4090
4428
  """
4091
4429
  return self.cpp_schema.to_json()
4092
4430
 
@@ -4104,12 +4442,11 @@ class Schema:
4104
4442
 
4105
4443
  Examples:
4106
4444
  >>> import json
4107
- >>>
4108
4445
  >>> from mindspore.dataset import Schema
4109
4446
  >>>
4110
- >>> with open("/path/to/schema_file") as file:
4447
+ >>> with open("/path/to/schema_file", "r") as file:
4111
4448
  ... json_obj = json.load(file)
4112
- ... schema = ds.Schema()
4449
+ ... schema = Schema()
4113
4450
  ... schema.from_json(json_obj)
4114
4451
  """
4115
4452
  self.cpp_schema.from_string(json.dumps(json_obj, indent=2))