mindspore 1.10.0__cp37-cp37m-win_amd64.whl → 2.0.0rc1__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (966)
  1. mindspore/.commit_id +1 -1
  2. mindspore/ConcurrencyCheck.dll +0 -0
  3. mindspore/CppBuildInsights.dll +0 -0
  4. mindspore/CppCoreCheck.dll +0 -0
  5. mindspore/EnumIndex.dll +0 -0
  6. mindspore/EspXEngine.dll +0 -0
  7. mindspore/HResultCheck.dll +0 -0
  8. mindspore/KernelTraceControl.dll +0 -0
  9. mindspore/LocalESPC.dll +0 -0
  10. mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
  11. mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
  12. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  13. mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
  14. mindspore/Newtonsoft.Json.dll +0 -0
  15. mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
  16. mindspore/VariantClear.dll +0 -0
  17. mindspore/__init__.py +9 -4
  18. mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
  19. mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
  20. mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
  21. mindspore/_check_jit_forbidden_api.py +102 -0
  22. mindspore/_checkparam.py +1066 -1001
  23. mindspore/_extends/builtin_operations.py +32 -4
  24. mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
  25. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
  26. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
  27. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
  28. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
  29. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
  30. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  31. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
  32. mindspore/_extends/parse/__init__.py +5 -3
  33. mindspore/_extends/parse/namespace.py +17 -2
  34. mindspore/_extends/parse/parser.py +193 -34
  35. mindspore/_extends/parse/resources.py +7 -8
  36. mindspore/_extends/parse/standard_method.py +1780 -435
  37. mindspore/_extends/parse/trope.py +3 -1
  38. mindspore/amp.py +53 -58
  39. mindspore/atlprov.dll +0 -0
  40. mindspore/boost/adasum.py +3 -2
  41. mindspore/boost/boost.py +2 -2
  42. mindspore/boost/boost_cell_wrapper.py +46 -26
  43. mindspore/boost/dim_reduce.py +6 -5
  44. mindspore/boost/grad_accumulation.py +2 -1
  45. mindspore/boost/group_loss_scale_manager.py +1 -1
  46. mindspore/c1.dll +0 -0
  47. mindspore/c1xx.dll +0 -0
  48. mindspore/c2.dll +0 -0
  49. mindspore/cfgpersist.dll +0 -0
  50. mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
  51. mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
  52. mindspore/common/__init__.py +11 -10
  53. mindspore/common/_decorator.py +2 -0
  54. mindspore/common/_register_for_adapter.py +55 -0
  55. mindspore/common/_stub_tensor.py +201 -0
  56. mindspore/common/_utils.py +57 -0
  57. mindspore/common/api.py +582 -297
  58. mindspore/common/dtype.py +66 -18
  59. mindspore/common/dump.py +2 -2
  60. mindspore/common/initializer.py +38 -1
  61. mindspore/common/jit_config.py +25 -13
  62. mindspore/common/mutable.py +53 -24
  63. mindspore/common/parameter.py +60 -37
  64. mindspore/common/seed.py +8 -24
  65. mindspore/common/sparse_tensor.py +927 -0
  66. mindspore/common/tensor.py +1627 -3900
  67. mindspore/communication/__init__.py +10 -5
  68. mindspore/communication/_comm_helper.py +78 -214
  69. mindspore/communication/_hccl_management.py +2 -1
  70. mindspore/communication/management.py +136 -47
  71. mindspore/config/op_info.config +501 -1008
  72. mindspore/context.py +291 -56
  73. mindspore/d3dcompiler_47.dll +0 -0
  74. mindspore/dataset/__init__.py +12 -8
  75. mindspore/dataset/audio/__init__.py +9 -9
  76. mindspore/dataset/audio/transforms.py +1090 -228
  77. mindspore/dataset/audio/utils.py +87 -39
  78. mindspore/dataset/audio/validators.py +223 -1
  79. mindspore/dataset/callback/ds_callback.py +17 -15
  80. mindspore/dataset/core/config.py +246 -17
  81. mindspore/dataset/core/py_util_helpers.py +4 -3
  82. mindspore/dataset/core/validator_helpers.py +10 -10
  83. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  84. mindspore/dataset/debug/debug_hook.py +65 -0
  85. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  86. mindspore/dataset/engine/__init__.py +7 -3
  87. mindspore/dataset/engine/cache_client.py +9 -9
  88. mindspore/dataset/engine/datasets.py +648 -477
  89. mindspore/dataset/engine/datasets_audio.py +165 -167
  90. mindspore/dataset/engine/datasets_standard_format.py +93 -67
  91. mindspore/dataset/engine/datasets_text.py +492 -342
  92. mindspore/dataset/engine/datasets_user_defined.py +85 -50
  93. mindspore/dataset/engine/datasets_vision.py +1224 -699
  94. mindspore/dataset/engine/graphdata.py +134 -69
  95. mindspore/dataset/engine/iterators.py +50 -9
  96. mindspore/dataset/engine/offload.py +52 -31
  97. mindspore/dataset/engine/samplers.py +27 -24
  98. mindspore/dataset/engine/serializer_deserializer.py +14 -15
  99. mindspore/dataset/engine/validators.py +213 -52
  100. mindspore/dataset/text/__init__.py +10 -8
  101. mindspore/dataset/text/transforms.py +152 -57
  102. mindspore/dataset/text/utils.py +98 -49
  103. mindspore/dataset/text/validators.py +25 -0
  104. mindspore/dataset/transforms/__init__.py +4 -2
  105. mindspore/dataset/transforms/c_transforms.py +11 -13
  106. mindspore/dataset/transforms/py_transforms.py +2 -2
  107. mindspore/dataset/transforms/py_transforms_util.py +10 -0
  108. mindspore/dataset/transforms/transforms.py +13 -15
  109. mindspore/dataset/transforms/validators.py +7 -7
  110. mindspore/dataset/utils/__init__.py +2 -1
  111. mindspore/dataset/utils/browse_dataset.py +13 -13
  112. mindspore/dataset/utils/line_reader.py +121 -0
  113. mindspore/dataset/vision/__init__.py +8 -7
  114. mindspore/dataset/vision/c_transforms.py +125 -126
  115. mindspore/dataset/vision/py_transforms.py +37 -37
  116. mindspore/dataset/vision/py_transforms_util.py +23 -20
  117. mindspore/dataset/vision/transforms.py +316 -315
  118. mindspore/dataset/vision/utils.py +313 -17
  119. mindspore/dataset/vision/validators.py +6 -6
  120. mindspore/default_config.py +0 -1
  121. mindspore/dpcmi.dll +0 -0
  122. mindspore/{compression → experimental}/__init__.py +6 -5
  123. mindspore/experimental/map_parameter.py +275 -0
  124. mindspore/include/OWNERS +0 -1
  125. mindspore/include/api/callback/callback.h +9 -13
  126. mindspore/include/api/callback/ckpt_saver.h +2 -2
  127. mindspore/include/api/callback/loss_monitor.h +2 -2
  128. mindspore/include/api/callback/lr_scheduler.h +5 -5
  129. mindspore/include/api/callback/time_monitor.h +2 -2
  130. mindspore/include/api/callback/train_accuracy.h +4 -6
  131. mindspore/include/api/cfg.h +19 -6
  132. mindspore/include/api/context.h +70 -9
  133. mindspore/include/api/delegate.h +8 -1
  134. mindspore/include/api/dual_abi_helper.h +8 -24
  135. mindspore/include/api/metrics/accuracy.h +2 -2
  136. mindspore/include/api/metrics/metrics.h +4 -3
  137. mindspore/include/api/model.h +9 -4
  138. mindspore/include/api/model_group.h +68 -0
  139. mindspore/include/api/model_parallel_runner.h +17 -17
  140. mindspore/include/api/net.h +12 -11
  141. mindspore/include/api/serialization.h +20 -4
  142. mindspore/include/api/status.h +7 -1
  143. mindspore/include/api/types.h +25 -21
  144. mindspore/include/api/visible.h +4 -0
  145. mindspore/include/c_api/model_c.h +5 -0
  146. mindspore/include/c_api/status_c.h +1 -1
  147. mindspore/include/dataset/config.h +1 -1
  148. mindspore/include/dataset/constants.h +14 -0
  149. mindspore/include/dataset/text.h +59 -0
  150. mindspore/include/dataset/vision.h +56 -117
  151. mindspore/include/dataset/vision_lite.h +102 -0
  152. mindspore/jpeg62.dll +0 -0
  153. mindspore/log.py +28 -28
  154. mindspore/mindrecord/common/exceptions.py +2 -4
  155. mindspore/mindrecord/filereader.py +19 -1
  156. mindspore/mindrecord/filewriter.py +250 -88
  157. mindspore/mindrecord/mindpage.py +13 -13
  158. mindspore/mindrecord/shardheader.py +15 -15
  159. mindspore/mindrecord/shardreader.py +9 -0
  160. mindspore/mindrecord/shardwriter.py +29 -29
  161. mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
  162. mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
  163. mindspore/mindrecord/tools/csv_to_mr.py +4 -4
  164. mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
  165. mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
  166. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  167. mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
  168. mindspore/mindspore_common.dll +0 -0
  169. mindspore/mindspore_core.dll +0 -0
  170. mindspore/mindspore_glog.dll +0 -0
  171. mindspore/mindspore_shared_lib.dll +0 -0
  172. mindspore/msobj140.dll +0 -0
  173. mindspore/mspdb140.dll +0 -0
  174. mindspore/mspdbcore.dll +0 -0
  175. mindspore/mspdbst.dll +0 -0
  176. mindspore/mspft140.dll +0 -0
  177. mindspore/msvcdis140.dll +0 -0
  178. mindspore/msvcp140_1.dll +0 -0
  179. mindspore/msvcp140_2.dll +0 -0
  180. mindspore/msvcp140_atomic_wait.dll +0 -0
  181. mindspore/msvcp140_codecvt_ids.dll +0 -0
  182. mindspore/nn/__init__.py +1 -5
  183. mindspore/nn/cell.py +297 -234
  184. mindspore/nn/dynamic_lr.py +1 -1
  185. mindspore/nn/grad/cell_grad.py +17 -42
  186. mindspore/nn/layer/__init__.py +7 -4
  187. mindspore/nn/layer/activation.py +131 -88
  188. mindspore/nn/layer/basic.py +313 -613
  189. mindspore/nn/layer/channel_shuffle.py +103 -0
  190. mindspore/nn/layer/combined.py +1 -1
  191. mindspore/nn/layer/container.py +52 -6
  192. mindspore/nn/layer/conv.py +112 -43
  193. mindspore/nn/layer/dense.py +10 -9
  194. mindspore/nn/layer/embedding.py +36 -34
  195. mindspore/nn/layer/image.py +123 -27
  196. mindspore/nn/layer/math.py +108 -107
  197. mindspore/nn/layer/normalization.py +212 -366
  198. mindspore/nn/layer/padding.py +370 -42
  199. mindspore/nn/layer/pooling.py +1443 -219
  200. mindspore/nn/layer/rnn_cells.py +11 -16
  201. mindspore/nn/layer/rnns.py +38 -39
  202. mindspore/nn/layer/thor_layer.py +24 -25
  203. mindspore/nn/layer/timedistributed.py +5 -5
  204. mindspore/nn/layer/transformer.py +701 -0
  205. mindspore/nn/learning_rate_schedule.py +8 -8
  206. mindspore/nn/loss/__init__.py +9 -6
  207. mindspore/nn/loss/loss.py +678 -142
  208. mindspore/nn/metrics.py +53 -0
  209. mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
  210. mindspore/nn/optim/ada_grad.py +8 -8
  211. mindspore/nn/optim/adadelta.py +2 -3
  212. mindspore/nn/optim/adafactor.py +18 -14
  213. mindspore/nn/optim/adam.py +429 -87
  214. mindspore/nn/optim/adamax.py +5 -6
  215. mindspore/nn/optim/adasum.py +10 -8
  216. mindspore/nn/optim/asgd.py +7 -7
  217. mindspore/nn/optim/ftrl.py +81 -11
  218. mindspore/nn/optim/lamb.py +7 -8
  219. mindspore/nn/optim/lars.py +4 -4
  220. mindspore/nn/optim/lazyadam.py +82 -7
  221. mindspore/nn/optim/momentum.py +8 -7
  222. mindspore/nn/optim/optimizer.py +19 -10
  223. mindspore/nn/optim/proximal_ada_grad.py +6 -5
  224. mindspore/nn/optim/rmsprop.py +3 -3
  225. mindspore/nn/optim/rprop.py +20 -16
  226. mindspore/nn/optim/sgd.py +21 -15
  227. mindspore/nn/optim/thor.py +23 -21
  228. mindspore/nn/probability/__init__.py +0 -2
  229. mindspore/nn/probability/bijector/bijector.py +7 -6
  230. mindspore/nn/probability/bijector/invert.py +4 -2
  231. mindspore/nn/probability/bijector/softplus.py +2 -2
  232. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  233. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  234. mindspore/nn/probability/distribution/__init__.py +6 -0
  235. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
  236. mindspore/nn/probability/distribution/_utils/utils.py +11 -17
  237. mindspore/nn/probability/distribution/bernoulli.py +6 -6
  238. mindspore/nn/probability/distribution/beta.py +1 -1
  239. mindspore/nn/probability/distribution/categorical.py +9 -9
  240. mindspore/nn/probability/distribution/cauchy.py +8 -8
  241. mindspore/nn/probability/distribution/distribution.py +12 -6
  242. mindspore/nn/probability/distribution/exponential.py +5 -5
  243. mindspore/nn/probability/distribution/gamma.py +3 -3
  244. mindspore/nn/probability/distribution/geometric.py +6 -5
  245. mindspore/nn/probability/distribution/gumbel.py +5 -5
  246. mindspore/nn/probability/distribution/half_normal.py +133 -0
  247. mindspore/nn/probability/distribution/laplace.py +128 -0
  248. mindspore/nn/probability/distribution/log_normal.py +0 -1
  249. mindspore/nn/probability/distribution/logistic.py +4 -5
  250. mindspore/nn/probability/distribution/normal.py +11 -15
  251. mindspore/nn/probability/distribution/poisson.py +6 -2
  252. mindspore/nn/probability/distribution/student_t.py +150 -0
  253. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  254. mindspore/nn/probability/distribution/uniform.py +5 -5
  255. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  256. mindspore/nn/reinforcement/tensor_array.py +2 -2
  257. mindspore/nn/sparse/sparse.py +8 -1
  258. mindspore/nn/wrap/cell_wrapper.py +55 -27
  259. mindspore/nn/wrap/grad_reducer.py +20 -11
  260. mindspore/nn/wrap/loss_scale.py +47 -30
  261. mindspore/numpy/array_creations.py +33 -22
  262. mindspore/numpy/array_ops.py +46 -42
  263. mindspore/numpy/logic_ops.py +6 -27
  264. mindspore/numpy/math_ops.py +26 -19
  265. mindspore/numpy/utils.py +1 -8
  266. mindspore/numpy/utils_const.py +112 -62
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/opencv_imgproc452.dll +0 -0
  270. mindspore/ops/__init__.py +6 -3
  271. mindspore/ops/_constants.py +0 -6
  272. mindspore/ops/_grad/__init__.py +2 -1
  273. mindspore/ops/_grad/grad_array_ops.py +209 -152
  274. mindspore/ops/_grad/grad_base.py +55 -17
  275. mindspore/ops/_grad/grad_clip_ops.py +11 -3
  276. mindspore/ops/_grad/grad_comm_ops.py +58 -47
  277. mindspore/ops/_grad/grad_implementations.py +21 -61
  278. mindspore/ops/_grad/grad_inner_ops.py +48 -6
  279. mindspore/ops/_grad/grad_math_ops.py +306 -161
  280. mindspore/ops/_grad/grad_nn_ops.py +192 -181
  281. mindspore/ops/_grad/grad_other_ops.py +1 -1
  282. mindspore/ops/_grad/grad_quant_ops.py +5 -5
  283. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  284. mindspore/ops/_grad/grad_sparse.py +15 -9
  285. mindspore/ops/_grad_experimental/__init__.py +1 -0
  286. mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
  287. mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
  288. mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
  289. mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
  290. mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
  291. mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
  292. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  293. mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
  294. mindspore/ops/_op_impl/__init__.py +3 -3
  295. mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
  296. mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
  297. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  298. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
  299. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  300. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  301. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
  302. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  303. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  304. mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
  305. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  306. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
  307. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  308. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  309. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  310. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  311. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  312. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  313. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  314. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  315. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  316. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  317. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  318. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  319. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  320. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  321. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  322. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  323. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  324. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  325. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
  326. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
  327. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  328. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  329. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  330. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  331. mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
  332. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  333. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  334. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  335. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  336. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  337. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  338. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  339. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  340. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  341. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  342. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  343. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  344. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  345. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  346. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  347. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  348. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  349. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  350. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  351. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  352. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
  353. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  354. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
  355. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  356. mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
  357. mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
  358. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  359. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  360. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  361. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  362. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  363. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  364. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  365. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
  366. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  367. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  368. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  369. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  370. mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
  371. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  372. mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
  373. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  374. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  375. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  376. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  377. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  378. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  379. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  380. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  381. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  382. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  383. mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
  384. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  385. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  386. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  387. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  388. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  389. mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
  390. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  391. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  392. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  393. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  394. mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
  395. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  396. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  397. mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
  398. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  399. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  400. mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
  401. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  402. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  403. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  404. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  405. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  406. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  407. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  408. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  409. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  410. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  411. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  412. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  413. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  414. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  415. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  416. mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
  417. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  418. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  419. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  420. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  421. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  422. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  423. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  424. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  425. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  426. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  427. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  428. mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
  429. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  430. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  431. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  432. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  433. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  434. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  435. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  436. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  437. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  438. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  439. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  440. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  441. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  442. mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
  443. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  444. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  445. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  446. mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
  447. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
  448. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
  449. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  450. mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
  451. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  452. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  453. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  454. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  455. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  456. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  457. mindspore/ops/_op_impl/cpu/__init__.py +1 -2
  458. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  459. mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
  460. mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
  461. mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
  462. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  463. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  464. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  465. mindspore/ops/_op_impl/tbe/__init__.py +27 -608
  466. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
  467. mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
  468. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  469. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  470. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  471. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
  472. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  473. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  474. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
  475. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
  476. mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
  477. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  478. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
  479. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  480. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  481. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  482. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  483. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  484. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
  485. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
  486. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  487. mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
  488. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
  489. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  490. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  491. mindspore/ops/_op_impl/tbe/greater.py +2 -0
  492. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  493. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
  494. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  495. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  496. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
  497. mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
  498. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
  499. mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
  500. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
  501. mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
  502. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
  503. mindspore/ops/_op_impl/tbe/slice.py +26 -15
  504. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  505. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  506. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
  507. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  508. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
  509. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
  510. mindspore/ops/_primitive_cache.py +3 -2
  511. mindspore/ops/_register_for_op.py +11 -0
  512. mindspore/ops/_utils/__init__.py +1 -1
  513. mindspore/ops/_utils/utils.py +20 -41
  514. mindspore/ops/_vmap/__init__.py +2 -2
  515. mindspore/ops/_vmap/vmap_array_ops.py +170 -78
  516. mindspore/ops/_vmap/vmap_base.py +24 -10
  517. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  518. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  519. mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
  520. mindspore/ops/_vmap/vmap_image_ops.py +52 -0
  521. mindspore/ops/_vmap/vmap_math_ops.py +77 -6
  522. mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
  523. mindspore/ops/_vmap/vmap_other_ops.py +3 -1
  524. mindspore/ops/_vmap/vmap_random_ops.py +55 -3
  525. mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
  526. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  527. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  528. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
  529. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
  530. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
  531. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
  532. mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
  533. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  534. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  535. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  536. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
  537. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  538. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
  539. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  540. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  541. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
  542. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
  543. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  544. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  545. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  546. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  547. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  548. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  549. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  550. mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
  551. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  552. mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
  553. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
  554. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  555. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
  556. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  557. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  558. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
  559. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
  560. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  561. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  562. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  563. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
  565. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  566. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  567. mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
  568. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
  569. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  570. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
  571. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
  572. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
  573. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
  574. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
  576. mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
  577. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  578. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  579. mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
  580. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  581. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
  582. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
  583. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
  584. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  585. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  586. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  587. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  588. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  589. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
  590. mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
  591. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
  592. mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
  593. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  594. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
  595. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
  596. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
  597. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  598. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  599. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  600. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  601. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  602. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  603. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  604. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  605. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  606. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  607. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  608. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
  609. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
  610. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
  611. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
  612. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  613. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  614. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  615. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  616. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  617. mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
  618. mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
  619. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  620. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  621. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
  622. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
  623. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
  624. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
  625. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  626. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
  627. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
  628. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
  629. mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
  630. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  631. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  632. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
  633. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
  634. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
  635. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  636. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  637. mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
  638. mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
  639. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  640. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  642. mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
  643. mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
  644. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  645. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  646. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  647. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  648. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  649. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
  650. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
  651. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  652. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  653. mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
  654. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
  655. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
  656. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
  657. mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
  658. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  659. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  660. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
  661. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
  662. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
  663. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  664. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  665. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
  666. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
  667. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
  668. mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
  669. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
  670. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  671. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  672. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
  673. mindspore/ops/bprop_mindir/__init__.py +1 -4
  674. mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
  675. mindspore/ops/composite/__init__.py +12 -13
  676. mindspore/ops/composite/base.py +261 -254
  677. mindspore/ops/composite/env_ops.py +41 -0
  678. mindspore/ops/composite/math_ops.py +197 -156
  679. mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
  680. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
  681. mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
  682. mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
  683. mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
  684. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
  685. mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
  686. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  687. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  688. mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
  689. mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
  690. mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
  691. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
  692. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  693. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
  694. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
  695. mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
  696. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  697. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  698. mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
  699. mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
  700. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
  701. mindspore/ops/function/__init__.py +323 -8
  702. mindspore/ops/function/array_func.py +3511 -780
  703. mindspore/ops/function/clip_func.py +329 -0
  704. mindspore/ops/function/debug_func.py +6 -6
  705. mindspore/ops/function/grad/__init__.py +5 -1
  706. mindspore/ops/function/grad/grad_func.py +736 -65
  707. mindspore/ops/function/image_func.py +270 -0
  708. mindspore/ops/function/linalg_func.py +268 -8
  709. mindspore/ops/function/math_func.py +8032 -3164
  710. mindspore/ops/function/nn_func.py +5619 -1855
  711. mindspore/ops/function/other_func.py +115 -0
  712. mindspore/ops/function/parameter_func.py +11 -10
  713. mindspore/ops/function/random_func.py +939 -77
  714. mindspore/ops/function/sparse_func.py +249 -84
  715. mindspore/ops/function/sparse_unary_func.py +2303 -0
  716. mindspore/ops/function/spectral_func.py +146 -0
  717. mindspore/ops/function/vmap_func.py +114 -0
  718. mindspore/ops/functional.py +182 -254
  719. mindspore/ops/op_info_register.py +79 -34
  720. mindspore/ops/operations/__init__.py +210 -118
  721. mindspore/ops/operations/_csr_ops.py +7 -7
  722. mindspore/ops/operations/_embedding_cache_ops.py +25 -15
  723. mindspore/ops/operations/_grad_ops.py +447 -322
  724. mindspore/ops/operations/_inner_ops.py +547 -176
  725. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  726. mindspore/ops/operations/_ms_kernel.py +29 -27
  727. mindspore/ops/operations/_ocr_ops.py +11 -11
  728. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  729. mindspore/ops/operations/_quant_ops.py +186 -101
  730. mindspore/ops/operations/_rl_inner_ops.py +122 -61
  731. mindspore/ops/operations/_scalar_ops.py +466 -0
  732. mindspore/ops/operations/_sequence_ops.py +1047 -0
  733. mindspore/ops/operations/_tensor_array.py +10 -11
  734. mindspore/ops/operations/_thor_ops.py +4 -4
  735. mindspore/ops/operations/array_ops.py +1428 -1226
  736. mindspore/ops/operations/comm_ops.py +180 -117
  737. mindspore/ops/operations/control_ops.py +4 -2
  738. mindspore/ops/operations/custom_ops.py +185 -98
  739. mindspore/ops/operations/debug_ops.py +92 -54
  740. mindspore/ops/operations/image_ops.py +406 -211
  741. mindspore/ops/operations/inner_ops.py +42 -53
  742. mindspore/ops/operations/linalg_ops.py +32 -29
  743. mindspore/ops/operations/math_ops.py +2076 -897
  744. mindspore/ops/operations/nn_ops.py +1282 -1252
  745. mindspore/ops/operations/other_ops.py +124 -278
  746. mindspore/ops/operations/random_ops.py +345 -178
  747. mindspore/ops/operations/rl_ops.py +8 -9
  748. mindspore/ops/operations/sparse_ops.py +502 -157
  749. mindspore/ops/operations/spectral_ops.py +107 -0
  750. mindspore/ops/primitive.py +192 -15
  751. mindspore/ops/vm_impl_registry.py +23 -2
  752. mindspore/parallel/__init__.py +6 -1
  753. mindspore/parallel/_auto_parallel_context.py +199 -92
  754. mindspore/parallel/_cell_wrapper.py +4 -2
  755. mindspore/parallel/_cost_model_context.py +3 -0
  756. mindspore/parallel/_dp_allreduce_fusion.py +2 -1
  757. mindspore/parallel/_offload_context.py +185 -0
  758. mindspore/parallel/_parallel_serialization.py +167 -28
  759. mindspore/parallel/_ps_context.py +9 -5
  760. mindspore/parallel/_recovery_context.py +1 -1
  761. mindspore/parallel/_tensor.py +9 -1
  762. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  763. mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
  764. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  765. mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
  766. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  767. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
  768. mindspore/parallel/_utils.py +47 -7
  769. mindspore/parallel/algo_parameter_config.py +5 -1
  770. mindspore/parallel/checkpoint_transform.py +329 -0
  771. mindspore/parallel/shard.py +229 -0
  772. mindspore/perf_msvcbuildinsights.dll +0 -0
  773. mindspore/pgodb140.dll +0 -0
  774. mindspore/pgort140.dll +0 -0
  775. mindspore/profiler/__init__.py +2 -1
  776. mindspore/profiler/common/util.py +4 -3
  777. mindspore/profiler/common/validator/validate_path.py +2 -2
  778. mindspore/profiler/envprofiling.py +249 -0
  779. mindspore/profiler/parser/aicpu_data_parser.py +38 -39
  780. mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
  781. mindspore/profiler/parser/base_timeline_generator.py +471 -0
  782. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
  783. mindspore/profiler/parser/framework_parser.py +42 -16
  784. mindspore/profiler/parser/hccl_parser.py +158 -158
  785. mindspore/profiler/parser/hwts_log_parser.py +7 -6
  786. mindspore/profiler/parser/integrator.py +18 -1579
  787. mindspore/profiler/parser/minddata_analyzer.py +8 -8
  788. mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
  789. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  790. mindspore/profiler/parser/optime_parser.py +17 -18
  791. mindspore/profiler/parser/profiler_info.py +108 -0
  792. mindspore/profiler/parser/step_trace_parser.py +1 -1
  793. mindspore/profiler/profiling.py +396 -194
  794. mindspore/rewrite/__init__.py +6 -2
  795. mindspore/rewrite/api/node.py +51 -110
  796. mindspore/rewrite/api/node_type.py +10 -6
  797. mindspore/rewrite/api/pattern_engine.py +51 -7
  798. mindspore/rewrite/api/scoped_value.py +64 -53
  799. mindspore/rewrite/api/symbol_tree.py +108 -61
  800. mindspore/rewrite/api/tree_node_helper.py +2 -3
  801. mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
  802. mindspore/rewrite/ast_helpers/__init__.py +6 -3
  803. mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
  804. mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
  805. mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
  806. mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
  807. mindspore/rewrite/ast_transformers/__init__.py +0 -1
  808. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
  809. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
  810. mindspore/rewrite/common/__init__.py +2 -0
  811. mindspore/rewrite/common/event.py +1 -1
  812. mindspore/rewrite/common/observable.py +1 -1
  813. mindspore/rewrite/common/observer.py +1 -1
  814. mindspore/rewrite/common/rewrite_elog.py +35 -0
  815. mindspore/rewrite/namer.py +2 -2
  816. mindspore/rewrite/namespace.py +14 -4
  817. mindspore/rewrite/node.py +161 -13
  818. mindspore/rewrite/parser.py +0 -1
  819. mindspore/rewrite/parser_register.py +0 -1
  820. mindspore/rewrite/parsers/arguments_parser.py +3 -2
  821. mindspore/rewrite/parsers/assign_parser.py +267 -67
  822. mindspore/rewrite/parsers/attribute_parser.py +56 -0
  823. mindspore/rewrite/parsers/class_def_parser.py +191 -108
  824. mindspore/rewrite/parsers/constant_parser.py +101 -0
  825. mindspore/rewrite/parsers/container_parser.py +88 -0
  826. mindspore/rewrite/parsers/for_parser.py +28 -15
  827. mindspore/rewrite/parsers/function_def_parser.py +21 -5
  828. mindspore/rewrite/parsers/if_parser.py +11 -28
  829. mindspore/rewrite/parsers/module_parser.py +9 -6
  830. mindspore/rewrite/parsers/return_parser.py +3 -2
  831. mindspore/rewrite/sparsify/__init__.py +0 -0
  832. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  833. mindspore/rewrite/sparsify/sparsify.py +109 -0
  834. mindspore/rewrite/sparsify/utils.py +173 -0
  835. mindspore/rewrite/symbol_tree.py +322 -109
  836. mindspore/rewrite/symbol_tree_builder.py +45 -8
  837. mindspore/rewrite/symbol_tree_dumper.py +0 -1
  838. mindspore/rewrite/topological_manager.py +1 -2
  839. mindspore/run_check/_check_version.py +209 -112
  840. mindspore/run_check/run_check.py +2 -1
  841. mindspore/tbbmalloc.dll +0 -0
  842. mindspore/tinyxml2.dll +0 -0
  843. mindspore/train/__init__.py +6 -4
  844. mindspore/train/_utils.py +28 -5
  845. mindspore/train/amp.py +321 -50
  846. mindspore/train/callback/__init__.py +3 -1
  847. mindspore/train/callback/_backup_and_restore.py +120 -0
  848. mindspore/train/callback/_callback.py +8 -8
  849. mindspore/train/callback/_checkpoint.py +12 -9
  850. mindspore/train/callback/_early_stop.py +13 -7
  851. mindspore/train/callback/_history.py +8 -8
  852. mindspore/train/callback/_lambda_callback.py +6 -6
  853. mindspore/train/callback/_landscape.py +36 -38
  854. mindspore/train/callback/_loss_monitor.py +12 -6
  855. mindspore/train/callback/_lr_scheduler_callback.py +2 -4
  856. mindspore/train/callback/_on_request_exit.py +212 -0
  857. mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
  858. mindspore/train/callback/_summary_collector.py +27 -19
  859. mindspore/train/callback/_time_monitor.py +13 -7
  860. mindspore/train/checkpoint_pb2.py +68 -8
  861. mindspore/train/data_sink.py +122 -33
  862. mindspore/train/dataset_helper.py +28 -87
  863. mindspore/train/loss_scale_manager.py +4 -7
  864. mindspore/{nn → train}/metrics/__init__.py +20 -20
  865. mindspore/{nn → train}/metrics/accuracy.py +12 -10
  866. mindspore/{nn → train}/metrics/auc.py +4 -4
  867. mindspore/{nn → train}/metrics/bleu_score.py +4 -4
  868. mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
  869. mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
  870. mindspore/{nn → train}/metrics/dice.py +6 -5
  871. mindspore/{nn → train}/metrics/error.py +7 -5
  872. mindspore/{nn → train}/metrics/fbeta.py +9 -7
  873. mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
  874. mindspore/{nn → train}/metrics/loss.py +4 -3
  875. mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
  876. mindspore/{nn → train}/metrics/metric.py +6 -5
  877. mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
  878. mindspore/{nn → train}/metrics/perplexity.py +5 -4
  879. mindspore/{nn → train}/metrics/precision.py +5 -4
  880. mindspore/{nn → train}/metrics/recall.py +5 -4
  881. mindspore/{nn → train}/metrics/roc.py +7 -6
  882. mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
  883. mindspore/{nn → train}/metrics/topk.py +7 -5
  884. mindspore/train/mind_ir_pb2.py +339 -32
  885. mindspore/train/model.py +113 -84
  886. mindspore/train/serialization.py +547 -167
  887. mindspore/train/summary/_summary_adapter.py +1 -1
  888. mindspore/train/summary/summary_record.py +43 -12
  889. mindspore/train/train_thor/convert_utils.py +7 -1
  890. mindspore/train/train_thor/dataset_helper.py +3 -3
  891. mindspore/train/train_thor/model_thor.py +0 -4
  892. mindspore/turbojpeg.dll +0 -0
  893. mindspore/vcmeta.dll +0 -0
  894. mindspore/vcruntime140.dll +0 -0
  895. mindspore/vcruntime140_1.dll +0 -0
  896. mindspore/version.py +1 -1
  897. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
  898. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
  899. mindspore/compression/common/constant.py +0 -124
  900. mindspore/compression/export/__init__.py +0 -19
  901. mindspore/compression/export/quant_export.py +0 -514
  902. mindspore/compression/quant/qat.py +0 -636
  903. mindspore/compression/quant/quant_utils.py +0 -462
  904. mindspore/compression/quant/quantizer.py +0 -68
  905. mindspore/libatomic-1.dll +0 -0
  906. mindspore/libgcc_s_seh-1.dll +0 -0
  907. mindspore/libgfortran-4.dll +0 -0
  908. mindspore/libgomp-1.dll +0 -0
  909. mindspore/libjpeg-62.dll +0 -0
  910. mindspore/libmindspore.dll +0 -0
  911. mindspore/libmindspore_common.dll +0 -0
  912. mindspore/libmindspore_core.dll +0 -0
  913. mindspore/libmindspore_glog.dll +0 -0
  914. mindspore/libnnacl.dll +0 -0
  915. mindspore/libopencv_core452.dll +0 -0
  916. mindspore/libopencv_imgcodecs452.dll +0 -0
  917. mindspore/libopencv_imgproc452.dll +0 -0
  918. mindspore/libquadmath-0.dll +0 -0
  919. mindspore/libsqlite3.dll +0 -0
  920. mindspore/libssp-0.dll +0 -0
  921. mindspore/libstdc++-6.dll +0 -0
  922. mindspore/libtinyxml2.dll +0 -0
  923. mindspore/libturbojpeg.dll +0 -0
  924. mindspore/libwinpthread-1.dll +0 -0
  925. mindspore/nn/layer/quant.py +0 -1868
  926. mindspore/nn/layer/rnn_utils.py +0 -90
  927. mindspore/nn/probability/dpn/__init__.py +0 -22
  928. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  929. mindspore/nn/probability/dpn/vae/cvae.py +0 -138
  930. mindspore/nn/probability/dpn/vae/vae.py +0 -122
  931. mindspore/nn/probability/infer/__init__.py +0 -22
  932. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  933. mindspore/nn/probability/infer/variational/svi.py +0 -84
  934. mindspore/nn/probability/toolbox/__init__.py +0 -22
  935. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  936. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
  937. mindspore/nn/probability/transforms/__init__.py +0 -22
  938. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  939. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  940. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  941. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  942. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  943. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  944. mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
  945. mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
  946. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
  947. mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
  948. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
  949. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
  950. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
  951. mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
  952. mindspore/ops/composite/array_ops.py +0 -210
  953. mindspore/ops/composite/clip_ops.py +0 -238
  954. mindspore/ops/composite/random_ops.py +0 -426
  955. mindspore/ops/composite/vmap_ops.py +0 -38
  956. mindspore/ops/operations/sponge_ops.py +0 -3531
  957. mindspore/ops/operations/sponge_update_ops.py +0 -2546
  958. mindspore/parallel/nn/__init__.py +0 -42
  959. mindspore/parallel/nn/loss.py +0 -22
  960. mindspore/parallel/nn/moe.py +0 -21
  961. mindspore/parallel/nn/op_parallel_config.py +0 -22
  962. mindspore/parallel/nn/transformer.py +0 -31
  963. mindspore/run_check/_check_deps_version.py +0 -84
  964. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  965. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  966. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2022 Huawei Technologies Co., Ltd
1
+ # Copyright 2022-2023 Huawei Technologies Co., Ltd
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -31,6 +31,7 @@ import json
31
31
  import os
32
32
  import signal
33
33
  import stat
34
+ import warnings
34
35
 
35
36
  import gc
36
37
  import time
@@ -45,13 +46,13 @@ import copy
45
46
  import weakref
46
47
  import platform
47
48
  import psutil
48
- import numpy as np
49
49
 
50
50
  import mindspore._c_dataengine as cde
51
51
  from mindspore._c_expression import typing
52
52
 
53
53
  from mindspore import log as logger
54
- from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _get_ps_context, _enable_distributed_mindrt
54
+ from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _get_ps_context,\
55
+ _enable_distributed_mindrt
55
56
  from mindspore.dataset.engine.offload import GetOffloadModel
56
57
 
57
58
  import mindspore.dataset.transforms.c_transforms as c_transforms
@@ -59,6 +60,7 @@ import mindspore.dataset.transforms.py_transforms as py_transforms
59
60
  import mindspore.dataset.transforms as transforms
60
61
  from mindspore.dataset.text.utils import SentencePieceModel, DE_C_INTER_SENTENCEPIECE_MODE
61
62
  from mindspore.parallel._utils import _get_device_num
63
+ from mindspore.dataset.debug import DebugHook
62
64
 
63
65
  from . import samplers
64
66
  from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
@@ -67,9 +69,9 @@ from .queue import _SharedQueue, _Queue
67
69
  from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
68
70
  check_rename, check_device_send, check_take, check_output_shape, check_project, \
69
71
  check_sync_wait, check_zip_dataset, check_add_column, check_concat, check_split, check_bucket_batch_by_length, \
70
- check_save, check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send, deprecated
72
+ check_save, check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send, check_padded_batch
71
73
  from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
72
- get_enable_watchdog, get_seed, set_seed
74
+ get_enable_watchdog, get_seed, set_seed, get_debug_mode, get_multiprocessing_timeout_interval, _get_debug_hook_list
73
75
  from ..core.datatypes import mstype_to_detype
74
76
  from ..core.validator_helpers import replace_none
75
77
  from ..core.py_util_helpers import ExceptionHandler
@@ -114,16 +116,19 @@ def _get_training_dataset():
114
116
  return _train_dataset
115
117
 
116
118
 
117
- def _reset_training_dataset(step):
119
+ def _reset_training_dataset(global_step, dataset_size):
118
120
  """
119
- Reset the training dataset to the given step number.
121
+ Reset the training dataset to the given global step.
120
122
 
121
123
  Args:
122
- step (int): Global step number.
124
+ global_step (int): Number of global steps that have completed training.
125
+ The dataset will resume providing data from the next step after reset.
126
+ dataset_size (int): Number of steps per epoch.
123
127
  """
124
128
  dataset = _get_training_dataset()
125
129
  if dataset is not None:
126
- dataset._reset(step) # pylint: disable=W0212
130
+ epoch = global_step // dataset_size
131
+ dataset._reset(global_step, epoch) # pylint: disable=protected-access
127
132
  else:
128
133
  raise RuntimeError("Training dataset is not set.")
129
134
 
@@ -227,7 +232,7 @@ def _get_operator_process():
227
232
  Inner implemented method, mainly for passing sub-process id in C layer
228
233
 
229
234
  Returns:
230
- dict, mapping dict of operator id and corresponding process id.
235
+ dict, mapping dict of operation id and corresponding process id.
231
236
  """
232
237
  global _OP_PROCESS
233
238
  process_info = _OP_PROCESS
@@ -283,19 +288,20 @@ class Dataset:
283
288
  |
284
289
  MappableDataset
285
290
 
286
- DatasetOperator: MapDataset(UnionBaseDataset)
287
- BatchDataset(UnionBaseDataset)
288
- BucketBatchByLengthDataset(UnionBaseDataset)
289
- ShuffleDataset(UnionBaseDataset)
290
- FilterDataset(UnionBaseDataset)
291
- RepeatDataset(UnionBaseDataset)
292
- SkipDataset(UnionBaseDataset)
293
- TakeDataset(UnionBaseDataset)
294
- ZipDataset(UnionBaseDataset)
295
- ConcatDataset(UnionBaseDataset)
296
- RenameDataset(UnionBaseDataset)
297
- ProjectDataset(UnionBaseDataset)
298
- SyncWaitDataset(UnionBaseDataset)
291
+ DatasetOperation: MapDataset(UnionBaseDataset)
292
+ BatchDataset(UnionBaseDataset)
293
+ PaddedBatchDataset(UnionBaseDataset)
294
+ BucketBatchByLengthDataset(UnionBaseDataset)
295
+ ShuffleDataset(UnionBaseDataset)
296
+ FilterDataset(UnionBaseDataset)
297
+ RepeatDataset(UnionBaseDataset)
298
+ SkipDataset(UnionBaseDataset)
299
+ TakeDataset(UnionBaseDataset)
300
+ ZipDataset(UnionBaseDataset)
301
+ ConcatDataset(UnionBaseDataset)
302
+ RenameDataset(UnionBaseDataset)
303
+ ProjectDataset(UnionBaseDataset)
304
+ SyncWaitDataset(UnionBaseDataset)
299
305
 
300
306
  Impl Dataset - vision: ImageFolderDataset(MappableDataset, VisionBaseDataset)
301
307
  USPSDataset(SourceDataset, VisionBaseDataset)
@@ -309,8 +315,8 @@ class Dataset:
309
315
  NumpySlicesDataset(GeneratorDataset)
310
316
 
311
317
  Args:
312
- num_parallel_workers (int, optional): Number of workers to process the dataset in parallel
313
- (default=None).
318
+ num_parallel_workers (int, optional): Number of workers to process the dataset in parallel.
319
+ Default: None.
314
320
  """
315
321
 
316
322
  def __init__(self, children=None, num_parallel_workers=None, cache=None):
@@ -333,9 +339,6 @@ class Dataset:
333
339
  self.saved_output_shapes = None
334
340
  self.estimated_output_shapes = None
335
341
  self.runtime_context = None
336
- self.dynamic_setting = [False, None]
337
- self.saved_min_shapes = None
338
- self.saved_max_shapes = None
339
342
  self._col_names = None
340
343
  self.dataset_size = None
341
344
  self._batch_size = None
@@ -347,7 +350,7 @@ class Dataset:
347
350
  @staticmethod
348
351
  def _get_operator_id(dataset):
349
352
  """
350
- Internal method to iterate the tree and obtain op_id of each operator.
353
+ Internal method to iterate the tree and obtain op_id of each operation.
351
354
 
352
355
  Returns:
353
356
  Dataset, the root dataset of the tree.
@@ -379,18 +382,6 @@ class Dataset:
379
382
  _OP_PROCESS.update(generator_process)
380
383
  return op_name
381
384
 
382
- def close_pool(self):
383
- """
384
- Close multiprocessing pool in dataset. If you are familiar with multiprocessing library, you can regard this
385
- as a destructor for a processingPool object.
386
-
387
- Note:
388
- This interface will be deleted or invisible in the future. Please don't use it.
389
- When you find that there are residual processes that do not exit correctly, you can use `kill -9 PROCESS_ID`
390
- to end it, or through www.gitee.com/mindspore/mindspore send us an issue.
391
- """
392
- logger.warning("This interface will be deleted or invisible in the future. Please don't use it.")
393
-
394
385
  def create_ir_tree(self):
395
386
  """
396
387
  Internal method to build an IR tree.
@@ -444,7 +435,7 @@ class Dataset:
444
435
 
445
436
  @staticmethod
446
437
  def _noop_mode():
447
- if _is_role_sched() or (_is_role_pserver() and not _enable_distributed_mindrt()):
438
+ if _is_role_sched():
448
439
  return True
449
440
  return False
450
441
 
@@ -459,10 +450,13 @@ class Dataset:
459
450
  Serialize a pipeline into JSON string and dump into file if filename is provided.
460
451
 
461
452
  Args:
462
- filename (str): filename of JSON file to be saved as (default="").
453
+ filename (str): filename of JSON file to be saved as. Default: ''.
463
454
 
464
455
  Returns:
465
456
  str, JSON string of the pipeline.
457
+
458
+ Examples:
459
+ >>> dataset_json = dataset.to_json("/path/to/mnist_dataset_pipeline.json")
466
460
  """
467
461
  ir_tree, _ = self.create_ir_tree()
468
462
  return json.loads(ir_tree.to_json(filename))
@@ -495,7 +489,7 @@ class Dataset:
495
489
  element_length_function (Callable, optional): A function that takes in
496
490
  M arguments where M = len(column_names) and returns an integer. If no value
497
491
  provided, parameter M the len(column_names) must be 1, and the size of the first
498
- dimension of that column will be taken as the length (default=None).
492
+ dimension of that column will be taken as the length. Default: None.
499
493
  pad_info (dict, optional): The information about how to batch each column. The key
500
494
  corresponds to the column name, and the value must be a tuple of 2 elements.
501
495
  The first element corresponds to the shape to pad to, and the second
@@ -503,17 +497,17 @@ class Dataset:
503
497
  specified, then that column will be padded to the longest in the current
504
498
  batch, and 0 will be used as the padding value. Any None dimensions will
505
499
  be padded to the longest in the current batch, unless if
506
- pad_to_bucket_boundary is True. If no padding is wanted, set pad_info
507
- to None (default=None).
500
+ `pad_to_bucket_boundary` is True. If no padding is wanted, set pad_info
501
+ to None. Default: None.
508
502
  pad_to_bucket_boundary (bool, optional): If True, will pad each None
509
- dimension in pad_info to the bucket_boundary minus 1. If there are any
510
- elements that fall into the last bucket, an error will occur
511
- (default=False).
503
+ dimension in `pad_info` to the bucket_boundary minus 1. If there are any
504
+ elements that fall into the last bucket, an error will occur.
505
+ Default: False.
512
506
  drop_remainder (bool, optional): If True, will drop the last batch for each
513
- bucket if it is not a full batch (default=False).
507
+ bucket if it is not a full batch. Default: False.
514
508
 
515
509
  Returns:
516
- Dataset, dataset bucketed and batched by length.
510
+ Dataset, dataset bucketized and batched by length.
517
511
 
518
512
  Examples:
519
513
  >>> # Create a dataset where certain counts rows are combined into a batch
@@ -541,14 +535,15 @@ class Dataset:
541
535
  element_length_function, pad_info, pad_to_bucket_boundary, drop_remainder)
542
536
 
543
537
  @check_batch
544
- def batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, per_batch_map=None,
545
- input_columns=None, output_columns=None, column_order=None, pad_info=None,
546
- python_multiprocessing=False, max_rowsize=16):
538
+ def batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, **kwargs):
547
539
  """
548
- Combine batch_size number of consecutive rows into batches.
540
+ Combine batch_size number of consecutive rows into batches, applying per_batch_map to the samples first.
549
541
 
550
542
  For any column, all the elements within that column must have the same shape.
551
- If a per_batch_map callable is provided, it will be applied to the batches of tensors.
543
+
544
+ Refer to the following figure for the execution process:
545
+
546
+ .. image:: batch_en.png
552
547
 
553
548
  Note:
554
549
  The order of using repeat and batch reflects the number of batches and per_batch_map.
@@ -558,36 +553,39 @@ class Dataset:
558
553
  batch_size (Union[int, Callable]): The number of rows each batch is created with. An
559
554
  int or callable object which takes exactly 1 parameter, BatchInfo.
560
555
  drop_remainder (bool, optional): Determines whether or not to drop the last block
561
- whose data row number is less than batch size (default=False). If True, and if there are less
556
+ whose data row number is less than batch size. Default: False. If True, and if there are less
562
557
  than batch_size rows available to make the last batch, then those rows will
563
558
  be dropped and not propagated to the child node.
564
- num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
565
- (default=None).
566
- per_batch_map (Callable[[List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo], (List[numpy.ndarray],\
567
- ..., List[numpy.ndarray])], optional): Per batch map callable (default=None). A callable
568
- which takes (List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo) as input parameters. Each
569
- list[numpy.ndarray] represents a batch of numpy.ndarray on a given column. The number of lists should
570
- match with the number of entries in input_columns. The last parameter of the callable should always be
571
- a BatchInfo object. Per_batch_map should return (list[numpy.ndarray], list[numpy.ndarray], ...). The
572
- length of each list in output should be the same as the input. output_columns is required if the number
573
- of output lists is different from input.
574
- input_columns (Union[str, list[str]], optional): List of names of the input columns. The size of the list
575
- should match with signature of per_batch_map callable (default=None).
576
- output_columns (Union[str, list[str]], optional): List of names assigned to the columns
577
- outputted by the last operation. This parameter is mandatory if len(input_columns) !=
578
- len(output_columns). The size of this list must match the number of output
579
- columns of the last operation. (default=None, output columns will have the same
580
- name as the input columns, i.e., the columns will be replaced).
581
- column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
582
- dataset (default=None). The parameter is required when len(input_column) != len(output_column).
583
- Caution: the list here is not just the columns specified in parameter input_columns and output_columns.
584
- pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
585
- would pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0
586
- (default=None).
587
- python_multiprocessing (bool, optional): Parallelize Python function per_batch_map with multi-processing.
588
- This option could be beneficial if the function is computational heavy (default=False).
589
- max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
590
- data between processes. This is only used if python_multiprocessing is set to True (default=16).
559
+ num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
560
+ Default: None.
561
+ **kwargs:
562
+
563
+ - per_batch_map (Callable[[List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo], \
564
+ (List[numpy.ndarray], ..., List[numpy.ndarray])], optional): Per batch map callable. Default: None.
565
+ A callable which takes (List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo) as input parameters.
566
+ Each list[numpy.ndarray] represents a batch of numpy.ndarray on a given column. The number of lists
567
+ should match with the number of entries in input_columns. The last parameter of the callable should
568
+ always be a BatchInfo object. Per_batch_map should return
569
+ (list[numpy.ndarray], list[numpy.ndarray], ...). The length of each list in output should be the same
570
+ as the input. output_columns is required if the number of output lists is different from input.
571
+
572
+ - input_columns (Union[str, list[str]], optional): List of names of the input columns. The size of
573
+ the list should match with signature of per_batch_map callable. Default: None.
574
+
575
+ - output_columns (Union[str, list[str]], optional): List of names assigned to the columns
576
+ outputted by the last operation. This parameter is mandatory if len(input_columns) !=
577
+ len(output_columns). The size of this list must match the number of output
578
+ columns of the last operation. Default: None, output columns will have the same
579
+ name as the input columns, i.e., the columns will be replaced.
580
+
581
+ - python_multiprocessing (bool, optional): Parallelize Python function `per_batch_map` with
582
+ multi-processing or multi-threading mode, True means multi-processing, False means multi-threading.
583
+ If `per_batch_map` is an I/O-bound task, use multi-threading mode.
584
+ If `per_batch_map` is a CPU-bound task, it is recommended to use multi-processing mode.
585
+ Default: False, use python multi-threading mode.
586
+
587
+ - max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to
588
+ copy data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
591
589
 
592
590
  Returns:
593
591
  BatchDataset, dataset batched.
@@ -597,7 +595,7 @@ class Dataset:
597
595
  >>> # and drops the last incomplete batch if there is one.
598
596
  >>> dataset = dataset.batch(100, True)
599
597
  >>>
600
- >>> # 2resize image according to its batch number, if it's 5-th batch, resize to (5^2, 5^2) = (25, 25)
598
+ >>> # 2) resize image according to its batch number, if it's 5-th batch, resize to (5^2, 5^2) = (25, 25)
601
599
  >>> def np_resize(col, BatchInfo):
602
600
  ... output = col.copy()
603
601
  ... s = (BatchInfo.get_batch_num() + 1) ** 2
@@ -610,22 +608,64 @@ class Dataset:
610
608
  ... return (output,)
611
609
  >>> dataset = dataset.batch(batch_size=8, input_columns=["image"], per_batch_map=np_resize)
612
610
  >>>
613
- >>> # 3Create a dataset where its batch size is dynamic
611
+ >>> # 3) Create a dataset where its batch size is dynamic
614
612
  >>> # Define a callable batch size function and let batch size increase 1 each time.
615
613
  >>> def add_one(BatchInfo):
616
614
  ... return BatchInfo.get_batch_num() + 1
617
615
  >>> dataset = dataset.batch(batch_size=add_one, drop_remainder=True)
618
- >>>
619
- >>> # 4)Create a dataset with batch, then specify the column order.
620
- >>> # Assume that the original coulmn order is ["image", "label"] and change to ["label", "image"].
621
- >>> dataset = dataset.batch(32, column_order=["label", "image"])
622
616
  """
623
- if pad_info is not None:
624
- logger.warning("The parameter pad_info will be deprecated in the future. "
625
- "Please use '.map(operations=transforms.PadEnd(...), ...)' operation instead.")
617
+ return BatchDataset(self, batch_size, drop_remainder, num_parallel_workers, **kwargs)
618
+
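A minimal usage sketch of the reworked `batch()` call, where `per_batch_map`, `input_columns` and the other options now travel through `**kwargs` (per the signature above); the column name, data and transform are illustrative only:

    import numpy as np
    import mindspore.dataset as ds

    def scale_batch(col, batch_info):
        # per_batch_map receives one list of ndarrays per input column, plus BatchInfo,
        # and must return a tuple of lists of the same length.
        return ([arr * 2 for arr in col],)

    dataset = ds.NumpySlicesDataset(data=[[0, 1, 2, 3]], column_names=["data"])
    dataset = dataset.batch(batch_size=2,
                            drop_remainder=True,
                            input_columns=["data"],
                            per_batch_map=scale_batch,
                            python_multiprocessing=False)  # thread mode, see kwargs note above

    for row in dataset.create_tuple_iterator(output_numpy=True, num_epochs=1):
        print(row[0])  # two batches of two scaled samples each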
619
+ @check_padded_batch
620
+ def padded_batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, pad_info=None):
621
+ """
622
+ Combine batch_size number of consecutive rows into batches, applying pad_info to pad the samples first.
623
+
624
+ Refer to the following figure for the execution process:
626
625
 
627
- return BatchDataset(self, batch_size, drop_remainder, num_parallel_workers, per_batch_map, input_columns,
628
- output_columns, column_order, pad_info, python_multiprocessing, max_rowsize)
626
+ .. image:: padded_batch_en.png
627
+
628
+ Note:
629
+ The order of using repeat and padded_batch reflects the number of batches.
630
+ It is recommended that the repeat operation be applied after the padded_batch operation.
631
+
632
+ Args:
633
+ batch_size (Union[int, Callable]): The number of rows each batch is created with. An
634
+ int or callable object which takes exactly 1 parameter, BatchInfo.
635
+ drop_remainder (bool, optional): Determines whether or not to drop the last block
636
+ whose data row number is less than batch size. Default: False. If True, and if there are less
637
+ than batch_size rows available to make the last batch, then those rows will
638
+ be dropped and not propagated to the child node.
639
+ num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
640
+ Default: None.
641
+ pad_info (dict, optional): The information about how to batch each column. The key
642
+ corresponds to the column name, and the value must be a tuple of 2 elements.
643
+ The first element corresponds to the shape to pad to, and the second
644
+ element corresponds to the value to pad with. If a column is not
645
+ specified, then that column will be padded to the longest in the current
646
+ batch, and 0 will be used as the padding value. Any None dimensions will
647
+ be padded to the longest in the current batch, unless if
648
+ pad_to_bucket_boundary is True. If no padding is wanted, set pad_info
649
+ to None. Default: None.
650
+
651
+ Returns:
652
+ PaddedBatchDataset, dataset batched.
653
+
654
+ Examples:
655
+ >>> # 1) Pad every sample to the largest sample's shape and batch the samples
656
+ >>> dataset = dataset.padded_batch(100, True, pad_info={})
657
+ >>>
658
+ >>> # 2) Create a dataset where every 100 rows are combined into a batch
659
+ >>> # and drops the last incomplete batch if there is one.
660
+ >>> dataset = dataset.padded_batch(100, True)
661
+ >>>
662
+ >>> # 3) Create a dataset where its batch size is dynamic
663
+ >>> # Define a callable batch size function and let batch size increase 1 each time.
664
+ >>> def add_one(BatchInfo):
665
+ ... return BatchInfo.get_batch_num() + 1
666
+ >>> dataset = dataset.padded_batch(batch_size=add_one, drop_remainder=True)
667
+ """
668
+ return PaddedBatchDataset(self, batch_size, drop_remainder, num_parallel_workers, pad_info)
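A minimal sketch of the new `padded_batch()` operation documented above; the generator, column name, target shape and pad value are illustrative assumptions:

    import numpy as np
    import mindspore.dataset as ds

    def gen_var_len():
        # rows of variable length: [0], [0, 1], [0, 1, 2], [0, 1, 2, 3]
        for i in range(1, 5):
            yield (np.arange(i, dtype=np.int32),)

    dataset = ds.GeneratorDataset(gen_var_len, column_names=["seq"], shuffle=False)
    # pad column "seq" to length 4 with value -1, then combine rows into batches of 2
    dataset = dataset.padded_batch(batch_size=2, drop_remainder=True,
                                   pad_info={"seq": ([4], -1)})

    for row in dataset.create_tuple_iterator(output_numpy=True, num_epochs=1):
        print(row[0].shape)  # (2, 4) for every batch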
629
669
 
630
670
  @check_sync_wait
631
671
  def sync_wait(self, condition_name, num_batch=1, callback=None):
@@ -634,8 +674,8 @@ class Dataset:
634
674
 
635
675
  Args:
636
676
  condition_name (str): The condition name that is used to toggle sending next row.
637
- num_batch (int): the number of batches without blocking at the start of each epoch (default=1).
638
- callback (function): The callback function that will be invoked when sync_update is called (default=None).
677
+ num_batch (int): the number of batches without blocking at the start of each epoch. Default: 1.
678
+ callback (function): The callback function that will be invoked when sync_update is called. Default: None.
639
679
 
640
680
  Returns:
641
681
  SyncWaitDataset, dataset added a blocking condition.
@@ -678,27 +718,27 @@ class Dataset:
678
718
  @check_shuffle
679
719
  def shuffle(self, buffer_size):
680
720
  """
681
- Randomly shuffles the rows of this dataset using the following policy:
721
+ Shuffle the dataset by creating a cache with the size of `buffer_size` .
682
722
 
683
- 1. Make a shuffle buffer that contains the first buffer_size rows.
723
+ 1. Make a shuffle buffer that contains the first `buffer_size` rows.
684
724
  2. Randomly select an element from the shuffle buffer to be the next row
685
725
  propagated to the child node.
686
726
  3. Get the next row (if any) from the parent node and put it in the shuffle buffer.
687
727
  4. Repeat steps 2 and 3 until there are no more rows left in the shuffle buffer.
688
728
 
689
- A random seed can be provided to be used on the first epoch via `dataset.config.set_seed`. In every subsequent
729
+ A random seed can be provided to be used on the first epoch via `dataset.config.set_seed` . In every subsequent
690
730
  epoch, the seed is changed to a new one, randomly generated value.
691
731
 
692
732
  Args:
693
733
  buffer_size (int): The size of the buffer (must be larger than 1) for
694
- shuffling. Setting buffer_size equal to the number of rows in the entire
734
+ shuffling. Setting `buffer_size` equal to the number of rows in the entire
695
735
  dataset will result in a global shuffle.
696
736
 
697
737
  Returns:
698
738
  Dataset, dataset shuffled.
699
739
 
700
740
  Raises:
701
- RuntimeError: If exist sync operators before shuffle.
741
+ RuntimeError: If exist sync operations before shuffle.
702
742
 
703
743
  Examples:
704
744
  >>> # dataset is an instance object of Dataset
@@ -715,7 +755,7 @@ class Dataset:
715
755
 
716
756
  Args:
717
757
  func (function): A function that must take one `numpy.ndarray` as an argument and
718
- return a `Dataset`.
758
+ return a `Dataset` .
719
759
 
720
760
  Returns:
721
761
  Dataset, dataset applied by the function.
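A minimal sketch of the buffer-based shuffle policy described in this hunk; the seed, buffer size and data are illustrative:

    import mindspore.dataset as ds

    ds.config.set_seed(58)  # seed used for the first epoch, per the docstring above

    dataset = ds.NumpySlicesDataset(data=[list(range(10))], column_names=["data"],
                                    shuffle=False)
    # buffer_size equal to the dataset size (10 rows) gives a global shuffle;
    # a smaller buffer only shuffles within the sliding window of that size.
    dataset = dataset.shuffle(buffer_size=10)

    print([int(row[0]) for row in
           dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)])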
@@ -767,57 +807,77 @@ class Dataset:
767
807
 
768
808
  @check_map
769
809
  def map(self, operations, input_columns=None, output_columns=None, column_order=None,
770
- num_parallel_workers=None, python_multiprocessing=False, cache=None, callbacks=None,
771
- max_rowsize=16, offload=None):
810
+ num_parallel_workers=None, **kwargs):
772
811
  """
773
812
  Apply each operation in operations to this dataset.
774
813
 
775
814
  Each operation will be passed one or more columns from the dataset as input, and one or
776
815
  more columns will be outputted. The first operation will be passed the columns specified
777
- in input_columns as input. If there is more than one operator in operations, the outputted
816
+ in input_columns as input. If there is more than one operation in operations, the outputted
778
817
  columns of the previous operation are used as the input columns for the next operation.
779
818
 
780
819
  The columns outputted by the very last operation will be assigned names specified by
781
- `output_columns`, and if not specified, the column name of output column is same as that of `input_columns`.
820
+ `output_columns` , and if not specified, the column name of output column is same as that of `input_columns` .
821
+
822
+ - If you use transformations (
823
+ `vision transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
824
+ dataset.transforms.html#module-mindspore.dataset.vision>`_ ,
825
+ `nlp transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
826
+ dataset.transforms.html#module-mindspore.dataset.text>`_ ,
827
+ `audio transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
828
+ dataset.transforms.html#module-mindspore.dataset.audio>`_ )
829
+ provided by mindspore dataset, please use the following parameters:
830
+
831
+ .. image:: map_parameter_en.png
832
+
833
+ - If you use user-defined transform as PyFunc (Python Func), please use the following parameters:
834
+
835
+ .. image:: map_parameter_pyfunc_en.png
782
836
 
783
837
  Args:
784
838
  operations (Union[list[TensorOperation], list[functions]]): List of operations to be
785
839
  applied on the dataset. Operations are applied in the order they appear in this list.
786
840
  input_columns (Union[str, list[str]], optional): List of the names of the columns that will be passed to
787
841
  the first operation as input. The size of this list must match the number of
788
- input columns expected by the first operator. (default=None, the first
842
+ input columns expected by the first operation. Default: None, the first
789
843
  operation will be passed however many columns that are required, starting from
790
- the first column).
844
+ the first column.
791
845
  output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
792
846
  the last operation. This parameter is mandatory if len(input_columns) !=
793
847
  len(output_columns). The size of this list must match the number of output
794
- columns of the last operation. (default=None, output columns will have the same
795
- name as the input columns, i.e., the columns will be replaced).
796
- column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
797
- dataset (default=None). The parameter is required when len(input_column) != len(output_column).
798
- Caution: the list here is not just the columns specified in parameter input_columns and output_columns.
848
+ columns of the last operation. Default: None, output columns will have the same
849
+ name as the input columns, i.e., the columns will be replaced.
799
850
  num_parallel_workers (int, optional): Number of threads used to process the dataset in
800
- parallel (default=None, the value from the configuration will be used).
801
- python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This
802
- option could be beneficial if the Python operation is computational heavy (default=False).
803
- cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
804
- (default=None, which means no cache is used).
805
- callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called (Default=None).
806
- max_rowsize (int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
807
- data between processes. This is only used if python_multiprocessing is set to True (Default=16).
808
- offload (bool, optional): Flag to indicate whether offload is used (Default=None).
851
+ parallel. Default: None, the value from the configuration will be used.
852
+ **kwargs:
853
+
854
+ - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes.
855
+ This option could be beneficial if the Python operation is computationally heavy. Default: False.
856
+
857
+ - max_rowsize (int, optional): Maximum size of row in MB that is used for shared memory allocation to
858
+ copy data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
859
+
860
+ - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
861
+ Default: None, which means no cache is used.
862
+
863
+ - callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called.
864
+ Default: None.
865
+
866
+ - offload (bool, optional): Flag to indicate whether offload is used. Default: None.
809
867
 
810
868
  Note:
811
869
  - Input `operations` accepts TensorOperations defined in mindspore.dataset part, plus user-defined
812
870
  Python functions (PyFuncs).
813
871
  - Do not add network computing operators from mindspore.nn and mindspore.ops or others into this
814
- `operations`.
872
+ `operations` .
815
873
 
816
874
  Returns:
817
875
  Dataset, dataset after mapping operation.
818
876
 
819
877
  Examples:
820
878
  >>> # dataset is an instance of Dataset which has 2 columns, "image" and "label".
879
+ >>> # image is of type bytes type which can be decoded to RGB
880
+ >>> # label is of type int32
821
881
  >>>
822
882
  >>> # Define two operations, where each operation accepts 1 input column and outputs 1 column.
823
883
  >>> decode_op = c_vision.Decode(rgb=True)
@@ -826,30 +886,15 @@ class Dataset:
826
886
  >>>
827
887
  >>> # 1) Simple map example.
828
888
  >>>
829
- >>> # Apply decode_op on column "image". This column will be replaced by the outputted
830
- >>> # column of decode_op. Since column_order is not provided, both columns "image"
831
- >>> # and "label" will be propagated to the child node in their original order.
889
+ >>> # Apply decode_op on column "image".
832
890
  >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"])
833
891
  >>>
834
892
  >>> # Decode and rename column "image" to "decoded_image".
835
893
  >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"], output_columns=["decoded_image"])
836
894
  >>>
837
- >>> # Specify the order of the output columns.
838
- >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
839
- ... output_columns=None, column_order=["label", "image"])
840
- >>>
841
- >>> # Rename column "image" to "decoded_image" and also specify the order of the output columns.
842
- >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
843
- ... output_columns=["decoded_image"], column_order=["label", "decoded_image"])
844
- >>>
845
- >>> # Rename column "image" to "decoded_image" and keep only this column.
846
- >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
847
- ... output_columns=["decoded_image"], column_order=["decoded_image"])
848
- >>>
849
- >>> # A simple example for mapping pyfunc. Renaming columns and specifying column order
850
- >>> # work in the same way as the previous examples.
895
+ >>> # A simple example for user defined python function transform.
851
896
  >>> dataset = ds.NumpySlicesDataset(data=[[0, 1, 2]], column_names=["data"])
852
- >>> dataset = dataset.map(operations=[(lambda x: x + 1)], input_columns=["data"])
897
+ >>> dataset = dataset.map(operations=[(lambda x: x - 1)], input_columns=["data"])
853
898
  >>>
854
899
  >>> # 2) Map example with more than one operation.
855
900
  >>>
@@ -858,17 +903,14 @@ class Dataset:
858
903
  >>> # outputted by decode_op is passed as input to random_jitter_op.
859
904
  >>> # random_jitter_op will output one column. Column "image" will be replaced by
860
905
  >>> # the column outputted by random_jitter_op (the very last operation). All other
861
- >>> # columns are unchanged. Since column_order is not specified, the order of the
862
- >>> # columns will remain the same.
906
+ >>> # columns are unchanged.
863
907
  >>> dataset = dataset.map(operations=[decode_op, random_jitter_op], input_columns=["image"])
864
908
  >>>
865
909
  >>> # Rename the column outputted by random_jitter_op to "image_mapped".
866
- >>> # Specifying column order works in the same way as examples in 1).
867
910
  >>> dataset = dataset.map(operations=[decode_op, random_jitter_op], input_columns=["image"],
868
911
  ... output_columns=["image_mapped"])
869
912
  >>>
870
- >>> # Map with multiple operations using pyfunc. Renaming columns and specifying column order
871
- >>> # work in the same way as examples in 1).
913
+ >>> # Map with multiple operations using pyfunc and rename column's name
872
914
  >>> dataset = ds.NumpySlicesDataset(data=[[0, 1, 2]], column_names=["data"])
873
915
  >>> dataset = dataset.map(operations=[(lambda x: x * x), (lambda x: x - 1)], input_columns=["data"],
874
916
  ... output_columns=["data_mapped"])
@@ -885,22 +927,9 @@ class Dataset:
885
927
  >>> operations = [(lambda x, y: (x, x + y, x + y + 1)),
886
928
  ... (lambda x, y, z: x * y * z),
887
929
  ... (lambda x: (x % 2, x % 3, x % 5, x % 7))]
888
- >>>
889
- >>> # Note: Since the number of input columns is not the same as the number of
890
- >>> # output columns, the output_columns and column_order parameters must be
891
- >>> # specified. Otherwise, this map call will also result in an error.
892
- >>>
893
930
  >>> dataset = ds.NumpySlicesDataset(data=([[0, 1, 2]], [[3, 4, 5]]), column_names=["x", "y"])
894
- >>>
895
- >>> # Propagate all columns to the child node in this order:
896
- >>> dataset = dataset.map(operations, input_columns=["x", "y"],
897
- ... output_columns=["mod2", "mod3", "mod5", "mod7"],
898
- ... column_order=["mod2", "mod3", "mod5", "mod7"])
899
- >>>
900
- >>> # Propagate some columns to the child node in this order:
901
931
  >>> dataset = dataset.map(operations, input_columns=["x", "y"],
902
- ... output_columns=["mod2", "mod3", "mod5", "mod7"],
903
- ... column_order=["mod7", "mod3", "col2"])
932
+ ... output_columns=["mod2", "mod3", "mod5", "mod7"])
904
933
  """
905
934
  if hasattr(self, 'operator_mixed') and getattr(self, 'operator_mixed') is True:
906
935
  num_parallel_workers = 1
@@ -909,8 +938,7 @@ class Dataset:
909
938
  "mindspore.numpy module and etc, which do not support multi-thread compiling, recommend to replace it "
910
939
  "with python implemented operator like numpy etc. Here decrease 'num_parallel_workers' into 1.")
911
940
 
912
- return MapDataset(self, operations, input_columns, output_columns, column_order, num_parallel_workers,
913
- python_multiprocessing, cache, callbacks, max_rowsize, offload)
941
+ return MapDataset(self, operations, input_columns, output_columns, num_parallel_workers, **kwargs)
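A minimal sketch of the 2.0 `map()` call where the remaining options travel through `**kwargs` and `column_order` is gone; the trailing `project()` step used here to pick and order columns is an assumed replacement pattern, not something stated in this hunk:

    import mindspore.dataset as ds

    dataset = ds.NumpySlicesDataset(data=([[0, 1, 2]], [[3, 4, 5]]), column_names=["x", "y"])

    ops = [(lambda x, y: (x, x + y, x + y + 1)),
           (lambda x, y, z: x * y * z),
           (lambda x: (x % 2, x % 3, x % 5, x % 7))]

    dataset = dataset.map(operations=ops, input_columns=["x", "y"],
                          output_columns=["mod2", "mod3", "mod5", "mod7"],
                          python_multiprocessing=False)   # kwargs-only option now
    dataset = dataset.project(["mod7", "mod5", "mod3", "mod2"])  # explicit column order

    for row in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
        print(list(row.keys()))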
914
942
 
915
943
  @check_filter
916
944
  def filter(self, predicate, input_columns=None, num_parallel_workers=None):
@@ -920,9 +948,9 @@ class Dataset:
920
948
  Args:
921
949
  predicate (callable): Python callable which returns a boolean value. If False then filter the element.
922
950
  input_columns (Union[str, list[str]], optional): List of names of the input columns. If not provided
923
- or provided with None, the predicate will be applied on all columns in the dataset (default=None).
951
+ or provided with None, the predicate will be applied on all columns in the dataset. Default: None.
924
952
  num_parallel_workers (int, optional): Number of workers to process the dataset
925
- in parallel (default=None).
953
+ in parallel. Default: None.
926
954
 
927
955
  Returns:
928
956
  Dataset, dataset filtered.
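A minimal sketch of `filter()` with a Python predicate; the data and predicate are illustrative:

    import mindspore.dataset as ds

    dataset = ds.NumpySlicesDataset(data=[[0, 1, 2, 3, 4, 5]], column_names=["data"])
    # keep only rows whose value is even; rows where the predicate returns False are dropped
    dataset = dataset.filter(predicate=lambda data: data % 2 == 0, input_columns=["data"])

    print([int(row[0]) for row in
           dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)])  # [0, 2, 4]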
@@ -944,7 +972,7 @@ class Dataset:
944
972
  the repeat operation is used after the batch operation.
945
973
 
946
974
  Args:
947
- count (int): Number of times the dataset is going to be repeated (default=None).
975
+ count (int): Number of times the dataset is going to be repeated. Default: None.
948
976
 
949
977
  Returns:
950
978
  Dataset, dataset repeated.
@@ -960,7 +988,7 @@ class Dataset:
960
988
  >>> dataset = dataset.repeat(50)
961
989
  >>>
962
990
  >>> # Create a dataset where the dataset is first repeated for
963
- >>> # 50 epochs before shuffling. The shuffle operator will treat
991
+ >>> # 50 epochs before shuffling. The shuffle operation will treat
964
992
  >>> # the entire 50 epochs as one big dataset.
965
993
  >>> dataset = dataset.repeat(50)
966
994
  >>> dataset = dataset.shuffle(10)
@@ -997,7 +1025,7 @@ class Dataset:
997
1025
  then take the given number of rows; otherwise take the given number of batches.
998
1026
 
999
1027
  Args:
1000
- count (int, optional): Number of elements to be taken from the dataset (default=-1).
1028
+ count (int, optional): Number of elements to be taken from the dataset. Default: -1.
1001
1029
 
1002
1030
  Returns:
1003
1031
  Dataset, dataset taken.
@@ -1085,7 +1113,7 @@ class Dataset:
1085
1113
  - The sum of split sizes > K, the difference of sigma(round(fi * K)) - K will be removed from the first
1086
1114
  large enough split such that it will have at least 1 row after removing the difference.
1087
1115
 
1088
- randomize (bool, optional): Determines whether or not to split the data randomly (default=True).
1116
+ randomize (bool, optional): Determines whether or not to split the data randomly. Default: True.
1089
1117
  If True, the data will be randomly split. Otherwise, each split will be created with
1090
1118
  consecutive rows from the dataset.
1091
1119
 
@@ -1147,12 +1175,15 @@ class Dataset:
1147
1175
  name.
1148
1176
 
1149
1177
  Args:
1150
- datasets (Union[tuple, class Dataset]): A tuple of datasets or a single class Dataset
1178
+ datasets (Union[Dataset, tuple[Dataset]]): A tuple of datasets or a single class Dataset
1151
1179
  to be zipped together with this dataset.
1152
1180
 
1153
1181
  Returns:
1154
1182
  Dataset, dataset zipped.
1155
1183
 
1184
+ Raises:
1185
+ TypeError: The parameter is not a dataset object or a tuple of dataset objects.
1186
+
1156
1187
  Examples:
1157
1188
  >>> # Create a dataset which is the combination of dataset and dataset_1
1158
1189
  >>> dataset = dataset.zip(dataset_1)
@@ -1249,7 +1280,7 @@ class Dataset:
1249
1280
 
1250
1281
  Args:
1251
1282
  apply_func (function): A function that must take one `Dataset` as an argument and
1252
- return a preprocessed `Dataset`.
1283
+ return a preprocessed `Dataset` .
1253
1284
 
1254
1285
  Returns:
1255
1286
  Dataset, dataset applied by the function.
@@ -1284,9 +1315,9 @@ class Dataset:
1284
1315
  Return a transferred Dataset that transfers data through a device.
1285
1316
 
1286
1317
  Args:
1287
- send_epoch_end (bool, optional): Whether to send end of sequence to device or not (default=True).
1318
+ send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True.
1288
1319
  create_data_info_queue (bool, optional): Whether to create queue which stores
1289
- types and shapes of data or not(default=False).
1320
+ types and shapes of data or not. Default: False.
1290
1321
 
1291
1322
  Note:
1292
1323
  If device is Ascend, features of data will be transferred one by one. The limitation
@@ -1294,34 +1325,17 @@ class Dataset:
1294
1325
 
1295
1326
  Returns:
1296
1327
  Dataset, dataset for transferring.
1297
- """
1298
- return TransferDataset(self, send_epoch_end, create_data_info_queue)
1299
-
1300
- @check_device_send
1301
- def to_device(self, send_epoch_end=True, create_data_info_queue=False):
1302
- """
1303
- Transfer data from CPU to GPU or Ascend or other devices.
1304
-
1305
- Args:
1306
- send_epoch_end (bool, optional): Whether to send the end of sequence to device or not (default=True).
1307
- create_data_info_queue (bool, optional): Whether to create queue which stores
1308
- types and shapes of data or not(default=False).
1309
-
1310
- Note:
1311
- This interface will be deleted or invisible in the future.
1312
- Please use `device_que` to enable dataset sink mode.
1313
- If device is Ascend, features of data will be transferred one by one. The limitation
1314
- of data transmission per second is 256M.
1315
1328
 
1316
- Returns:
1317
- TransferDataset, dataset for transferring.
1318
-
1319
- Raises:
1320
- RuntimeError: If distribution file path is given but failed to read.
1329
+ Examples:
1330
+ >>> import time
1331
+ >>>
1332
+ >>> data = ds.TFRecordDataset('/path/to/TF_FILES', '/path/to/TF_SCHEMA_FILE', shuffle=ds.Shuffle.FILES)
1333
+ >>>
1334
+ >>> data = data.device_que()
1335
+ >>> data.send()
1336
+ >>> time.sleep(0.1)
1337
+ >>> data.stop_send()
1321
1338
  """
1322
- logger.warning("This interface will be deleted or invisible in the future. "
1323
- "Please use 'device_que' to enable dataset sink mode.")
1324
-
1325
1339
  return TransferDataset(self, send_epoch_end, create_data_info_queue)
1326
1340
 
1327
1341
  @check_save
@@ -1330,7 +1344,8 @@ class Dataset:
1330
1344
  Save the dynamic data processed by the dataset pipeline in common dataset format.
1331
1345
  Supported dataset formats: `mindrecord` only. And you can use `MindDataset` API to read the saved file(s).
1332
1346
 
1333
- Implicit type casting exists when saving data as `mindrecord`. The transform table shows how to do type casting.
1347
+ Implicit type casting exists when saving data as `mindrecord` . The transform table shows how to do
1348
+ type casting.
1334
1349
 
1335
1350
  .. list-table:: Implicit Type Casting when Saving as `mindrecord`
1336
1351
  :widths: 25 25 50
@@ -1381,8 +1396,8 @@ class Dataset:
1381
1396
 
1382
1397
  Note:
1383
1398
  1. To save the samples in order, set dataset's shuffle to False and num_files to 1.
1384
- 2. Before calling the function, do not use batch operator, repeat operator or data augmentation operators
1385
- with random attribute in map operator.
1399
+ 2. Before calling the function, do not use batch operation, repeat operation or data augmentation operations
1400
+ with random attribute in map operation.
1386
1401
  3. When array dimension is variable, one-dimensional arrays or
1387
1402
  multi-dimensional arrays with variable dimension 0 are supported.
1388
1403
  4. Mindrecord does not support uint64, multi-dimensional uint8(drop dimension) nor
@@ -1390,9 +1405,20 @@ class Dataset:
1390
1405
 
1391
1406
  Args:
1392
1407
  file_name (str): Path to dataset file.
1393
- num_files (int, optional): Number of dataset files (default=1).
1394
- file_type (str, optional): Dataset format (default='mindrecord').
1408
+ num_files (int, optional): Number of dataset files. Default: 1.
1409
+ file_type (str, optional): Dataset format. Default: 'mindrecord'.
1395
1410
 
1411
+ Examples:
1412
+ >>> import numpy as np
1413
+ >>>
1414
+ >>> def generator_1d():
1415
+ ... for i in range(10):
1416
+ ... yield (np.array([i]),)
1417
+ >>>
1418
+ >>>
1419
+ >>> # apply dataset operations
1420
+ >>> d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
1421
+ >>> d1.save('/path/to/save_file')
1396
1422
  """
1397
1423
  ir_tree, api_tree = self.create_ir_tree()
1398
1424
 
@@ -1409,20 +1435,20 @@ class Dataset:
1409
1435
  @check_tuple_iterator
1410
1436
  def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False, do_copy=True):
1411
1437
  """
1412
- Create an iterator over the dataset. The datatype retrieved back will be a list of `numpy.ndarray`.
1438
+ Create an iterator over the dataset. The datatype retrieved back will be a list of `numpy.ndarray` .
1413
1439
 
1414
1440
  To specify which columns to list and the order needed, use columns_list. If columns_list
1415
1441
  is not provided, the order of the columns will remain unchanged.
1416
1442
 
1417
1443
  Args:
1418
- columns (list[str], optional): List of columns to be used to specify the order of columns
1419
- (default=None, means all columns).
1444
+ columns (list[str], optional): List of columns to be used to specify the order of columns.
1445
+ Default: None, means all columns.
1420
1446
  num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
1421
- (default=-1, iterator can be iterated infinite number of epochs)
1447
+ Default: -1, iterator can be iterated infinite number of epochs.
1422
1448
  output_numpy (bool, optional): Whether or not to output NumPy datatype.
1423
- If output_numpy=False, iterator will output MSTensor (default=False).
1424
- do_copy (bool, optional): when output data type is mindspore.Tensor,
1425
- use this param to select the conversion method, only take False for better performance (default=True).
1449
+ If output_numpy=False, iterator will output MSTensor. Default: False.
1450
+ do_copy (bool, optional): When output data type is mindspore.Tensor,
1451
+ use this param to select the conversion method, only take False for better performance. Default: True.
1426
1452
 
1427
1453
  Returns:
1428
1454
  Iterator, tuple iterator over the dataset.
@@ -1444,15 +1470,17 @@ class Dataset:
1444
1470
  return TupleIterator(self, columns, num_epochs, output_numpy, do_copy)
1445
1471
 
1446
1472
  @check_dict_iterator
1447
- def create_dict_iterator(self, num_epochs=-1, output_numpy=False):
1473
+ def create_dict_iterator(self, num_epochs=-1, output_numpy=False, do_copy=True):
1448
1474
  """
1449
1475
  Create an iterator over the dataset. The data retrieved will be a dictionary datatype.
1450
1476
 
1451
1477
  Args:
1452
- num_epochs (int, optional): Maximum number of epochs that iterator can be iterated
1453
- (default=-1, iterator can be iterated infinite number of epochs).
1478
+ num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
1479
+ Default: -1, iterator can be iterated infinite number of epochs.
1454
1480
  output_numpy (bool, optional): Whether or not to output NumPy datatype,
1455
- if output_numpy=False, iterator will output MSTensor (default=False).
1481
+ if output_numpy=False, iterator will output MSTensor. Default: False.
1482
+ do_copy (bool, optional): When output data type is mindspore.Tensor,
1483
+ use this param to select the conversion method, only take False for better performance. Default: True.
1456
1484
 
1457
1485
  Returns:
1458
1486
  Iterator, dictionary iterator over the dataset.
@@ -1471,7 +1499,7 @@ class Dataset:
1471
1499
 
1472
1500
  if Dataset._noop_mode():
1473
1501
  return DummyIterator(self, 'dict', output_numpy)
1474
- return DictIterator(self, num_epochs, output_numpy)
1502
+ return DictIterator(self, num_epochs, output_numpy, do_copy)
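A minimal sketch of the new `do_copy` option on `create_dict_iterator` (added in this hunk); the dataset is illustrative:

    import mindspore.dataset as ds

    dataset = ds.NumpySlicesDataset(data=[[1, 2, 3]], column_names=["data"])

    # do_copy=False selects the faster, non-copying conversion path when the
    # iterator outputs mindspore.Tensor (output_numpy=False).
    for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=False, do_copy=False):
        print(item["data"])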
1475
1503
 
1476
1504
  def __iter__(self):
1477
1505
  """Create an iterator over the dataset."""
@@ -1587,11 +1615,6 @@ class Dataset:
1587
1615
  if estimate and self.estimated_output_shapes is not None:
1588
1616
  return self.estimated_output_shapes
1589
1617
 
1590
- # if use set_dynamic_column, the `estimate` does not work, but they get the same result
1591
- if self.dynamic_setting[0]:
1592
- self.saved_output_shapes, self.saved_min_shapes, self.saved_max_shapes = self._dynamic_output_shapes()
1593
- return self.saved_output_shapes
1594
-
1595
1618
  # We have a hang problem when two-level pipeline with multiprocessing, we need to extend the life cycle
1596
1619
  # of runtime_context. We found this hang problem only occur on output_types and output_shapes.
1597
1620
  runtime_getter = self._init_tree_getters()
@@ -1599,6 +1622,9 @@ class Dataset:
1599
1622
  api_tree = runtime_getter[2]
1600
1623
  output_shapes = runtime_getter[0].GetOutputShapes(estimate)
1601
1624
  del api_tree
1625
+ # Need to terminate the runtime context to avoid the occasional hang problem for
1626
+ # Python (with multiprocessing enabled) in sink mode.
1627
+ self.runtime_context.Terminate()
1602
1628
  del self.runtime_context
1603
1629
 
1604
1630
  if estimate:
@@ -1626,6 +1652,9 @@ class Dataset:
1626
1652
  api_tree = runtime_getter[2]
1627
1653
  self.saved_output_types = runtime_getter[0].GetOutputTypes()
1628
1654
  del api_tree
1655
+ # Need to terminate the runtime context to avoid the occasional hang problem for
1656
+ # Python (with multiprocessing enabled) in sink mode.
1657
+ self.runtime_context.Terminate()
1629
1658
  del self.runtime_context
1630
1659
  return self.saved_output_types
1631
1660
 
@@ -1648,136 +1677,6 @@ class Dataset:
1648
1677
 
1649
1678
  return self.dataset_size
1650
1679
 
1651
- @deprecated("1.5")
1652
- def set_dynamic_columns(self, columns=None):
1653
- """
1654
- Set dynamic shape information of source data, it should be set after the pipeline is defined.
1655
-
1656
- Args:
1657
- columns (dict): A dict contains shape information of each column in dataset.
1658
- The value of shape[i] is :py:obj:`None` indicates that the data length of shape[i] is dynamic.
1659
-
1660
- Examples:
1661
- >>> import numpy as np
1662
- >>>
1663
- >>> def generator1():
1664
- ... for i in range(1, 100):
1665
- ... yield np.ones((16, i, 83)), np.array(i)
1666
- >>>
1667
- >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
1668
- >>> dataset.set_dynamic_columns(columns={"data1": [16, None, 83], "data2": []})
1669
- """
1670
- if not isinstance(columns, dict):
1671
- raise TypeError("Pass a dict to set dynamic shape, example: {\"data1\": [16, None, 256]}")
1672
- self.dynamic_setting[0] = True
1673
- self.dynamic_setting[1] = columns
1674
-
1675
- def dynamic_min_max_shapes(self):
1676
- """
1677
- Get minimum and maximum data length of dynamic source data, for dynamic graph compilation.
1678
-
1679
- Returns:
1680
- lists, min_shapes, max_shapes of source data.
1681
-
1682
- Examples:
1683
- >>> import numpy as np
1684
- >>>
1685
- >>> def generator1():
1686
- ... for i in range(1, 100):
1687
- ... yield np.ones((16, i, 83)), np.array(i)
1688
- >>>
1689
- >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
1690
- >>> dataset.set_dynamic_columns(columns={"data1": [16, None, 83], "data2": []})
1691
- >>> min_shapes, max_shapes = dataset.dynamic_min_max_shapes()
1692
- """
1693
- if self.saved_min_shapes is None or self.saved_max_shapes is None:
1694
- self.saved_output_shapes, self.saved_min_shapes, self.saved_max_shapes = self._dynamic_output_shapes()
1695
- return self.saved_min_shapes, self.saved_max_shapes
1696
-
1697
- @staticmethod
1698
- def __check_dynamic_column_name(dynamic_columns, dataset_columns):
1699
- for column in dynamic_columns:
1700
- if column not in dataset_columns:
1701
- raise RuntimeError("dynamic column [" + column + "] does not match any column in dataset: " +
1702
- str(dataset_columns))
1703
-
1704
- @staticmethod
1705
- def __check_dynamic_column_shape(data, col, dynamic_columns):
1706
- shape_mismatch = "dynamic column [" + col + "] with shape " + str(dynamic_columns[col]) + \
1707
- " does not match dataset column [" + col + "] with shape " + str(list(data[col].shape))
1708
- if data[col].ndim != len(dynamic_columns[col]):
1709
- raise RuntimeError(shape_mismatch)
1710
- for dim in range(len(dynamic_columns[col])):
1711
- if dynamic_columns[col][dim] is not None and dynamic_columns[col][dim] != data[col].shape[dim]:
1712
- raise RuntimeError(shape_mismatch)
1713
-
1714
- def _dynamic_output_shapes(self):
1715
- """
1716
- Get dynamic information of source data.
1717
-
1718
- Returns:
1719
- lists, dynamic_shapes, min_shapes, max_shapes of source data.
1720
- """
1721
- if not self.dynamic_setting[1]:
1722
- raise RuntimeError("dynamic_columns is not set, call set_dynamic_columns() by final Dataset Op.")
1723
-
1724
- if self.saved_output_shapes is not None and self.saved_min_shapes is not None and \
1725
- self.saved_max_shapes is not None:
1726
- return self.saved_output_shapes, self.saved_min_shapes, self.saved_max_shapes
1727
-
1728
- logger.warning("Calculating dynamic shape of input data, this will take a few minutes...")
1729
- # Assume data1 shape is dynamic, data2 shape is fix
1730
- dynamic_columns = self.dynamic_setting[1]
1731
- # ["data1", "data2"]
1732
- dataset_columns = self.get_col_names()
1733
- Dataset.__check_dynamic_column_name(dynamic_columns, dataset_columns)
1734
-
1735
- # Shape[1] of data1 is variable
1736
- # {"data1": {(batch_size, 100, feat_len), (16, 200, 83)}, "data2": {(batch_size, feat_len)}}
1737
- column_shape_set = {col: set() for col in dataset_columns}
1738
- dataset_size_counter = 0
1739
- for data in self.create_dict_iterator(num_epochs=1, output_numpy=True):
1740
- dataset_size_counter += 1
1741
- for col in data.keys():
1742
- if col in dynamic_columns:
1743
- Dataset.__check_dynamic_column_shape(data, col, dynamic_columns)
1744
- column_shape_set[col].add(tuple(data[col].shape))
1745
-
1746
- # we get dataset_size after dryrun
1747
- self.dataset_size = dataset_size_counter
1748
-
1749
- min_shapes, max_shapes, dynamic_shapes = list(), list(), list()
1750
- for col, shape_set in column_shape_set.items():
1751
- if len(shape_set) > 1:
1752
- if col not in dynamic_columns:
1753
- raise RuntimeError("column [" + col + "] has dynamic shape but not set by set_dynamic_columns()" +
1754
- ", shapes of [" + col + "]: " + str(list(shape_set)))
1755
- shape_npy = np.array(list(shape_set))
1756
- max_shape = shape_npy.max(axis=0)
1757
- min_shape = shape_npy.min(axis=0)
1758
-
1759
- # Set min shape to 1 due to unknown shuffle
1760
- min_shape = np.where(np.equal(dynamic_columns[col], None), 1, min_shape)
1761
- # Set dynamic dim to -1 for ME
1762
- dynamic_shape = np.where(np.equal(dynamic_columns[col], None), -1, dynamic_columns[col])
1763
-
1764
- max_shapes.append(max_shape.tolist())
1765
- min_shapes.append(min_shape.tolist())
1766
- dynamic_shapes.append(dynamic_shape.tolist())
1767
- else:
1768
- # Also append fix shape to keep order of column shape
1769
- fix_shape = list(list(shape_set)[0])
1770
- max_shapes.append(fix_shape)
1771
- min_shapes.append(fix_shape)
1772
- dynamic_shapes.append(fix_shape)
1773
- if col in dynamic_columns:
1774
- logger.warning("column [" + col + "] has no dynamic shape but set by set_dynamic_columns()")
1775
- # Set min shape to 1 due to unknown shuffle
1776
- min_shapes[-1] = np.where(np.equal(dynamic_columns[col], None), 1, fix_shape).tolist()
1777
- # Set dynamic dim to -1 for ME
1778
- dynamic_shapes[-1] = np.where(np.equal(dynamic_columns[col], None), -1, fix_shape).tolist()
1779
- return dynamic_shapes, min_shapes, max_shapes
1780
-
1781
1680
  def num_classes(self):
1782
1681
  """
1783
1682
  Get the number of classes in a dataset.
@@ -1820,8 +1719,41 @@ class Dataset:
1820
1719
  condition_name (str): The condition name that is used to toggle sending next row.
1821
1720
  num_batch (Union[int, None]): The number of batches (rows) that are released.
1822
1721
  When num_batch is None, it will default to the number specified by the
1823
- sync_wait operator (default=None).
1824
- data (Any): The data passed to the callback, user defined (default=None).
1722
+ sync_wait operation. Default: None.
1723
+ data (Any): The data passed to the callback, user defined. Default: None.
1724
+
1725
+ Examples:
1726
+ >>> import numpy as np
1727
+ >>>
1728
+ >>>
1729
+ >>> def gen():
1730
+ ... for i in range(100):
1731
+ ... yield (np.array(i),)
1732
+ >>>
1733
+ >>>
1734
+ >>> class Augment:
1735
+ ... def __init__(self, loss):
1736
+ ... self.loss = loss
1737
+ ...
1738
+ ... def preprocess(self, input_):
1739
+ ... return input_
1740
+ ...
1741
+ ... def update(self, data):
1742
+ ... self.loss = data["loss"]
1743
+ >>>
1744
+ >>>
1745
+ >>> batch_size = 10
1746
+ >>> dataset = ds.GeneratorDataset(gen, column_names=["input"])
1747
+ >>> aug = Augment(0)
1748
+ >>> dataset = dataset.sync_wait(condition_name='', num_batch=1)
1749
+ >>> dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess])
1750
+ >>> dataset = dataset.batch(batch_size)
1751
+ >>>
1752
+ >>> count = 0
1753
+ >>> for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
1754
+ ... count += 1
1755
+ ... data = {"loss": count}
1756
+ ... dataset.sync_update(condition_name="", data=data)
1825
1757
  """
1826
1758
  if (not isinstance(num_batch, int) and num_batch is not None) or \
1827
1759
  (isinstance(num_batch, int) and num_batch <= 0):
@@ -1845,7 +1777,7 @@ class Dataset:
1845
1777
  Return the size of batch.
1846
1778
 
1847
1779
  Returns:
1848
- int, the number of data in a batch.
1780
+ int, the batch size of data.
1849
1781
 
1850
1782
  Examples:
1851
1783
  >>> # dataset is an instance object of Dataset
@@ -1860,7 +1792,7 @@ class Dataset:
1860
1792
 
1861
1793
  def get_repeat_count(self):
1862
1794
  """
1863
- Get the replication times in RepeatDataset (default is 1).
1795
+ Get the replication times in RepeatDataset. Default: 1.
1864
1796
 
1865
1797
  Returns:
1866
1798
  int, the count of repeat.
@@ -1894,7 +1826,18 @@ class Dataset:
1894
1826
  return {}
1895
1827
 
1896
1828
  def reset(self):
1897
- """Reset the dataset for next epoch."""
1829
+ """
1830
+ Reset the dataset for next epoch.
1831
+
1832
+ Examples:
1833
+ >>> mind_dataset_dir = ["/path/to/mind_dataset_file"]
1834
+ >>> dataset = ds.MindDataset(dataset_files=mind_dataset_dir)
1835
+ >>> for _ in range(5):
1836
+ ... num_iter = 0
1837
+ ... for data in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
1838
+ ... num_iter += 1
1839
+ ... dataset.reset()
1840
+ """
1898
1841
 
1899
1842
  def is_shuffled(self):
1900
1843
  """Returns True if the dataset or its children is shuffled."""
@@ -1915,6 +1858,15 @@ class Dataset:
1915
1858
  def parse(self, children=None):
1916
1859
  raise NotImplementedError("Dataset has to implement parse method.")
1917
1860
 
1861
+ def __len__(self):
1862
+ """
1863
+ Get the length of dataset.
1864
+
1865
+ Returns:
1866
+ int, the length of dataset.
1867
+ """
1868
+ return self.get_dataset_size()
1869
+
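The new `__len__` simply delegates to `get_dataset_size()`, so a pipeline can be passed straight to `len()`. A minimal sketch (the generator source and column name below are illustrative, not from this diff):

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(8):
        yield (np.array(i),)

dataset = ds.GeneratorDataset(gen, column_names=["data"]).batch(2)
# __len__ delegates to get_dataset_size(), so both report 4 batches here.
assert len(dataset) == dataset.get_dataset_size() == 4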
1918
1870
  @staticmethod
1919
1871
  def _update_data_shard(num_shards, shard_id):
1920
1872
  """
@@ -1970,6 +1922,10 @@ class TextBaseDataset(Dataset):
1970
1922
  Build a vocab from a dataset. This would collect all the unique words in a dataset and return a vocab
1971
1923
  which contains top_k most frequent words (if top_k is specified).
1972
1924
 
1925
+ Note:
1926
+ mindspore.dataset.Dataset.build_vocab is deprecated from version 2.0
1927
+ and will be removed in a future version. Use mindspore.dataset.text.Vocab.from_dataset instead.
1928
+
1973
1929
  Args:
1974
1930
  columns(Union[str, list[str]]): Column names to get words from.
1975
1931
  freq_range(tuple[int]): A tuple of integers (min_frequency, max_frequency). Words within the frequency
@@ -1984,22 +1940,60 @@ class TextBaseDataset(Dataset):
1984
1940
 
1985
1941
  Returns:
1986
1942
  Vocab, vocab built from the dataset.
1943
+ """
1944
+ warnings.warn("mindspore.dataset.Dataset.build_vocab is deprecated from version 2.0 "
1945
+ "and will be removed in a future version. "
1946
+ "Use mindspore.dataset.text.Vocab.from_dataset instead.", DeprecationWarning)
1987
1947
 
1988
- Examples:
1989
- >>> import numpy as np
1990
- >>>
1991
- >>> def gen_corpus():
1992
- ... # key: word, value: number of occurrences, reason for using letters is so their order is apparent
1993
- ... corpus = {"Z": 4, "Y": 4, "X": 4, "W": 3, "U": 3, "V": 2, "T": 1}
1994
- ... for k, v in corpus.items():
1995
- ... yield (np.array([k] * v, dtype='S'),)
1996
- >>> column_names = ["column1"]
1997
- >>> dataset = ds.GeneratorDataset(gen_corpus, column_names)
1998
- >>> dataset = dataset.build_vocab(columns=["column1"],
1999
- ... freq_range=(1, 10), top_k=5,
2000
- ... special_tokens=["<pad>", "<unk>"],
2001
- ... special_first=True)
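With build_vocab reduced to a deprecation shim, the example removed above maps onto mindspore.dataset.text.Vocab.from_dataset roughly as follows (a sketch using the same corpus generator and arguments as the old docstring example):

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.text as text

def gen_corpus():
    # key: word, value: number of occurrences
    corpus = {"Z": 4, "Y": 4, "X": 4, "W": 3, "U": 3, "V": 2, "T": 1}
    for k, v in corpus.items():
        yield (np.array([k] * v, dtype='S'),)

dataset = ds.GeneratorDataset(gen_corpus, column_names=["column1"])
# Vocab.from_dataset takes the same vocabulary-building arguments as the old Dataset.build_vocab.
vocab = text.Vocab.from_dataset(dataset, columns=["column1"], freq_range=(1, 10), top_k=5,
                                special_tokens=["<pad>", "<unk>"], special_first=True)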
1948
+ def build_sentencepiece_vocab(self, columns, vocab_size, character_coverage, model_type, params):
1949
+ """
1950
+ Function to create a SentencePieceVocab from source dataset.
1951
+ Desired source dataset is a text type dataset.
1952
+
1953
+ Note:
1954
+ mindspore.dataset.Dataset.build_sentencepiece_vocab is deprecated from version 2.0
1955
+ and will be removed in a future version. Use mindspore.dataset.text.SentencePieceVocab.from_dataset instead.
1956
+
1957
+ Args:
1958
+ columns(list[str]): Column names to get words from.
1959
+ vocab_size(int): Vocabulary size.
1960
+ character_coverage(float): Percentage of characters covered by the model, must be between
1961
+ 0.98 and 1.0. Good defaults are: 0.9995 for languages with rich character sets like
1962
+ Japanese or Chinese character sets, and 1.0 for other languages with small character sets
1963
+ like English or Latin.
1964
+ model_type(SentencePieceModel): Model type. Choose from unigram (default), bpe, char, or word.
1965
+ The input sentence must be pretokenized when using word type.
1966
+ params(dict): Any extra optional parameters of the sentencepiece library according to your raw data.
1967
+
1968
+ Returns:
1969
+ SentencePieceVocab, vocab built from the dataset.
1970
+ """
1971
+ warnings.warn("mindspore.dataset.Dataset.build_vocab is deprecated from version 2.0 "
1972
+ "and will be removed in a future version. "
1973
+ "Use mindspore.dataset.text.Vocab.from_dataset instead.", DeprecationWarning)
2002
1974
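The same migration applies to build_sentencepiece_vocab: its replacement is mindspore.dataset.text.SentencePieceVocab.from_dataset. A sketch mirroring the docstring example removed further below:

import mindspore.dataset as ds
from mindspore.dataset.text import SentencePieceVocab, SentencePieceModel

# Any text dataset can be the source; a TextFileDataset is used here as in the removed example.
dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
vocab = SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
                                        SentencePieceModel.UNIGRAM, {})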
 
1975
+ def _build_vocab(self, columns, freq_range, top_k, special_tokens, special_first):
1976
+ """
1977
+ Function to create a Vocab from source dataset.
1978
+ Desired source dataset is a text type dataset.
1979
+
1980
+ Build a vocab from a dataset. This would collect all the unique words in a dataset and return a vocab
1981
+ which contains top_k most frequent words (if top_k is specified).
1982
+
1983
+ Args:
1984
+ columns(Union[str, list[str]]): Column names to get words from.
1985
+ freq_range(tuple[int]): A tuple of integers (min_frequency, max_frequency). Words within the frequency
1986
+ range will be stored.
1987
+ Naturally 0 <= min_frequency <= max_frequency <= total_words. min_frequency/max_frequency
1988
+ can be set to default, which corresponds to 0/total_words separately.
1989
+ top_k(int): Number of words to be built into vocab. top_k most frequent words are
1990
+ taken. The top_k is taken after freq_range. If not enough top_k, all words will be taken
1991
+ special_tokens(list[str]): A list of strings, each one is a special token.
1992
+ special_first(bool): Whether special_tokens will be prepended/appended to vocab, If special_tokens
1993
+ is specified and special_first is set to default, special_tokens will be prepended.
1994
+
1995
+ Returns:
1996
+ Vocab, vocab built from the dataset.
2003
1997
  """
2004
1998
  vocab = cde.Vocab()
2005
1999
  columns = replace_none(columns, [])
@@ -2032,7 +2026,7 @@ class TextBaseDataset(Dataset):
2032
2026
 
2033
2027
  return vocab
2034
2028
 
2035
- def build_sentencepiece_vocab(self, columns, vocab_size, character_coverage, model_type, params):
2029
+ def _build_sentencepiece_vocab(self, columns, vocab_size, character_coverage, model_type, params):
2036
2030
  """
2037
2031
  Function to create a SentencePieceVocab from source dataset.
2038
2032
  Desired source dataset is a text type dataset.
@@ -2050,13 +2044,6 @@ class TextBaseDataset(Dataset):
2050
2044
 
2051
2045
  Returns:
2052
2046
  SentencePieceVocab, vocab built from the dataset.
2053
-
2054
- Examples:
2055
- >>> from mindspore.dataset.text import SentencePieceModel
2056
- >>>
2057
- >>> # You can construct any text dataset as source, take TextFileDataset as example.
2058
- >>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
2059
- >>> dataset = dataset.build_sentencepiece_vocab(["text"], 5000, 0.9995, SentencePieceModel.UNIGRAM, {})
2060
2047
  """
2061
2048
  if not isinstance(model_type, SentencePieceModel):
2062
2049
  raise TypeError("Argument model_type with value {0} is not of type SentencePieceModel, but got {1}." \
@@ -2264,7 +2251,7 @@ class MappableDataset(SourceDataset):
2264
2251
  - The sum of split sizes > K, the difference will be removed from the first large
2265
2252
  enough split such that it will have at least 1 row after removing the difference.
2266
2253
 
2267
- randomize (bool, optional): Determines whether or not to split the data randomly (default=True).
2254
+ randomize (bool, optional): Determines whether or not to split the data randomly. Default: True.
2268
2255
  If True, the data will be randomly split. Otherwise, each split will be created with
2269
2256
  consecutive rows from the dataset.
2270
2257
 
@@ -2340,7 +2327,7 @@ class MappableDataset(SourceDataset):
2340
2327
 
2341
2328
  class BucketBatchByLengthDataset(UnionBaseDataset):
2342
2329
  """
2343
- The result of applying BucketBatchByLength operator to the input dataset.
2330
+ The result of applying BucketBatchByLength operation to the input dataset.
2344
2331
  """
2345
2332
 
2346
2333
  def __init__(self, input_dataset, column_names, bucket_boundaries, bucket_batch_sizes, element_length_function,
@@ -2391,17 +2378,17 @@ def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
2391
2378
 
2392
2379
  class BatchDataset(UnionBaseDataset):
2393
2380
  """
2394
- The result of applying Batch operator to the input dataset.
2381
+ The result of applying Batch operation to the input dataset.
2395
2382
 
2396
2383
  Args:
2397
2384
  input_dataset (Dataset): Input Dataset to be batched.
2398
2385
  batch_size (Union[int, function]): The number of rows each batch is created with. An
2399
2386
  int or callable which takes exactly 1 parameter, BatchInfo.
2400
2387
  drop_remainder (bool, optional): Determines whether or not to drop the last
2401
- possibly incomplete batch (default=False). If True, and if there are less
2388
+ possibly incomplete batch. Default: False. If True, and if there are less
2402
2389
  than batch_size rows available to make the last batch, then those rows will
2403
2390
  be dropped and not propagated to the child node.
2404
- num_parallel_workers (int, optional): Number of workers to process the dataset in parallel (default=None).
2391
+ num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None.
2405
2392
  per_batch_map (callable, optional): Per batch map callable. A callable which takes
2406
2393
  (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch of
2407
2394
  Tensors on a given column. The number of lists should match with number of entries in input_columns. The
@@ -2411,21 +2398,15 @@ class BatchDataset(UnionBaseDataset):
2411
2398
  output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
2412
2399
  the last operation. This parameter is mandatory if len(input_columns) !=
2413
2400
  len(output_columns). The size of this list must match the number of output
2414
- columns of the last operation. (default=None, output columns will have the same
2415
- name as the input columns, i.e., the columns will be replaced).
2416
- column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
2417
- dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
2418
- is not just the columns specified in parameter input_columns and output_columns.
2419
- pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
2420
- will pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
2401
+ columns of the last operation. Default: None, output columns will have the same
2402
+ name as the input columns, i.e., the columns will be replaced.
2421
2403
  max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
2422
- data between processes. This is only used if python_multiprocessing is set to True (default=16).
2404
+ data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
2423
2405
 
2424
2406
  """
2425
2407
 
2426
2408
  def __init__(self, input_dataset, batch_size, drop_remainder=False, num_parallel_workers=None, per_batch_map=None,
2427
- input_columns=None, output_columns=None, column_order=None, pad_info=None,
2428
- python_multiprocessing=False, max_rowsize=16):
2409
+ input_columns=None, output_columns=None, python_multiprocessing=False, max_rowsize=16):
2429
2410
  super().__init__(children=input_dataset, num_parallel_workers=num_parallel_workers)
2430
2411
 
2431
2412
  if BatchDataset._is_ancestor_of_repeat(input_dataset):
@@ -2443,10 +2424,6 @@ class BatchDataset(UnionBaseDataset):
2443
2424
 
2444
2425
  self.input_columns = to_list(input_columns)
2445
2426
  self.output_columns = to_list(output_columns)
2446
- self.column_order = to_list(column_order)
2447
-
2448
- self.pad = bool(pad_info is not None)
2449
- self.pad_info = replace_none(pad_info, dict())
2450
2427
 
2451
2428
  self.python_multiprocessing = python_multiprocessing
2452
2429
  self.process_pool = None
@@ -2458,9 +2435,9 @@ class BatchDataset(UnionBaseDataset):
2458
2435
  del self.process_pool
2459
2436
 
2460
2437
  def parse(self, children=None):
2461
- return cde.BatchNode(children[0], self.batch_size, self.drop_remainder, self.pad, self.input_columns,
2462
- self.output_columns, self.column_order, self.batch_size_func, self.per_batch_map,
2463
- self.pad_info, self.process_pool)
2438
+ return cde.BatchNode(children[0], self.batch_size, self.drop_remainder, False, self.input_columns,
2439
+ self.output_columns, self.batch_size_func, self.per_batch_map, {},
2440
+ self.process_pool)
2464
2441
 
2465
2442
  @staticmethod
2466
2443
  def _is_ancestor_of_repeat(dataset):
@@ -2506,6 +2483,10 @@ class BatchDataset(UnionBaseDataset):
2506
2483
  """
2507
2484
  if self.python_multiprocessing and platform.system().lower() == 'windows':
2508
2485
  logger.warning("Python multiprocessing is not supported on Windows platform.")
2486
+ if self.python_multiprocessing and get_debug_mode():
2487
+ logger.warning("Python multiprocessing is not supported in debug mode."
2488
+ " Ignoring Python multiprocessing for batch operation.")
2489
+ self.python_multiprocessing = False
2509
2490
  if self.python_multiprocessing and platform.system().lower() != 'windows':
2510
2491
  if self.per_batch_map is None:
2511
2492
  logger.warning("per_batch_map is None so python_multiprocessing is ignored for batch.")
@@ -2526,7 +2507,7 @@ class BatchDataset(UnionBaseDataset):
2526
2507
 
2527
2508
  class BatchInfo(cde.CBatchInfo):
2528
2509
  """
2529
- Only the batch size function and per_batch_map of the batch operator can dynamically adjust parameters
2510
+ Only the batch size function and per_batch_map of the batch operation can dynamically adjust parameters
2530
2511
  based on the number of batches and epochs during training.
2531
2512
  """
2532
2513
 
@@ -2549,7 +2530,7 @@ class BlockReleasePair:
2549
2530
 
2550
2531
  Args:
2551
2532
  init_release_rows (int): Number of lines to allow through the pipeline.
2552
- callback (function): The callback function that will be called when release is called (default=None).
2533
+ callback (function): The callback function that will be called when release is called. Default: None.
2553
2534
  """
2554
2535
 
2555
2536
  def __init__(self, init_release_rows, callback=None):
@@ -2612,6 +2593,80 @@ class BlockReleasePair:
2612
2593
  self.cv.notify_all()
2613
2594
 
2614
2595
 
2596
+ class PaddedBatchDataset(UnionBaseDataset):
2597
+ """
2598
+ The result of applying the padded_batch operation to the input dataset.
2599
+
2600
+ Args:
2601
+ input_dataset (Dataset): Input Dataset to be batched.
2602
+ batch_size (Union[int, function]): The number of rows each batch is created with. An
2603
+ int or callable which takes exactly 1 parameter, BatchInfo.
2604
+ drop_remainder (bool, optional): Determines whether or not to drop the last
2605
+ possibly incomplete batch. Default: False. If True, and if there are less
2606
+ than batch_size rows available to make the last batch, then those rows will
2607
+ be dropped and not propagated to the child node.
2608
+ num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None.
2609
+ pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
2610
+ will pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
2611
+ """
2612
+
2613
+ def __init__(self, input_dataset, batch_size, drop_remainder=False, num_parallel_workers=None, pad_info=None):
2614
+ super().__init__(children=input_dataset, num_parallel_workers=num_parallel_workers)
2615
+
2616
+ if PaddedBatchDataset._is_ancestor_of_repeat(input_dataset):
2617
+ logger.warning("Repeat is located before padded_batch, data from two epochs can be batched together.")
2618
+
2619
+ PaddedBatchDataset._update_batch_size_for_syncwait(input_dataset, batch_size)
2620
+
2621
+ # if batch_size is callable, set batch_size to 1 and batch_size_func to that callable function
2622
+ self.batch_size = batch_size if not callable(batch_size) else 1
2623
+ self.batch_size_func = None if not callable(batch_size) else batch_size
2624
+
2625
+ self.drop_remainder = replace_none(drop_remainder, False)
2626
+
2627
+ self.pad = bool(pad_info is not None)
2628
+ self.pad_info = replace_none(pad_info, dict())
2629
+
2630
+ def parse(self, children=None):
2631
+ return cde.BatchNode(children[0], self.batch_size, self.drop_remainder, self.pad, [],
2632
+ [], self.batch_size_func, None, self.pad_info, None)
2633
+
2634
+ @staticmethod
2635
+ def _is_ancestor_of_repeat(dataset):
2636
+ """
2637
+ Utility function to find the case where repeat is used before batch.
2638
+
2639
+ Args:
2640
+ dataset (Dataset): Dataset to be checked.
2641
+
2642
+ Returns:
2643
+ bool, whether repeat is used before batch.
2644
+ """
2645
+ if isinstance(dataset, RepeatDataset):
2646
+ return True
2647
+ flag = False
2648
+ for input_dataset in dataset.children:
2649
+ flag = flag | PaddedBatchDataset._is_ancestor_of_repeat(input_dataset)
2650
+ return flag
2651
+
2652
+ @staticmethod
2653
+ def _update_batch_size_for_syncwait(dataset, batch_size):
2654
+ """
2655
+ Utility function to notify batch size to sync_wait.
2656
+
2657
+ Args:
2658
+ dataset (Dataset): Dataset to be checked.
2659
+ batch_size (int): batch size to notify.
2660
+ """
2661
+ if isinstance(dataset, SyncWaitDataset):
2662
+ dataset.update_sync_batch_size(batch_size)
2663
+ for input_dataset in dataset.children:
2664
+ PaddedBatchDataset._update_batch_size_for_syncwait(input_dataset, batch_size)
2665
+
2666
+ def __deepcopy__(self, memodict):
2667
+ return self.__safe_deepcopy__(memodict, exclude=("batch_size_func", "__transfer_dataset__"))
2668
+
2669
+
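The pad_info argument documented in this class is normally supplied through the padded_batch entry point referenced in the warning above (exact call shown here is a sketch, with illustrative shapes and fill values):

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(1, 4):
        yield (np.array([1] * i, dtype=np.int32),)   # rows of length 1, 2 and 3

dataset = ds.GeneratorDataset(gen, column_names=["col1"])
# Pad "col1" to shape [3], filling the missing positions with 0, then batch all 3 rows together.
dataset = dataset.padded_batch(3, pad_info={"col1": ([3], 0)})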
2615
2670
  class SyncWaitDataset(UnionBaseDataset):
2616
2671
  """
2617
2672
  The result of adding a blocking condition to the input Dataset.
@@ -2620,7 +2675,7 @@ class SyncWaitDataset(UnionBaseDataset):
2620
2675
  input_dataset (Dataset): Input dataset to apply flow control.
2621
2676
  num_batch (int): Number of batches without blocking at the start of each epoch.
2622
2677
  condition_name (str): Condition name that is used to toggle sending next row.
2623
- callback (function): Callback function that will be invoked when sync_update is called (default=None).
2678
+ callback (function): Callback function that will be invoked when sync_update is called. Default: None.
2624
2679
 
2625
2680
  Raises:
2626
2681
  RuntimeError: If condition name already exists.
@@ -2670,7 +2725,7 @@ class SyncWaitDataset(UnionBaseDataset):
2670
2725
  Returns:
2671
2726
  bool, whether sync_wait is used before batch.
2672
2727
  """
2673
- if isinstance(dataset, BatchDataset):
2728
+ if isinstance(dataset, (BatchDataset, PaddedBatchDataset)):
2674
2729
  return True
2675
2730
  flag = False
2676
2731
  for input_dataset in dataset.children:
@@ -2683,14 +2738,14 @@ class SyncWaitDataset(UnionBaseDataset):
2683
2738
 
2684
2739
  class ShuffleDataset(UnionBaseDataset):
2685
2740
  """
2686
- The result of applying Shuffle operator to the input Dataset.
2741
+ The result of applying Shuffle operation to the input Dataset.
2687
2742
 
2688
2743
  Args:
2689
2744
  input_dataset (Dataset): Input Dataset to be shuffled.
2690
2745
  buffer_size (int): Size of the buffer.
2691
2746
 
2692
2747
  Raises:
2693
- RuntimeError: If exist sync operators before shuffle.
2748
+ RuntimeError: If exist sync operations before shuffle.
2694
2749
  """
2695
2750
 
2696
2751
  def __init__(self, input_dataset, buffer_size):
@@ -2727,17 +2782,38 @@ class _PythonCallable:
2727
2782
  self.pool = pool
2728
2783
  # Python callable index
2729
2784
  self.idx = idx
2785
+ self.check_interval = get_multiprocessing_timeout_interval()
2730
2786
 
2731
2787
  def __call__(self, *args):
2732
2788
  result = None
2733
- if self.pool.is_running() and check_iterator_cleanup() is False:
2734
- try:
2735
- result = self.pool.execute(self.idx, *args)
2736
- except multiprocessing.TimeoutError:
2737
- pass
2738
- if result is None:
2739
- # Invoke original Python callable in master process in case the pool is gone.
2740
- result = self.py_callable(*args)
2789
+ start_time = time.time()
2790
+ count = 1
2791
+ get_data_from_worker_process = False
2792
+ while get_data_from_worker_process is False:
2793
+ cost_time = time.time() - start_time
2794
+ if cost_time > (self.check_interval * count):
2795
+ logger.warning("It has been waiting for " + str(cost_time) + "s because the multi "
2796
+ "workers of map operation cost long time to process next data. "
2797
+ "Worker process list are: " + str(self.pool.get_pids()) + ", you can use "
2798
+ "\"py-spy dump -p {PID} -l -s \""
2799
+ "to dump the worker process stack. You can also set the timeout interval by "
2800
+ "ds.config.set_multiprocessing_interval to adjust the output frequency of this "
2801
+ "log.")
2802
+ count += 1
2803
+ if self.pool.is_running() and check_iterator_cleanup() is False:
2804
+ try:
2805
+ result = self.pool.execute(self.idx, *args)
2806
+ except multiprocessing.TimeoutError:
2807
+ continue
2808
+ get_data_from_worker_process = True
2809
+ else:
2810
+ # worker process is stopped
2811
+ logger.info("The worker process of map operation is stopped. "
2812
+ "So return None to main thread and break the main thread.")
2813
+ return None
2814
+ # got value from worker process
2815
+ if not isinstance(result, tuple) and get_data_from_worker_process is True:
2816
+ result = (result,)
2741
2817
  return result
2742
2818
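The check_interval polled in the loop above comes from the dataset config. Assuming the setter paired with the get_multiprocessing_timeout_interval() call shown in the diff (the setter name is an assumption here), the warning cadence can be tuned like this:

import mindspore.dataset as ds

# Assumed setter matching get_multiprocessing_timeout_interval(): warn after 600 s of
# waiting instead of the default interval used by the worker loop above.
ds.config.set_multiprocessing_timeout_interval(600)
print(ds.config.get_multiprocessing_timeout_interval())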
 
2743
2819
  def to_json(self):
@@ -2833,7 +2909,7 @@ def _worker_loop(operations, pipe, seed=get_seed()):
2833
2909
  pipe.worker_send(output_tensors)
2834
2910
  except Exception:
2835
2911
  pipe.worker_send(ExceptionHandler(where="in map(or batch) worker and execute Python function"))
2836
- return
2912
+ # Do not return
2837
2913
 
2838
2914
 
2839
2915
  def worker_target(operations, seed=get_seed()):
@@ -2922,7 +2998,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
2922
2998
  self.ppid = os.getpid()
2923
2999
  self.hook = None
2924
3000
  self.warning_ctl = None
2925
- self.threads_to_workers = {}
3001
+ # cache thread (get_ident()) to worker_id mapping in Python layer
3002
+ self.python_threads_to_workers = {}
2926
3003
 
2927
3004
  def __del__(self):
2928
3005
  try:
@@ -3067,13 +3144,24 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3067
3144
  os.kill(os.getpid(), signal.SIGTERM)
3068
3145
 
3069
3146
  def launch(self, op_id=-1):
3147
+ """
3148
+ Launch Python multiprocessing pool.
3149
+
3150
+ Args:
3151
+ op_id: ID of the operation for which the Python multiprocessing pool is launched.
3152
+
3153
+ Returns:
3154
+ Python multiprocessing pool is launched.
3155
+ """
3156
+ self.python_threads_to_workers = {}
3070
3157
  self.op_id = op_id
3071
3158
  logger.info("Launching new Python Multiprocessing pool for Op:" + str(self.op_id))
3072
3159
  if self.is_mp_enabled():
3073
- logger.warning('Launching a new Python multiprocessing pool while a pool already exists! \
3074
- The existing pool will be terminated first.')
3160
+ message = "Launching a new Python multiprocessing pool while a pool already exists!" + \
3161
+ " The existing pool will be terminated first."
3162
+ logger.warning(message)
3075
3163
  self.terminate()
3076
- self.threads_to_workers = {}
3164
+ self.reset()
3077
3165
  self.create_pool()
3078
3166
 
3079
3167
  def create_pool(self):
@@ -3109,7 +3197,6 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3109
3197
  atexit.register(self.terminate)
3110
3198
 
3111
3199
  def terminate(self):
3112
- logger.info("Terminating Python Multiprocessing for Op:" + str(self.op_id))
3113
3200
  self.close_all_workers()
3114
3201
  self.abort_watchdog()
3115
3202
 
@@ -3166,7 +3253,10 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3166
3253
  Execute
3167
3254
  """
3168
3255
  t_id = threading.get_ident()
3169
- worker_id = self.threads_to_workers.setdefault(t_id, len(self.threads_to_workers))
3256
+ # get the worker_id from Python layer cache first, get from Cpp layer if not found.
3257
+ worker_id = self.python_threads_to_workers.setdefault(t_id, self.get_thread_to_worker())
3258
+ if worker_id >= len(self.workers):
3259
+ raise RuntimeError("[Internal] worker_id value is greater than number of available workers!")
3170
3260
 
3171
3261
  # todo check_iterator_cleanup
3172
3262
  if self.is_running() and check_iterator_cleanup() is False:
@@ -3220,38 +3310,32 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3220
3310
 
3221
3311
  class MapDataset(UnionBaseDataset):
3222
3312
  """
3223
- The result of applying the Map operator to the input Dataset.
3313
+ The result of applying the Map operation to the input Dataset.
3224
3314
 
3225
3315
  Args:
3226
3316
  input_dataset (Dataset): Input Dataset to be mapped.
3227
3317
  operations (Union[list[TensorOperation], list[functions]]): A function mapping a nested structure of tensors
3228
- to another nested structure of tensor (default=None).
3229
- input_columns (Union[str, list[str]]): List of names of the input columns
3230
- (default=None, the operations will be applied on the first columns in the dataset).
3231
- The size of the list should match the number of inputs of the first operator.
3318
+ to another nested structure of tensor. Default: None.
3319
+ input_columns (Union[str, list[str]]): List of names of the input columns.
3320
+ Default: None, the operations will be applied on the first columns in the dataset.
3321
+ The size of the list should match the number of inputs of the first operation.
3232
3322
  output_columns (Union[str, list[str]], optional): List of names of the output columns.
3233
- The size of the list should match the number of outputs of the last operator
3234
- (default=None, output columns will be the input columns, i.e., the columns will
3235
- be replaced).
3236
- column_order (list[str], optional): Specifies the list of all the columns you need in the whole
3237
- dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
3238
- is not just the columns specified in parameter input_columns and output_columns.
3323
+ The size of the list should match the number of outputs of the last operation.
3324
+ Default: None, output columns will be the input columns, i.e., the columns will
3325
+ be replaced.
3239
3326
  num_parallel_workers (int, optional): Number of workers to process the dataset
3240
- in parallel (default=None).
3327
+ in parallel. Default: None.
3241
3328
  python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
3242
- option could be beneficial if the Python operation is computational heavy (default=False).
3329
+ option could be beneficial if the Python operation is computational heavy. Default: False.
3243
3330
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
3244
- (default=None, which means no cache is used).
3245
- callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called (Default=None)
3331
+ Default: None, which means no cache is used.
3332
+ callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: None.
3246
3333
  max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
3247
- data between processes. This is only used if python_multiprocessing is set to True (default=16).
3248
- offload (bool, optional): Flag to indicate whether offload is used (Default=None).
3249
-
3250
- Raises:
3251
- ValueError: If len(input_columns) != len(output_columns) and column_order is not specified.
3334
+ data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
3335
+ offload (bool, optional): Flag to indicate whether offload is used. Default: None.
3252
3336
  """
3253
3337
 
3254
- def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None, column_order=None,
3338
+ def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None,
3255
3339
  num_parallel_workers=None, python_multiprocessing=False, cache=None, callbacks=None, max_rowsize=16,
3256
3340
  offload=None):
3257
3341
  super().__init__(children=input_dataset, num_parallel_workers=num_parallel_workers, cache=cache)
@@ -3270,17 +3354,10 @@ class MapDataset(UnionBaseDataset):
3270
3354
 
3271
3355
  self.input_columns = to_list(input_columns)
3272
3356
  self.output_columns = to_list(output_columns)
3273
- self.column_order = replace_none(column_order, [])
3274
3357
 
3275
3358
  # If output_columns were not provided then use input_columns
3276
3359
  self.output_columns = self.input_columns if not self.output_columns else self.output_columns
3277
3360
 
3278
- if self.input_columns and self.output_columns \
3279
- and len(self.input_columns) != len(self.output_columns) \
3280
- and not self.column_order:
3281
- raise ValueError("When length of input_columns and output_columns are not equal,"
3282
- " column_order must be specified.")
3283
-
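With column_order and its ValueError gone, a map that changes the number of columns no longer reorders the pipeline by itself; column selection is expressed separately with project(). A rough 2.0-style sketch (column names are illustrative):

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(4):
        yield (np.array(i, dtype=np.int32),)

dataset = ds.GeneratorDataset(gen, column_names=["col1"])
# One input column mapped to two output columns; no column_order argument anymore.
dataset = dataset.map(operations=[lambda x: (x, x * 2)],
                      input_columns=["col1"], output_columns=["col1", "col2"])
# Any column selection or reordering is now an explicit project() step.
dataset = dataset.project(["col2", "col1"])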
3284
3361
  self.python_multiprocessing = python_multiprocessing
3285
3362
  self.process_pool = None
3286
3363
 
@@ -3297,14 +3374,23 @@ class MapDataset(UnionBaseDataset):
3297
3374
  if count_new_transforms + count_pyfunc == len(operations):
3298
3375
  prev_op = None
3299
3376
  for op in operations:
3377
+ # skip user added DebugHook to avoid changing to Py-implementation.
3378
+ if self.__is_debug_hook_op(op):
3379
+ if prev_op:
3380
+ # manually set previous_op_name
3381
+ prev_op_name = self.__parse_op_name(prev_op)
3382
+ op.set_previous_op_name(prev_op_name)
3383
+ continue
3300
3384
  if op.implementation is None:
3301
3385
  if prev_op and prev_op.implementation == Implementation.PY:
3302
3386
  op.implementation = Implementation.PY
3303
3387
  else:
3304
3388
  op.implementation = Implementation.C
3305
3389
  prev_op = op
3390
+ operations = self.__insert_debug_wrapper(operations)
3306
3391
  operations = transforms.transforms.Compose.reduce(operations)
3307
3392
  elif count_old_transforms + count_pyfunc + count_non_data_vision_transforms == len(operations):
3393
+ operations = self.__insert_debug_wrapper(operations)
3308
3394
  operations = transforms.py_transforms.Compose.reduce(operations)
3309
3395
  else:
3310
3396
  raise RuntimeError("Mixing old legacy c/py_transforms and new unified transforms is not allowed.")
@@ -3313,7 +3399,7 @@ class MapDataset(UnionBaseDataset):
3313
3399
  self.prepare_multiprocessing()
3314
3400
 
3315
3401
  callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
3316
- return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns, self.column_order,
3402
+ return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
3317
3403
  callbacks, self.max_rowsize, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
3318
3404
 
3319
3405
  def __deepcopy__(self, memodict):
@@ -3324,6 +3410,49 @@ class MapDataset(UnionBaseDataset):
3324
3410
  self.process_pool.terminate()
3325
3411
  del self.process_pool
3326
3412
 
3413
+ @staticmethod
3414
+ def __parse_op_name(op):
3415
+ """
3416
+ Utility method to get operation name.
3417
+ """
3418
+ op_name = ""
3419
+ if isinstance(op, transforms.py_transforms_util.FuncWrapper):
3420
+ try:
3421
+ op_name = op.transform.__name__
3422
+ except (AttributeError,):
3423
+ op_name = op.transform.__class__.__name__
3424
+ else:
3425
+ op_name = op.__class__.__name__
3426
+ return op_name
3427
+
3428
+ @staticmethod
3429
+ def __construct_debug_hook(previous_op_name=None):
3430
+ """
3431
+ Wrap debug hook into FuncWrapper.
3432
+ """
3433
+ inserted_functions = []
3434
+ debug_hook_list = _get_debug_hook_list()
3435
+ if debug_hook_list:
3436
+ for fn in debug_hook_list:
3437
+ # making deep copy to allow each debug hook instance hold unique variables
3438
+ new_fn = copy.deepcopy(fn)
3439
+ new_fn.set_previous_op_name(previous_op_name)
3440
+ inserted_func = transforms.py_transforms_util.FuncWrapper(new_fn)
3441
+ inserted_func.implementation = Implementation.PY
3442
+ inserted_functions.append(inserted_func)
3443
+ return inserted_functions
3444
+
3445
+ @staticmethod
3446
+ def __is_debug_hook_op(op):
3447
+ """
3448
+ Check if the op is user added DebugHook and skip it to avoid changing transforms implementation.
3449
+ """
3450
+ if isinstance(op, DebugHook):
3451
+ if not get_debug_mode():
3452
+ raise ValueError("It is not allowed to inject DebugHook object in non-debug mode.")
3453
+ return True
3454
+ return False
3455
+
3327
3456
  @staticmethod
3328
3457
  def __count_pyfuncs(operations):
3329
3458
  """
@@ -3389,6 +3518,10 @@ class MapDataset(UnionBaseDataset):
3389
3518
  if self.python_multiprocessing and platform.system().lower() == 'windows':
3390
3519
  logger.warning("Python multiprocessing is not supported on Windows platform.")
3391
3520
  return
3521
+ if self.python_multiprocessing and get_debug_mode():
3522
+ logger.warning("Python multiprocessing is not supported in debug mode."
3523
+ " Ignoring Python multiprocessing for map operation.")
3524
+ return
3392
3525
  if self.python_multiprocessing:
3393
3526
  iter_specific_operations = []
3394
3527
  callable_list = []
@@ -3419,6 +3552,19 @@ class MapDataset(UnionBaseDataset):
3419
3552
  iter_specific_operations.append(op)
3420
3553
  self.operations = iter_specific_operations
3421
3554
 
3555
+ def __insert_debug_wrapper(self, operations):
3556
+ """
3557
+ Insert debug hook wrappers before and after each op if debug mode is on.
3558
+ """
3559
+ if not get_debug_mode():
3560
+ return operations
3561
+ inserted_operations = self.__construct_debug_hook()
3562
+ for op in operations:
3563
+ inserted_operations.append(op)
3564
+ op_name = self.__parse_op_name(op)
3565
+ inserted_operations.extend(self.__construct_debug_hook(op_name))
3566
+ return inserted_operations
3567
+
3422
3568
  def __decompose_callable_operations(self):
3423
3569
  """
3424
3570
  Decompose operations and build list of old legacy ops which are callable
@@ -3441,10 +3587,10 @@ class FilterDataset(UnionBaseDataset):
3441
3587
  Args:
3442
3588
  input_dataset (Dataset): Input Dataset to be mapped.
3443
3589
  predicate (callable): Python callable which returns a boolean value. If False then filter the element.
3444
- input_columns (Union[str, list[str]], optional): List of names of the input columns
3445
- (default=None, the predicate will be applied to all columns in the dataset).
3590
+ input_columns (Union[str, list[str]], optional): List of names of the input columns.
3591
+ Default: None, the predicate will be applied to all columns in the dataset.
3446
3592
  num_parallel_workers (int, optional): Number of workers to process the dataset
3447
- in parallel (default=None).
3593
+ in parallel. Default: None.
3448
3594
  """
3449
3595
 
3450
3596
  def __init__(self, input_dataset, predicate, input_columns=None, num_parallel_workers=None):
@@ -3458,11 +3604,11 @@ class FilterDataset(UnionBaseDataset):
3458
3604
 
3459
3605
  class RepeatDataset(UnionBaseDataset):
3460
3606
  """
3461
- The result of applying Repeat operator to the input Dataset.
3607
+ The result of applying Repeat operation to the input Dataset.
3462
3608
 
3463
3609
  Args:
3464
3610
  input_dataset (Dataset): Input Dataset to be repeated.
3465
- count (int): Number of times the dataset will be repeated (default=-1, repeat indefinitely).
3611
+ count (int): Number of times the dataset will be repeated. Default: -1, repeat indefinitely.
3466
3612
  """
3467
3613
 
3468
3614
  def __init__(self, input_dataset, count):
@@ -3475,7 +3621,7 @@ class RepeatDataset(UnionBaseDataset):
3475
3621
 
3476
3622
  class SkipDataset(UnionBaseDataset):
3477
3623
  """
3478
- The result of applying Skip operator to the input Dataset.
3624
+ The result of applying Skip operation to the input Dataset.
3479
3625
 
3480
3626
  Args:
3481
3627
  input_dataset (Dataset): Input dataset to have elements skipped.
@@ -3492,7 +3638,7 @@ class SkipDataset(UnionBaseDataset):
3492
3638
 
3493
3639
  class TakeDataset(UnionBaseDataset):
3494
3640
  """
3495
- The result of applying Take operator to the input Dataset.
3641
+ The result of applying Take operation to the input Dataset.
3496
3642
 
3497
3643
  Args:
3498
3644
  input_dataset (Dataset): Input Dataset to have elements taken from.
@@ -3509,7 +3655,7 @@ class TakeDataset(UnionBaseDataset):
3509
3655
 
3510
3656
  class ZipDataset(UnionBaseDataset):
3511
3657
  """
3512
- The result of applying Zip operator to the input Dataset.
3658
+ The result of applying Zip operation to the input Dataset.
3513
3659
 
3514
3660
  Args:
3515
3661
  datasets (tuple): A tuple of datasets to be zipped together.
@@ -3530,7 +3676,7 @@ class ZipDataset(UnionBaseDataset):
3530
3676
 
3531
3677
  class ConcatDataset(UnionBaseDataset):
3532
3678
  """
3533
- The result of applying concat dataset operator to the input Dataset.
3679
+ The result of applying Concat operation to the input Dataset.
3534
3680
 
3535
3681
  Args:
3536
3682
  datasets (list): A list of datasets to be concatenated together.
@@ -3615,8 +3761,8 @@ class ConcatDataset(UnionBaseDataset):
3615
3761
  if hasattr(child, 'sampler') and child.sampler.get_num_samples() is not None:
3616
3762
  raise ValueError("The parameter NumSamples of %s is not support to be set!" % child)
3617
3763
 
3618
- if isinstance(child, BatchDataset):
3619
- raise TypeError("The parameter %s of concat must not be BatchDataset!" % child)
3764
+ if isinstance(child, (BatchDataset, PaddedBatchDataset)):
3765
+ raise TypeError("The parameter %s of concat must not be BatchDataset or PaddedBatchDataset!" % child)
3620
3766
 
3621
3767
  # if child is mappable and the length is greater than 0
3622
3768
  if not self._children_flag_and_nums[index][0] and self._children_flag_and_nums[index][1]:
@@ -3641,7 +3787,7 @@ class ConcatDataset(UnionBaseDataset):
3641
3787
 
3642
3788
  class RenameDataset(UnionBaseDataset):
3643
3789
  """
3644
- The result of applying Rename operator to the input Dataset.
3790
+ The result of applying Rename operation to the input Dataset.
3645
3791
 
3646
3792
  Args:
3647
3793
  input_dataset (Dataset): Input Dataset to be Renamed.
@@ -3670,7 +3816,7 @@ def to_list(items):
3670
3816
 
3671
3817
  class ProjectDataset(UnionBaseDataset):
3672
3818
  """
3673
- The result of applying Project operator to the input Dataset.
3819
+ The result of applying Project operation to the input Dataset.
3674
3820
 
3675
3821
  Args:
3676
3822
  input_dataset (Dataset): Input Dataset to be Projected.
@@ -3691,6 +3837,9 @@ class _ToDevice:
3691
3837
  """
3692
3838
 
3693
3839
  def __init__(self, dataset, num_epochs):
3840
+ if get_debug_mode():
3841
+ logger.error("MindData debugger cannot be used in dataset sink mode. Please manually turn off "
3842
+ "sink mode and try debugger again.")
3694
3843
  ir_tree, self.api_tree = dataset.create_ir_tree()
3695
3844
 
3696
3845
  self._runtime_context = cde.PythonRuntimeContext()
@@ -3705,9 +3854,6 @@ class _ToDevice:
3705
3854
  def send(self):
3706
3855
  self._to_device.Send()
3707
3856
 
3708
- def _reset(self, step):
3709
- self._to_device.Reset(step)
3710
-
3711
3857
  def stop_send(self):
3712
3858
  """
3713
3859
  send stop send signal to pipeline, it is used when end of sequence is sent at the epoch end.
@@ -3746,16 +3892,19 @@ class _ToDevice:
3746
3892
  offload_model = GetOffloadModel(self._to_device, col_names)
3747
3893
  return offload_model
3748
3894
 
3895
+ def _reset(self, step, epoch):
3896
+ self._to_device.Reset(step, epoch)
3897
+
3749
3898
 
3750
3899
  class TransferDataset(Dataset):
3751
3900
  """
3752
- The result of applying TDT operator to the input Dataset.
3901
+ The result of applying TDT operation to the input Dataset.
3753
3902
 
3754
3903
  Args:
3755
3904
  input_dataset (Dataset): Input Dataset to be transferred.
3756
- send_epoch_end (bool, optional): Whether to send end of sequence to device or not (default=True).
3905
+ send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True.
3757
3906
  create_data_info_queue (bool, optional): Whether to create queue which stores
3758
- types and shapes of data or not (default=False).
3907
+ types and shapes of data or not. Default: False.
3759
3908
 
3760
3909
  Raises:
3761
3910
  TypeError: If device_type is empty.
@@ -3816,11 +3965,6 @@ class TransferDataset(Dataset):
3816
3965
  if self._to_device is not None:
3817
3966
  self._to_device.continue_send()
3818
3967
 
3819
- def _reset(self, step):
3820
- if self._to_device is not None:
3821
- logger.info("Reset the dataset pipeline to step " + str(step))
3822
- self._to_device._reset(step) # pylint: disable=W0212
3823
-
3824
3968
  def get_data_info(self):
3825
3969
  """
3826
3970
  Get type and shape of current batch
@@ -3842,13 +3986,18 @@ class TransferDataset(Dataset):
3842
3986
  if self._to_device is not None:
3843
3987
  self._to_device.release()
3844
3988
 
3989
+ def _reset(self, step, epoch):
3990
+ if self._to_device is not None:
3991
+ logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(epoch))
3992
+ self._to_device._reset(step, epoch) # pylint: disable=protected-access
3993
+
3845
3994
 
3846
3995
  class Schema:
3847
3996
  """
3848
3997
  Class to represent a schema of a dataset.
3849
3998
 
3850
3999
  Args:
3851
- schema_file(str): Path of the schema file (default=None).
4000
+ schema_file(str): Path of the schema file. Default: None.
3852
4001
 
3853
4002
  Returns:
3854
4003
  Schema object, schema info about dataset.
@@ -3877,11 +4026,17 @@ class Schema:
3877
4026
  Args:
3878
4027
  name (str): The new name of the column.
3879
4028
  de_type (str): Data type of the column.
3880
- shape (list[int], optional): Shape of the column
3881
- (default=None, [-1] which is an unknown shape of rank 1).
4029
+ shape (list[int], optional): Shape of the column.
4030
+ Default: None, [-1] which is an unknown shape of rank 1.
3882
4031
 
3883
4032
  Raises:
3884
4033
  ValueError: If column type is unknown.
4034
+
4035
+ Examples:
4036
+ >>> from mindspore import dtype as mstype
4037
+ >>>
4038
+ >>> schema = ds.Schema()
4039
+ >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
3885
4040
  """
3886
4041
  if isinstance(de_type, typing.Type):
3887
4042
  de_type = mstype_to_detype(de_type)
@@ -3926,6 +4081,12 @@ class Schema:
3926
4081
 
3927
4082
  Returns:
3928
4083
  str, JSON string of the schema.
4084
+
4085
+ Examples:
4086
+ >>> from mindspore.dataset import Schema
4087
+ >>>
4088
+ >>> schema1 = Schema()
4089
+ >>> schema_json = schema1.to_json()
3929
4090
  """
3930
4091
  return self.cpp_schema.to_json()
3931
4092
 
@@ -3940,6 +4101,16 @@ class Schema:
3940
4101
  RuntimeError: if there is unknown item in the object.
3941
4102
  RuntimeError: if dataset type is missing in the object.
3942
4103
  RuntimeError: if columns are missing in the object.
4104
+
4105
+ Examples:
4106
+ >>> import json
4107
+ >>>
4108
+ >>> from mindspore.dataset import Schema
4109
+ >>>
4110
+ >>> with open("/path/to/schema_file") as file:
4111
+ ... json_obj = json.load(file)
4112
+ ... schema = Schema()
4113
+ ... schema.from_json(json_obj)
3943
4114
  """
3944
4115
  self.cpp_schema.from_string(json.dumps(json_obj, indent=2))
3945
4116