mindspore-2.1.0-cp38-cp38-manylinux1_x86_64.whl → mindspore-2.2.11-cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (589)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_akg/akg/build_module.py +5 -6
  4. mindspore/_akg/akg/composite/build_module.py +139 -22
  5. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  6. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  7. mindspore/_akg/akg/tvm/api.py +4 -3
  8. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  9. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  10. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  11. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  12. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  13. mindspore/_akg/akg/tvm/build_module.py +16 -1
  14. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  15. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  16. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  17. mindspore/_akg/akg/tvm/module.py +1 -2
  18. mindspore/_akg/akg/tvm/stmt.py +2 -2
  19. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  20. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  21. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  22. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  23. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  24. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  25. mindspore/_akg/akg/utils/composite_op_helper.py +16 -12
  26. mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
  27. mindspore/_akg/akg/utils/kernel_exec.py +98 -274
  28. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  29. mindspore/_akg/akg/utils/tbe_codegen_utils.py +219 -0
  30. mindspore/_akg/akg/utils/util.py +56 -1
  31. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  32. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  33. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  34. mindspore/_check_jit_forbidden_api.py +3 -1
  35. mindspore/_checkparam.py +23 -29
  36. mindspore/_extends/graph_kernel/__init__.py +0 -1
  37. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  38. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  39. mindspore/_extends/graph_kernel/splitter.py +4 -11
  40. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
  41. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
  42. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  43. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  44. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
  45. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
  46. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  47. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  48. mindspore/_extends/parse/__init__.py +13 -15
  49. mindspore/_extends/parse/namespace.py +7 -33
  50. mindspore/_extends/parse/parser.py +67 -72
  51. mindspore/_extends/parse/resources.py +1 -1
  52. mindspore/_extends/parse/standard_method.py +86 -106
  53. mindspore/_extends/parse/trope.py +1 -1
  54. mindspore/_extends/remote/kernel_build_server.py +25 -7
  55. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  56. mindspore/_install_custom.py +43 -0
  57. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  58. mindspore/amp.py +47 -11
  59. mindspore/bin/cache_admin +0 -0
  60. mindspore/bin/cache_server +0 -0
  61. mindspore/boost/boost.py +1 -8
  62. mindspore/boost/boost_cell_wrapper.py +3 -2
  63. mindspore/boost/grad_accumulation.py +1 -1
  64. mindspore/boost/group_loss_scale_manager.py +8 -7
  65. mindspore/common/__init__.py +5 -3
  66. mindspore/common/_jit_fallback_utils.py +6 -0
  67. mindspore/common/_register_for_adapter.py +2 -0
  68. mindspore/common/_register_for_tensor.py +2 -2
  69. mindspore/common/_stub_tensor.py +13 -0
  70. mindspore/common/_utils.py +29 -0
  71. mindspore/common/api.py +174 -259
  72. mindspore/common/auto_dynamic_shape.py +494 -0
  73. mindspore/common/dtype.py +18 -11
  74. mindspore/common/dump.py +6 -4
  75. mindspore/common/initializer.py +14 -14
  76. mindspore/common/jit_config.py +33 -15
  77. mindspore/common/lazy_inline.py +126 -7
  78. mindspore/common/mindir_util.py +101 -0
  79. mindspore/common/parameter.py +51 -41
  80. mindspore/common/seed.py +4 -4
  81. mindspore/common/sparse_tensor.py +13 -14
  82. mindspore/common/tensor.py +243 -165
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +83 -4
  85. mindspore/communication/management.py +152 -84
  86. mindspore/config/op_info.config +14 -3
  87. mindspore/config/super_bar_config.json +4 -2
  88. mindspore/context.py +152 -61
  89. mindspore/dataset/__init__.py +5 -5
  90. mindspore/dataset/audio/__init__.py +2 -2
  91. mindspore/dataset/audio/transforms.py +52 -52
  92. mindspore/dataset/callback/ds_callback.py +16 -2
  93. mindspore/dataset/core/config.py +68 -51
  94. mindspore/dataset/engine/cache_client.py +33 -7
  95. mindspore/dataset/engine/datasets.py +250 -112
  96. mindspore/dataset/engine/datasets_audio.py +43 -211
  97. mindspore/dataset/engine/datasets_standard_format.py +16 -35
  98. mindspore/dataset/engine/datasets_text.py +43 -67
  99. mindspore/dataset/engine/datasets_user_defined.py +86 -100
  100. mindspore/dataset/engine/datasets_vision.py +219 -1029
  101. mindspore/dataset/engine/iterators.py +11 -4
  102. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
  103. mindspore/dataset/engine/obs/util.py +3 -0
  104. mindspore/dataset/engine/samplers.py +1 -1
  105. mindspore/dataset/engine/validators.py +19 -5
  106. mindspore/dataset/text/__init__.py +3 -3
  107. mindspore/dataset/text/transforms.py +101 -127
  108. mindspore/dataset/text/utils.py +205 -138
  109. mindspore/dataset/transforms/__init__.py +1 -1
  110. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  111. mindspore/dataset/transforms/transforms.py +95 -40
  112. mindspore/dataset/utils/browse_dataset.py +8 -2
  113. mindspore/dataset/utils/line_reader.py +17 -19
  114. mindspore/dataset/vision/__init__.py +3 -3
  115. mindspore/dataset/vision/c_transforms.py +6 -3
  116. mindspore/dataset/vision/transforms.py +409 -287
  117. mindspore/dataset/vision/utils.py +13 -14
  118. mindspore/dataset/vision/validators.py +11 -1
  119. mindspore/experimental/map_parameter.py +14 -0
  120. mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
  121. mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
  122. mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
  123. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  124. mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
  125. mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
  126. mindspore/gen_ops.py +273 -0
  127. mindspore/include/OWNERS +0 -1
  128. mindspore/include/api/data_type.h +2 -1
  129. mindspore/include/api/graph.h +0 -15
  130. mindspore/include/api/kernel.h +2 -0
  131. mindspore/include/api/kernel_api.h +37 -12
  132. mindspore/include/api/model.h +17 -14
  133. mindspore/include/api/status.h +8 -3
  134. mindspore/include/api/types.h +37 -4
  135. mindspore/include/c_api/ms/abstract.h +67 -0
  136. mindspore/include/c_api/ms/attribute.h +197 -0
  137. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  138. mindspore/include/c_api/ms/base/macros.h +32 -0
  139. mindspore/include/c_api/ms/base/status.h +33 -0
  140. mindspore/include/c_api/ms/base/types.h +282 -0
  141. mindspore/include/c_api/ms/context.h +102 -0
  142. mindspore/include/c_api/ms/graph.h +160 -0
  143. mindspore/include/c_api/ms/node.h +606 -0
  144. mindspore/include/c_api/ms/tensor.h +161 -0
  145. mindspore/include/c_api/ms/value.h +84 -0
  146. mindspore/include/dataset/constants.h +6 -5
  147. mindspore/include/dataset/execute.h +23 -13
  148. mindspore/include/dataset/text.h +26 -26
  149. mindspore/include/dataset/transforms.h +13 -13
  150. mindspore/include/dataset/vision.h +60 -60
  151. mindspore/include/dataset/vision_ascend.h +5 -6
  152. mindspore/include/dataset/vision_lite.h +17 -17
  153. mindspore/include/mindapi/base/type_id.h +1 -0
  154. mindspore/include/mindapi/base/types.h +1 -0
  155. mindspore/lib/libdnnl.so.2 +0 -0
  156. mindspore/lib/libjemalloc.so.2 +0 -0
  157. mindspore/lib/libmindspore.so +0 -0
  158. mindspore/lib/libmindspore_backend.so +0 -0
  159. mindspore/lib/libmindspore_common.so +0 -0
  160. mindspore/lib/libmindspore_core.so +0 -0
  161. mindspore/lib/libmindspore_glog.so.0 +0 -0
  162. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  163. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  164. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  165. mindspore/lib/libmindspore_shared_lib.so +0 -0
  166. mindspore/lib/libnnacl.so +0 -0
  167. mindspore/lib/libopencv_core.so.4.5 +0 -0
  168. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  169. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  170. mindspore/lib/libps_cache.so +0 -0
  171. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  172. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  173. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  174. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  175. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  176. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  177. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  178. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  179. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  180. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  181. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  182. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  183. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  184. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  185. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  186. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8998 -0
  187. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  188. mindspore/lib/plugin/ascend/libakg.so +0 -0
  189. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  190. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  191. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  192. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  193. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  194. mindspore/lib/plugin/cpu/libakg.so +0 -0
  195. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  196. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  197. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  198. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  199. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  201. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  202. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  203. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  204. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  205. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  206. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  207. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  208. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  209. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  210. mindspore/nn/__init__.py +0 -2
  211. mindspore/nn/cell.py +313 -74
  212. mindspore/nn/dynamic_lr.py +21 -21
  213. mindspore/nn/layer/activation.py +22 -30
  214. mindspore/nn/layer/basic.py +15 -13
  215. mindspore/nn/layer/channel_shuffle.py +1 -1
  216. mindspore/nn/layer/container.py +271 -9
  217. mindspore/nn/layer/conv.py +323 -204
  218. mindspore/nn/layer/dense.py +8 -5
  219. mindspore/nn/layer/embedding.py +33 -27
  220. mindspore/nn/layer/flash_attention.py +61 -95
  221. mindspore/nn/layer/image.py +8 -6
  222. mindspore/nn/layer/math.py +16 -25
  223. mindspore/nn/layer/normalization.py +107 -66
  224. mindspore/nn/layer/padding.py +1 -1
  225. mindspore/nn/layer/pooling.py +131 -109
  226. mindspore/nn/layer/rnn_cells.py +27 -22
  227. mindspore/nn/layer/rnns.py +13 -16
  228. mindspore/nn/layer/thor_layer.py +1 -1
  229. mindspore/nn/layer/transformer.py +221 -154
  230. mindspore/nn/learning_rate_schedule.py +9 -1
  231. mindspore/nn/loss/loss.py +235 -174
  232. mindspore/nn/optim/ada_grad.py +2 -1
  233. mindspore/nn/optim/adadelta.py +1 -0
  234. mindspore/nn/optim/adafactor.py +2 -1
  235. mindspore/nn/optim/adam.py +7 -4
  236. mindspore/nn/optim/adamax.py +3 -2
  237. mindspore/nn/optim/adasum.py +2 -2
  238. mindspore/nn/optim/asgd.py +2 -3
  239. mindspore/nn/optim/ftrl.py +6 -5
  240. mindspore/nn/optim/lamb.py +7 -4
  241. mindspore/nn/optim/lars.py +1 -1
  242. mindspore/nn/optim/lazyadam.py +5 -3
  243. mindspore/nn/optim/momentum.py +2 -1
  244. mindspore/nn/optim/optimizer.py +53 -4
  245. mindspore/nn/optim/proximal_ada_grad.py +3 -4
  246. mindspore/nn/optim/rmsprop.py +4 -3
  247. mindspore/nn/optim/rprop.py +23 -12
  248. mindspore/nn/optim/sgd.py +26 -11
  249. mindspore/nn/optim/thor.py +9 -7
  250. mindspore/nn/probability/bijector/bijector.py +5 -5
  251. mindspore/nn/probability/bijector/power_transform.py +27 -27
  252. mindspore/nn/probability/bijector/softplus.py +3 -3
  253. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
  254. mindspore/nn/probability/distribution/bernoulli.py +5 -5
  255. mindspore/nn/probability/distribution/beta.py +3 -3
  256. mindspore/nn/probability/distribution/categorical.py +7 -7
  257. mindspore/nn/probability/distribution/cauchy.py +0 -1
  258. mindspore/nn/probability/distribution/distribution.py +3 -3
  259. mindspore/nn/probability/distribution/gamma.py +3 -3
  260. mindspore/nn/probability/distribution/geometric.py +4 -4
  261. mindspore/nn/probability/distribution/gumbel.py +4 -4
  262. mindspore/nn/probability/distribution/log_normal.py +2 -2
  263. mindspore/nn/probability/distribution/logistic.py +2 -2
  264. mindspore/nn/probability/distribution/poisson.py +4 -4
  265. mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
  266. mindspore/nn/probability/distribution/uniform.py +6 -6
  267. mindspore/nn/wrap/__init__.py +4 -2
  268. mindspore/nn/wrap/cell_wrapper.py +87 -34
  269. mindspore/nn/wrap/grad_reducer.py +8 -5
  270. mindspore/nn/wrap/loss_scale.py +105 -42
  271. mindspore/numpy/array_creations.py +1 -2
  272. mindspore/numpy/array_ops.py +3 -2
  273. mindspore/numpy/utils_const.py +5 -5
  274. mindspore/offline_debug/convert_async.py +2 -2
  275. mindspore/ops/_grad_experimental/__init__.py +0 -5
  276. mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
  277. mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
  278. mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
  279. mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
  280. mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
  281. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
  282. mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
  283. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  284. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  285. mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
  286. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  287. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  288. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  289. mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
  290. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  291. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  292. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  293. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  294. mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
  295. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  296. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  297. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  298. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  299. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  300. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  301. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  302. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
  303. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
  304. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  305. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  306. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  307. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  308. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  309. mindspore/ops/_op_impl/tbe/__init__.py +4 -4
  310. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  311. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  312. mindspore/ops/_primitive_cache.py +1 -1
  313. mindspore/ops/_tracefunc.py +45 -13
  314. mindspore/ops/_utils/utils.py +6 -1
  315. mindspore/ops/_vmap/vmap_array_ops.py +3 -3
  316. mindspore/ops/_vmap/vmap_base.py +3 -3
  317. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  318. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  319. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  320. mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
  321. mindspore/ops/arg_dtype_cast.py +54 -0
  322. mindspore/ops/composite/base.py +37 -10
  323. mindspore/ops/composite/math_ops.py +5 -4
  324. mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
  325. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
  326. mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
  327. mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
  328. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  329. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  330. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  331. mindspore/ops/deprecated.py +304 -0
  332. mindspore/ops/function/__init__.py +4 -1
  333. mindspore/ops/function/array_func.py +174 -193
  334. mindspore/ops/function/clip_func.py +81 -13
  335. mindspore/ops/function/debug_func.py +1 -1
  336. mindspore/ops/function/grad/grad_func.py +18 -9
  337. mindspore/ops/function/image_func.py +10 -4
  338. mindspore/ops/function/linalg_func.py +5 -5
  339. mindspore/ops/function/math_func.py +575 -386
  340. mindspore/ops/function/nn_func.py +568 -260
  341. mindspore/ops/function/random_func.py +88 -57
  342. mindspore/ops/function/sparse_func.py +1 -1
  343. mindspore/ops/function/sparse_unary_func.py +14 -12
  344. mindspore/ops/function/vmap_func.py +6 -5
  345. mindspore/ops/functional.py +15 -10
  346. mindspore/ops/op_info_register.py +244 -25
  347. mindspore/ops/operations/__init__.py +31 -19
  348. mindspore/ops/operations/_grad_ops.py +71 -7
  349. mindspore/ops/operations/_inner_ops.py +350 -17
  350. mindspore/ops/operations/_quant_ops.py +4 -8
  351. mindspore/ops/operations/_sequence_ops.py +42 -0
  352. mindspore/ops/operations/array_ops.py +68 -282
  353. mindspore/ops/operations/comm_ops.py +107 -59
  354. mindspore/ops/operations/custom_ops.py +94 -70
  355. mindspore/ops/operations/debug_ops.py +8 -4
  356. mindspore/ops/operations/image_ops.py +18 -12
  357. mindspore/ops/operations/inner_ops.py +26 -3
  358. mindspore/ops/operations/math_ops.py +192 -144
  359. mindspore/ops/operations/nn_ops.py +857 -489
  360. mindspore/ops/operations/other_ops.py +0 -22
  361. mindspore/ops/operations/random_ops.py +53 -111
  362. mindspore/ops/operations/sparse_ops.py +3 -1
  363. mindspore/ops/primitive.py +24 -18
  364. mindspore/parallel/_auto_parallel_context.py +68 -8
  365. mindspore/parallel/_cost_model_context.py +2 -2
  366. mindspore/parallel/_offload_context.py +17 -3
  367. mindspore/parallel/_parallel_serialization.py +12 -5
  368. mindspore/parallel/_ps_context.py +12 -0
  369. mindspore/parallel/_tensor.py +18 -13
  370. mindspore/parallel/_transformer/layers.py +5 -3
  371. mindspore/parallel/_transformer/loss.py +1 -0
  372. mindspore/parallel/_transformer/moe.py +2 -2
  373. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  374. mindspore/parallel/_transformer/transformer.py +23 -3
  375. mindspore/parallel/_utils.py +11 -7
  376. mindspore/parallel/algo_parameter_config.py +85 -5
  377. mindspore/parallel/checkpoint_transform.py +19 -12
  378. mindspore/parallel/shard.py +21 -14
  379. mindspore/profiler/common/struct_type.py +3 -3
  380. mindspore/profiler/common/util.py +4 -2
  381. mindspore/profiler/envprofiling.py +1 -1
  382. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  383. mindspore/profiler/parser/ascend_flops_generator.py +2 -2
  384. mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
  385. mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
  386. mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
  387. mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
  388. mindspore/profiler/parser/ascend_op_generator.py +6 -6
  389. mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
  390. mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
  391. mindspore/profiler/parser/base_timeline_generator.py +10 -8
  392. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
  393. mindspore/profiler/parser/flops_parser.py +15 -11
  394. mindspore/profiler/parser/framework_parser.py +38 -22
  395. mindspore/profiler/parser/hccl_parser.py +16 -12
  396. mindspore/profiler/parser/integrator.py +22 -11
  397. mindspore/profiler/parser/memory_usage_parser.py +2 -2
  398. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  399. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  400. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  401. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  402. mindspore/profiler/parser/optime_parser.py +1 -1
  403. mindspore/profiler/parser/profiler_info.py +21 -2
  404. mindspore/profiler/parser/step_trace_parser.py +11 -14
  405. mindspore/profiler/profiling.py +179 -89
  406. mindspore/rewrite/api/node.py +102 -19
  407. mindspore/rewrite/api/node_type.py +5 -1
  408. mindspore/rewrite/api/pattern_engine.py +1 -1
  409. mindspore/rewrite/api/scoped_value.py +9 -17
  410. mindspore/rewrite/api/symbol_tree.py +131 -47
  411. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  412. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  413. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  414. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
  415. mindspore/rewrite/common/rewrite_elog.py +5 -1
  416. mindspore/rewrite/namer.py +33 -24
  417. mindspore/rewrite/namespace.py +14 -5
  418. mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
  419. mindspore/rewrite/node/call_function.py +79 -0
  420. mindspore/rewrite/node/cell_container.py +135 -0
  421. mindspore/rewrite/node/control_flow.py +88 -0
  422. mindspore/rewrite/{node.py → node/node.py} +273 -234
  423. mindspore/rewrite/node/node_manager.py +254 -0
  424. mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
  425. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  426. mindspore/rewrite/parsers/assign_parser.py +216 -221
  427. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  428. mindspore/rewrite/parsers/class_def_parser.py +174 -113
  429. mindspore/rewrite/parsers/constant_parser.py +9 -6
  430. mindspore/rewrite/parsers/container_parser.py +9 -7
  431. mindspore/rewrite/parsers/for_parser.py +42 -21
  432. mindspore/rewrite/parsers/function_def_parser.py +24 -16
  433. mindspore/rewrite/parsers/if_parser.py +28 -24
  434. mindspore/rewrite/parsers/module_parser.py +196 -25
  435. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  436. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  437. mindspore/rewrite/parsers/return_parser.py +6 -6
  438. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  439. mindspore/rewrite/sparsify/utils.py +1 -1
  440. mindspore/rewrite/symbol_tree.py +523 -578
  441. mindspore/rewrite/symbol_tree_builder.py +9 -193
  442. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  443. mindspore/run_check/_check_version.py +6 -4
  444. mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
  445. mindspore/safeguard/rewrite_obfuscation.py +541 -0
  446. mindspore/scipy/linalg.py +1 -1
  447. mindspore/scipy/ops.py +55 -5
  448. mindspore/scipy/optimize/__init__.py +3 -2
  449. mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
  450. mindspore/scipy/optimize/minimize.py +7 -3
  451. mindspore/train/_utils.py +7 -3
  452. mindspore/train/amp.py +323 -123
  453. mindspore/train/anf_ir_pb2.py +14 -2
  454. mindspore/train/callback/_backup_and_restore.py +2 -12
  455. mindspore/train/callback/_callback.py +29 -4
  456. mindspore/train/callback/_checkpoint.py +23 -8
  457. mindspore/train/callback/_early_stop.py +2 -2
  458. mindspore/train/callback/_landscape.py +4 -4
  459. mindspore/train/callback/_loss_monitor.py +2 -2
  460. mindspore/train/callback/_on_request_exit.py +2 -2
  461. mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
  462. mindspore/train/callback/_summary_collector.py +15 -8
  463. mindspore/train/callback/_time_monitor.py +58 -5
  464. mindspore/train/data_sink.py +5 -11
  465. mindspore/train/dataset_helper.py +84 -57
  466. mindspore/train/loss_scale_manager.py +2 -2
  467. mindspore/train/metrics/__init__.py +3 -3
  468. mindspore/train/metrics/cosine_similarity.py +1 -1
  469. mindspore/train/metrics/hausdorff_distance.py +3 -2
  470. mindspore/train/metrics/mean_surface_distance.py +3 -2
  471. mindspore/train/metrics/metric.py +39 -19
  472. mindspore/train/metrics/roc.py +2 -2
  473. mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
  474. mindspore/train/mind_ir_pb2.py +85 -36
  475. mindspore/train/model.py +187 -47
  476. mindspore/train/serialization.py +487 -161
  477. mindspore/train/summary/_summary_adapter.py +1 -1
  478. mindspore/train/summary/_writer_pool.py +3 -2
  479. mindspore/train/summary/summary_record.py +37 -17
  480. mindspore/train/train_thor/convert_utils.py +3 -3
  481. mindspore/train/train_thor/dataset_helper.py +1 -1
  482. mindspore/version.py +1 -1
  483. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +8 -8
  484. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +488 -539
  485. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -1
  486. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  487. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  488. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  489. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  490. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  491. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  492. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  493. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  494. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  495. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  496. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  497. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  498. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  499. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  500. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  501. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  502. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  503. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  504. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  505. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  506. mindspore/_extends/graph_kernel/expander.py +0 -80
  507. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
  508. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  509. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  510. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  511. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  512. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  513. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  514. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  515. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  516. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  517. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  518. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  519. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  520. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  521. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  522. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  523. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  524. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  525. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  526. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  527. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  528. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  529. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  530. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  531. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  532. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  533. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  534. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  535. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  536. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  537. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  538. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  539. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  540. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  541. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  542. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  543. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  544. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  545. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  546. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  547. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  548. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  549. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  550. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  551. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  552. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  553. mindspore/dataset/datapreprocess/__init__.py +0 -20
  554. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  555. mindspore/include/api/net.h +0 -142
  556. mindspore/nn/lr_scheduler.py +0 -262
  557. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
  558. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
  559. mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
  560. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  561. mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
  562. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
  563. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
  564. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
  565. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
  566. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
  567. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  568. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  569. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  570. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  571. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
  572. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
  573. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
  574. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  575. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  576. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
  577. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
  578. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  579. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  580. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
  581. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
  582. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
  583. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
  584. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
  585. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  586. mindspore/rewrite/node_visitor.py +0 -44
  587. /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
  588. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
  589. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
@@ -129,8 +129,7 @@ def _reset_training_dataset(global_step, dataset_size):
     """
     dataset = _get_training_dataset()
     if dataset is not None:
-        epoch = global_step // dataset_size
-        dataset._reset(global_step, epoch)  # pylint: disable=protected-access
+        dataset._reset(global_step, dataset_size)  # pylint: disable=protected-access
     else:
         raise RuntimeError("Training dataset is not set.")

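In this hunk the epoch arithmetic that used to live in Python (`epoch = global_step // dataset_size`) disappears: `_reset` now receives `dataset_size` directly, so the epoch derivation moves below the Python layer. For reference, a minimal sketch of the arithmetic the removed lines performed (the helper name `steps_to_epoch` is hypothetical, not part of MindSpore):

    def steps_to_epoch(global_step, dataset_size):
        # Epoch index the removed Python code computed before calling _reset:
        # e.g. global_step=250 with dataset_size=100 batches per epoch gives
        # epoch 2, i.e. resuming 50 steps into the third epoch.
        return global_step // dataset_size
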
@@ -210,7 +209,7 @@ def zip(datasets):
             The number of datasets must be more than 1.

    Returns:
-        Dataset, dataset zipped.
+        Dataset, a new dataset with the above operation applied.

    Raises:
        ValueError: If the number of datasets is 1.
@@ -352,6 +351,7 @@ class Dataset:
         self._repeat_count = None
         self._class_indexing = None
         self._sync = False
+        self._global_step = None

     @staticmethod
     def _get_operator_id(dataset):
@@ -388,36 +388,42 @@ class Dataset:
         _OP_PROCESS.update(generator_process)
         return op_name

-    def create_ir_tree(self):
+    def create_ir_tree(self, getter_mode=False):
         """
         Internal method to build an IR tree.

+        Args:
+            getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``False``.
+
         Returns:
-            DatasetNode, the root node of the IR tree.
-            Dataset, the root dataset of the IR tree.
+            Union[DatasetNode, Dataset], the root node of the IR tree and the root dataset of the IR tree.
         """
         parent = self.parent
         self.parent = []
         dataset = copy.deepcopy(self)
         global _OP_NAME
         _OP_NAME = Dataset._get_operator_id(dataset)
-        ir_tree = dataset.parse_tree()
+        ir_tree = dataset.parse_tree(getter_mode)
         self.parent = parent
         _init_device_info()
         return ir_tree, dataset

-    def parse_tree(self):
+    def parse_tree(self, getter_mode=False):
         """
         Internal method to parse the API tree into an IR tree.

+        Args:
+            getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``False``.
+
         Returns:
             DatasetNode, the root node of the IR tree.
         """
         if len(self.parent) > 1:
             raise ValueError("The data pipeline is not a tree (i.e., one node has 2 consumers)")
-        ir_children = [d.parse_tree() for d in self.children]
+        ir_children = [d.parse_tree(getter_mode) for d in self.children]
         # Bootstrap can only be performed on a copy of the original dataset node.
         # Bootstrap on original dataset node will make all iterators share the same process pool
+        self.pre_parse(getter_mode)
         self.iterator_bootstrap()
         ir_node = self.parse(ir_children)
         ir_node = self.post_parse(ir_node)
@@ -516,7 +522,7 @@ class Dataset:
                 bucket if it is not a full batch. Default: ``False``.

         Returns:
-            Dataset, dataset bucketized and batched by length.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> # Create a dataset where certain counts rows are combined into a batch
@@ -595,12 +601,17 @@ class Dataset:
                 multi-threading mode. If `per_batch_map` is a CPU bound task, it is recommended to use
                 multi-processing mode. Default: ``False`` , use python multi-threading mode.

-            - max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to
-              copy data between processes. This is only used if `python_multiprocessing` is set to ``True``.
-              Default: ``16`` .
+            - max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
+              allocation to copy data between processes, the total occupied shared memory will increase as
+              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
+              used if python_multiprocessing is set to True. If it is an int value, it represents
+              ``input_columns`` and ``output_columns`` use this value as the unit to create shared memory.
+              If it is a list, the first element represents the ``input_columns`` use this value as the unit to
+              create shared memory, and the second element represents ``output_columns`` use this value as the unit
+              to create shared memory. Default: 16.

         Returns:
-            BatchDataset, dataset batched.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> # 1) Create a dataset where every 5 rows are combined into a batch
@@ -666,7 +677,7 @@ class Dataset:
                 to ``None``. Default: ``None``.

         Returns:
-            PaddedBatchDataset, dataset batched.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> # 1) Pad every sample to the largest sample's shape and batch the samples
@@ -700,7 +711,7 @@ class Dataset:
                 Default: ``None``.

         Returns:
-            SyncWaitDataset, dataset added a blocking condition.
+            Dataset, a new dataset with the above operation applied.

         Raises:
             RuntimeError: If condition name already exists.
@@ -758,7 +769,7 @@ class Dataset:
             dataset will result in a global shuffle.

         Returns:
-            Dataset, dataset shuffled.
+            Dataset, a new dataset with the above operation applied.

         Raises:
             RuntimeError: If exist sync operations before shuffle.
@@ -784,7 +795,7 @@ class Dataset:
             return a `Dataset` .

         Returns:
-            Dataset, dataset applied by the function.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
             >>> dataset = ds.GeneratorDataset([1, 2, 3], "col1")
@@ -847,11 +858,11 @@ class Dataset:
             `output_columns` , and if not specified, the column name of output column is same as that of `input_columns` .

             - If you use transformations (
-              `vision transform <https://mindspore.cn/docs/en/r2.1/api_python/mindspore.\
+              `vision transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
               dataset.transforms.html#module-mindspore.dataset.vision>`_ ,
-              `nlp transform <https://mindspore.cn/docs/en/r2.1/api_python/mindspore.\
+              `nlp transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
               dataset.transforms.html#module-mindspore.dataset.text>`_ ,
-              `audio transform <https://mindspore.cn/docs/en/r2.1/api_python/mindspore.\
+              `audio transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
               dataset.transforms.html#module-mindspore.dataset.audio>`_ )
               provided by mindspore dataset, please use the following parameters:

@@ -881,9 +892,14 @@ class Dataset:
             - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes.
               This option could be beneficial if the Python operation is computational heavy. Default: ``False``.

-            - max_rowsize (int, optional): Maximum size of row in MB that is used for shared memory allocation to
-              copy data between processes. This is only used if `python_multiprocessing` is set to ``True``.
-              Default: ``16``.
+            - max_rowsize (Union[int, list[int]], optional): Maximum size of row in MB that is used for shared
+              memory allocation to copy data between processes, the total occupied shared memory will increase as
+              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
+              used if python_multiprocessing is set to True. If it is an int value, it represents
+              ``input_columns`` and ``output_columns`` use this value as the unit to create shared memory.
+              If it is a list, the first element represents the ``input_columns`` use this value as the unit to
+              create shared memory, and the second element represents ``output_columns`` use this value as the unit
+              to create shared memory. Default: 16.

             - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
               Default: ``None``, which means no cache is used.
@@ -900,7 +916,7 @@ class Dataset:
             `operations` .

         Returns:
-            Dataset, dataset after mapping operation.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
@@ -986,7 +1002,7 @@ class Dataset:
                 in parallel. Default: ``None``.

         Returns:
-            Dataset, dataset filtered.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> # generator data(0 ~ 19)
@@ -1010,7 +1026,7 @@ class Dataset:
             count (int): Number of times the dataset is going to be repeated. Default: ``None``.

         Returns:
-            Dataset, dataset repeated.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1040,7 +1056,7 @@ class Dataset:
             count (int): Number of elements in the dataset to be skipped.

         Returns:
-            Dataset, dataset that containing rows like origin rows subtract skipped rows.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1053,19 +1069,22 @@ class Dataset:
     @check_take
     def take(self, count=-1):
         """
-        Takes at most given numbers of elements from the dataset.
-
-        Note:
-            1. If count is greater than the number of elements in the dataset or equal to -1,
-               all the elements in dataset will be taken.
-            2. The order of using take and batch matters. If take is before batch operation,
-               then take the given number of rows; otherwise take the given number of batches.
+        Take the first specified number of samples from the dataset.

         Args:
-            count (int, optional): Number of elements to be taken from the dataset. Default: ``-1`` .
+            count (int, optional): The desired number of samples to take. If the value exceeds
+                the total number of samples in the dataset, all data will be returned.
+                Default: ``-1`` , will return all data.
+
+        Note:
+            When there are operations that will change the number of samples of the dataset in
+            the data pipeline, the location of the `take` operation can change its effect.
+            For example, `batch` operation will combine the successive samples of the specified
+            `batch_size` into 1 sample, so `.batch(batch_size).take(1)` will be equivalent to
+            `.take(batch_size).batch(batch_size)`.

         Returns:
-            Dataset, dataset taken.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
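The rewritten note in this hunk is easy to check with a toy pipeline. A minimal sketch, assuming a ten-sample source (the dataset contents are illustrative only):

    import numpy as np
    import mindspore.dataset as ds

    data = ds.NumpySlicesDataset(np.arange(10), column_names=["col1"], shuffle=False)
    # batch-then-take yields one batch of 4 samples ...
    first_batch = data.batch(4).take(1)
    # ... which matches take-then-batch over the same leading 4 samples.
    same_batch = data.take(4).batch(4)
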
@@ -1163,7 +1182,7 @@ class Dataset:
             will be different in each epoch.

         Returns:
-            tuple(Dataset), a tuple of datasets that have been split.
+            Tuple[Dataset], a tuple of new datasets split from the original one.

         Raises:
             RuntimeError: If get_dataset_size returns None or is not supported for this dataset.
@@ -1218,7 +1237,7 @@ class Dataset:
                 to be zipped together with this dataset.

         Returns:
-            Dataset, dataset zipped.
+            Dataset, a new dataset with the above operation applied.

         Raises:
             TypeError: The parameter is not dataset object or tuple of dataset objects.
@@ -1244,6 +1263,12 @@ class Dataset:
         Concatenate the dataset objects in the input list.
         Performing "+" operation on dataset objects can achieve the same effect.

+        For a dataset concatenated by many other dataset objects, it returns the data in the order of
+        datasets passed in. If you want to change the data order(such as random selection from each dataset
+        instead of in sequence), apply `use_sampler` method on the concatenated dataset object.
+        Currently `use_sampler` supports `dataset.DistributedSampler` for sharding selection from each dataset
+        or `dataset.RandomSampler` for random selection from each dataset, see examples below.
+
         Note:
             The column name, and rank and type of the column data must be the same in the input datasets.
@@ -1252,17 +1277,45 @@ class Dataset:
                 to be concatenated together with this dataset.

         Returns:
-            Dataset, dataset concatenated.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
-            >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1")
-            >>> dataset_2 = ds.GeneratorDataset([2, 3, 4], "column1")
+            >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
+            >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
             >>>
             >>> # Create a dataset by concatenating dataset_1 and dataset_2 with "+" operator
             >>> dataset = dataset_1 + dataset_2
             >>> # Create a dataset by concatenating dataset_1 and dataset_2 with concat operation
             >>> dataset = dataset_1.concat(dataset_2)
+            >>>
+            >>> # Check the data order of dataset
+            >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
+            >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
+            >>> dataset = dataset_1 + dataset_2
+            >>> result = list(dataset)
+            >>> # [[Tensor(shape=[], dtype=Int64, value= 1)], [Tensor(shape=[], dtype=Int64, value= 2)],
+            >>> #  [Tensor(shape=[], dtype=Int64, value= 3)], [Tensor(shape=[], dtype=Int64, value= 4)],
+            >>> #  [Tensor(shape=[], dtype=Int64, value= 5)], [Tensor(shape=[], dtype=Int64, value= 6)]]
+            >>>
+            >>> # Change the data order of concatenated dataset with sharding selection
+            >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
+            >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
+            >>> dataset = dataset_1.concat(dataset_2)
+            >>> dataset.use_sampler(ds.DistributedSampler(num_shards=2, shard_id=1, shuffle=False))
+            >>> result = list(dataset)
+            >>> # [[Tensor(shape=[], dtype=Int64, value= 2)], [Tensor(shape=[], dtype=Int64, value= 4)],
+            >>> #  [Tensor(shape=[], dtype=Int64, value= 6)]]
+            >>>
+            >>> # Change the data order of concatenated dataset with random selection
+            >>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
+            >>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
+            >>> dataset = dataset_1.concat(dataset_2)
+            >>> dataset.use_sampler(ds.RandomSampler())
+            >>> result = list(dataset)
+            >>> # [[Tensor(shape=[], dtype=Int64, value= 1)], [Tensor(shape=[], dtype=Int64, value= 4)],
+            >>> #  [Tensor(shape=[], dtype=Int64, value= 2)], [Tensor(shape=[], dtype=Int64, value= 5)],
+            >>> #  [Tensor(shape=[], dtype=Int64, value= 6)], [Tensor(shape=[], dtype=Int64, value= 3)]]
         """
         if isinstance(datasets, Dataset):
             datasets = [self] + [datasets]
@@ -1282,7 +1335,7 @@ class Dataset:
             output_columns (Union[str, list[str]]): List of names of the output columns.

         Returns:
-            Dataset, dataset renamed.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1308,7 +1361,7 @@ class Dataset:
             columns(Union[str, list[str]]): List of names of the columns to project.

         Returns:
-            Dataset, dataset projected.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1332,7 +1385,7 @@ class Dataset:
             return a preprocessed `Dataset` .

         Returns:
-            Dataset, dataset applied by the function.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1360,7 +1413,7 @@ class Dataset:
         return dataset

     @check_device_send
-    def device_que(self, send_epoch_end=True, create_data_info_queue=False):
+    def device_que(self, send_epoch_end=True, create_data_info_queue=False, queue_name=""):
         """
         Return a transferred Dataset that transfers data through a device.
@@ -1369,13 +1422,15 @@ class Dataset:
                 Default: ``True``.
             create_data_info_queue (bool, optional): Whether to create queue which stores
                 types and shapes of data or not. Default: ``False``.
+            queue_name (str, optional): Name of queue which connects dataset processing and model
+                computing. Default: ``""``.

         Note:
             If device is Ascend, features of data will be transferred one by one. The limitation
             of data transmission per time is 256M.

         Returns:
-            Dataset, dataset for transferring.
+            Dataset, a new dataset with the above operation applied.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1387,7 +1442,7 @@ class Dataset:
             >>>     time.sleep(0.1)
             >>> data.stop_send()
         """
-        return TransferDataset(self, send_epoch_end, create_data_info_queue)
+        return TransferDataset(self, send_epoch_end, create_data_info_queue, queue_name)

     @check_save
     def save(self, file_name, num_files=1, file_type='mindrecord'):
@@ -1504,7 +1559,7 @@ class Dataset:
                 Default: ``True``.

         Returns:
-            Iterator, tuple iterator over the dataset.
+            Iterator, a dataset iterator that returns data of type Tuple.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1538,7 +1593,7 @@ class Dataset:
                 Default: ``True`` .

         Returns:
-            Iterator, dictionary iterator over the dataset.
+            Iterator, a dataset iterator that returns data of type Dict.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1599,11 +1654,14 @@ class Dataset:
     def copy_batch_size(self, value):
         self._batch_size = value

-    def _init_tree_getters(self):
+    def _init_tree_getters(self, getter_mode=True):
         """
         Get pipeline information.
+
+        Args:
+            getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``True``.
         """
-        ir_tree, api_tree = self.create_ir_tree()
+        ir_tree, api_tree = self.create_ir_tree(getter_mode)

         runtime_context = cde.PythonRuntimeContext()
         runtime_context.Init()
@@ -1904,12 +1962,12 @@ class Dataset:

     def get_class_indexing(self):
         """
-        Return the class index.
+        Get the mapping dictionary from category names to category indexes.
+
+        This dictionary can be used to look up which category name corresponds to a particular category index.

         Returns:
-            dict, a str-to-int mapping from label name to index.
-            dict, a str-to-list<int> mapping from label name to index for Coco ONLY. The second number
-            in the list is used to indicate the super category.
+            Dict[str, int], the mappings from category names to category indexes.

         Examples:
             >>> import mindspore.dataset as ds
@@ -1981,6 +2039,13 @@ class Dataset:
             shard_id = 0
         return num_shards, shard_id

+    def pre_parse(self, getter_mode):
+        if getter_mode:
+            if hasattr(self, "python_multiprocessing"):
+                self.python_multiprocessing = False
+            if hasattr(self, "num_parallel_workers"):
+                self.num_parallel_workers = 1
+
     def post_parse(self, ir_node):
         if self.cache:
             ir_node = ir_node.set_cache_client(self.cache.cache_client)
@@ -1989,6 +2054,18 @@ class Dataset:

         return ir_node

+    def set_init_step(self, init_step):
+        self._global_step = init_step
+
+    def get_init_step(self):
+        if self._global_step is not None:
+            return self._global_step
+        if len(self.children) == 1:
+            return self.children[0].get_init_step()
+        # When there are multiple children, we cannot tell from which child to get the initial step,
+        # so we initialize from the beginning
+        return 0
+

 class VisionBaseDataset(Dataset):
     """
@@ -2373,7 +2450,7 @@ class MappableDataset(SourceDataset):
             shard may not be part of the same split.
 
         Returns:
-            tuple(Dataset), a tuple of datasets that have been split.
+            Tuple[Dataset], a tuple of new datasets split from the original one.
 
         Raises:
             RuntimeError: If get_dataset_size returns None or is not supported for this dataset.
@@ -2455,7 +2532,7 @@ class BucketBatchByLengthDataset(UnionBaseDataset):
                                          self.pad_to_bucket_boundary, self.drop_remainder)
 
 
-def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
+def _check_shm_usage(num_worker, queue_size, in_rowsize, out_rowsize):
     """
     Check sufficient shared memory is available for shared memory queues
     when training in parallel mode.
@@ -2467,8 +2544,8 @@ def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
     # on the ascend server is 8.
     if device_num > 1:
         device_num = min(device_num, 8)
-    shm_estimate_usage = device_num * num_worker * num_queues * \
-        (queue_size + 2) * max_rowsize * 1024 * 1024
+    shm_estimate_usage = device_num * num_worker * \
+        (queue_size + 2) * (in_rowsize + out_rowsize) * 1024 * 1024
     try:
         shm_available = psutil.disk_usage('/dev/shm').free
         if shm_estimate_usage >= threshold_ratio * shm_available:
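To gauge the scale of the revised estimate, a back-of-the-envelope computation (all values below are illustrative assumptions, not from the diff):

    # 8 devices x 4 workers, queue_size=1, default 16 MB units for input and output rows
    device_num, num_worker, queue_size = 8, 4, 1
    in_rowsize = out_rowsize = 16  # MB, matching the default max_rowsize
    shm_estimate_usage = device_num * num_worker * \
        (queue_size + 2) * (in_rowsize + out_rowsize) * 1024 * 1024
    print(shm_estimate_usage / 1024 ** 3)  # 3.0 -> about 3 GB of /dev/shm needed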
@@ -2507,8 +2584,14 @@ class BatchDataset(UnionBaseDataset):
             len(output_columns). The size of this list must match the number of output
             columns of the last operation. Default: ``None``, output columns will have the same
             name as the input columns, i.e., the columns will be replaced.
-        max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
-            data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
+        max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared
+            memory allocation to copy data between processes. The total occupied shared memory increases
+            as ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase.
+            This is only used if python_multiprocessing is set to True. If it is an int, both
+            ``input_columns`` and ``output_columns`` use this value as the unit when creating shared
+            memory. If it is a list, the first element is the unit used to create shared memory for
+            ``input_columns``, and the second element is the unit used to create shared memory for
+            ``output_columns``. Default: 16.
 
     """
 
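A usage sketch of the two forms described above (the per-batch function, dataset and column name are illustrative assumptions):

    import numpy as np
    import mindspore.dataset as ds

    def scale(col1, batch_info):
        # per_batch_map receives a list of rows per input column plus a BatchInfo
        return ([x * 2 for x in col1],)

    data = ds.NumpySlicesDataset(np.ones((32, 4), dtype=np.float32), column_names=["col1"])
    # int form: 16 MB shared-memory units for both the input and output queues
    data = data.batch(8, per_batch_map=scale, input_columns=["col1"],
                      python_multiprocessing=True, max_rowsize=16)
    # list form, e.g. max_rowsize=[16, 32]: 16 MB units for inputs, 32 MB for outputs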
@@ -2534,7 +2617,10 @@
 
         self.python_multiprocessing = python_multiprocessing
         self.process_pool = None
-        self.max_rowsize = max_rowsize
+        if isinstance(max_rowsize, int):
+            self.max_rowsize = [max_rowsize * self.batch_size] * 2
+        else:
+            self.max_rowsize = [max_rowsize[0] * self.batch_size, max_rowsize[1] * self.batch_size]
 
     def __del__(self):
         if hasattr(self, "process_pool") and self.process_pool is not None:
@@ -2604,7 +2690,7 @@
                 self.num_parallel_workers = get_num_parallel_workers()
 
             self.process_pool = _PythonMultiprocessing(str(self), self.num_parallel_workers, [self.per_batch_map],
-                                                       self.max_rowsize * self.batch_size)
+                                                       self.max_rowsize)
             # Wrap per_batch_map into _PythonCallable
             self.per_batch_map = _PythonCallable(self.per_batch_map, 0, self.process_pool)
         else:
@@ -2614,39 +2700,53 @@
 
 
 class BatchInfo(cde.CBatchInfo):
     """
-    Only the batch size function and per_batch_map of the batch operation can dynamically adjust parameters
-    based on the number of batches and epochs during training.
+    This class helps to get dataset information dynamically when the input of `batch_size` or `per_batch_map`
+    in the `batch` operation is a callable object.
     """
 
     def get_batch_num(self):
         """
-        Return the batch number of the current batch.
+        Return the batch number being processed in the current epoch, starting from 0.
 
         Examples:
             >>> # Create a dataset where its batch size is dynamic
             >>> # Define a callable batch size function and let batch size increase 1 each time.
             >>> import mindspore.dataset as ds
             >>> from mindspore.dataset import BatchInfo
-            >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
+            >>>
+            >>> dataset = ds.GeneratorDataset([i for i in range(3)], "column1", shuffle=False)
             >>> def add_one(BatchInfo):
             ...     return BatchInfo.get_batch_num() + 1
             >>> dataset = dataset.batch(batch_size=add_one)
+            >>> print(list(dataset))
+            [[Tensor(shape=[1], dtype=Int64, value= [0])], [Tensor(shape=[2], dtype=Int64, value= [1, 2])]]
         """
        return
 
     def get_epoch_num(self):
         """
-        Return the epoch number of the current batch.
+        Return the epoch number, starting from 0.
 
         Examples:
             >>> # Create a dataset where its batch size is dynamic
             >>> # Define a callable batch size function and let batch size increase 1 each epoch.
             >>> import mindspore.dataset as ds
             >>> from mindspore.dataset import BatchInfo
-            >>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
+            >>>
+            >>> dataset = ds.GeneratorDataset([i for i in range(4)], "column1", shuffle=False)
             >>> def add_one_by_epoch(BatchInfo):
             ...     return BatchInfo.get_epoch_num() + 1
             >>> dataset = dataset.batch(batch_size=add_one_by_epoch)
+            >>>
+            >>> result = []
+            >>> epoch = 2
+            >>> iterator = dataset.create_tuple_iterator(num_epochs=epoch)
+            >>> for i in range(epoch):
+            ...     result.extend(list(iterator))
+            >>> # result:
+            >>> # [[Tensor(shape=[1], dtype=Int64, value= [0])], [Tensor(shape=[1], dtype=Int64, value= [1])],
+            >>> # [Tensor(shape=[1], dtype=Int64, value= [2])], [Tensor(shape=[1], dtype=Int64, value= [3])],
+            >>> # [Tensor(shape=[2], dtype=Int64, value= [0, 1])], [Tensor(shape=[2], dtype=Int64, value= [2, 3])]]
         """
         return
 
@@ -2934,6 +3034,7 @@ class _PythonCallable:
         return self.py_callable.to_json()
 
 
+# used when python_multiprocessing=True in map
 class Pipe:
     """
     Class to handle communication between the master process and the worker processes.
@@ -2943,13 +3044,12 @@ class Pipe:
         self.shared_memory = shared_memory
         self.eof = multiprocessing.Event()
         if self.shared_memory:
-            self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize)
-            self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize)
+            self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[0])
+            self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[1])
         else:
             self.in_queue = _Queue(1)
             self.res_queue = _Queue(1)
-        self.in_queue._joincancelled = True  # pylint: disable=W0212
-        self.res_queue._joincancelled = True  # pylint: disable=W0212
+        self.in_queue.cancel_join_thread()  # Ensure that the process does not hang when exiting
 
     def master_send(self, func_index, data):
         self.in_queue.put_nowait((func_index, *data))
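For background, `cancel_join_thread` is the standard-library mechanism the replacement relies on: it stops a queue's feeder thread from blocking process exit while unflushed data remains. A standalone illustration, independent of MindSpore:

    import multiprocessing

    def worker(q):
        q.cancel_join_thread()  # do not block this process's exit on unflushed queue data
        q.put("done")

    if __name__ == "__main__":
        queue = multiprocessing.Queue()
        proc = multiprocessing.Process(target=worker, args=(queue,))
        proc.start()
        print(queue.get())  # "done"
        proc.join()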
@@ -2965,8 +3065,6 @@
         self.eof.set()
         self.send_finish_signal_to_worker()
         self.send_finish_signal()
-        self.res_queue.cancel_join_thread()
-        self.in_queue.cancel_join_thread()
 
     def send_finish_signal(self):
         self.worker_send(None)
@@ -2986,10 +3084,6 @@
         func_index, *data = result
         return func_index, tuple(data)
 
-    def worker_close(self):
-        self.res_queue.cancel_join_thread()
-        self.in_queue.cancel_join_thread()
-
 
 def _main_process_already_exit():
     """
@@ -3007,6 +3101,8 @@ def _worker_loop(operations, pipe, seed=get_seed()):
     """
     Multiprocess worker process loop.
     """
+    # Ensure that the process does not hang when exiting
+    pipe.res_queue.cancel_join_thread()
 
     def _ignore_sigint():
         """
@@ -3022,11 +3118,9 @@ def _worker_loop(operations, pipe, seed=get_seed()):
 
         result = pipe.worker_receive()
         if result is None:
-            pipe.worker_close()
            return
         (idx, input_tensors) = result
         if input_tensors == "QUIT":
-            pipe.worker_close()
            break
         try:
             output_tensors = operations[idx](*input_tensors)
@@ -3093,6 +3187,9 @@ class _MPWorker(multiprocessing.Process):
     def close(self):
         try:
             if self.is_alive():
+                # Release the eager executor used by the current process
+                transforms.transforms.clean_unused_executors()
+
                 logger.info(f"Closing worker with PID: {self.pid}")
                 self.pipe.master_close()
                 # del the handle which hold by master
@@ -3140,12 +3237,12 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             self.origin_hook(ex_type, value, tb)
             self.mp_pool_exit_preprocess()
 
-    def __init__(self, op_name, num_parallel_workers, operations, max_row_size=16):
+    def __init__(self, op_name, num_parallel_workers, operations, max_rowsize=16):
         super(_PythonMultiprocessing, self).__init__()
         self.op_name = op_name
         self.num_parallel_workers = num_parallel_workers
         self.operations = operations
-        self.max_row_size = max_row_size
+        self.max_rowsize = max_rowsize
 
         self.workers = None
         self.pids = None
@@ -3336,7 +3433,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
 
         """
         if get_enable_shared_mem():
-            self.check_shared_memory()
+            _check_shm_usage(self.num_parallel_workers, 1, self.max_rowsize[0], self.max_rowsize[1])
 
         if self.workers is not None:
             raise Exception("Pool was already created, close it first.")
@@ -3348,7 +3445,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         self.workers = []
         self.warning_ctl = multiprocessing.Value('i', 0)
         for i in range(self.num_parallel_workers):
-            worker = _MPWorker(self.operations, self.warning_ctl, self.max_row_size, i + get_seed())
+            worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, i + get_seed())
             worker.start()
             self.workers.append(worker)
 
@@ -3410,12 +3507,6 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
     def is_mp_enabled(self):
         return self.workers is not None
 
-    def check_shared_memory(self):
-        """
-        Check if there is enough shared memory in the system.
-        """
-        _check_shm_usage(self.num_parallel_workers, 1, self.max_row_size, 2)
-
     def execute(self, idx, *args):
         """
         Execute
@@ -3524,8 +3615,13 @@ class MapDataset(UnionBaseDataset):
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
             Default: ``None``, which means no cache is used.
         callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: ``None``.
-        max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
-            data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
+        max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
+            allocation to copy data between processes. The total occupied shared memory increases as
+            ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is
+            only used if python_multiprocessing is set to True. If it is an int, both ``input_columns`` and
+            ``output_columns`` use this value as the unit when creating shared memory. If it is a list, the
+            first element is the unit used to create shared memory for ``input_columns``, and the second
+            element is the unit used to create shared memory for ``output_columns``. Default: 16.
         offload (bool, optional): Flag to indicate whether offload is used. Default: ``None``.
     """
 
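The same int-or-list convention, sketched for `map` (the transform and dataset are illustrative assumptions):

    import numpy as np
    import mindspore.dataset as ds

    def enlarge(x):
        return np.tile(x, 4)  # output rows are 4x larger than input rows

    data = ds.NumpySlicesDataset(np.ones((8, 2), dtype=np.float32), column_names=["data"])
    # list form: 16 MB shared-memory units for input rows, 32 MB for the larger outputs
    data = data.map(enlarge, input_columns=["data"],
                    python_multiprocessing=True, max_rowsize=[16, 32])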
@@ -3556,7 +3652,10 @@
         self.process_pool = None
 
         self.callbacks = to_list(callbacks)
-        self.max_rowsize = max_rowsize
+        if isinstance(max_rowsize, int):
+            self.max_rowsize = [max_rowsize] * 2
+        else:
+            self.max_rowsize = max_rowsize
         self.offload = offload
 
     def parse(self, children=None):
@@ -3594,7 +3693,7 @@
 
         callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
         return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
-                           callbacks, self.max_rowsize, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
+                           callbacks, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
 
     def __deepcopy__(self, memodict):
         return self.__safe_deepcopy__(memodict, exclude=("operations", "callbacks", "__transfer_dataset__"))
@@ -3898,6 +3997,8 @@ class ConcatDataset(UnionBaseDataset):
                                    "valid samples in the dataset." % child_index)
             child_index += 1
 
+        self._children_sizes = self.children_sizes_.copy()
+
         # _children_flag_and_nums: A list of pair<int ,int>.The first element of pair is flag that characterizes
         # whether the dataset is mappable. The second element of pair is length of the dataset
         self._children_flag_and_nums = []
@@ -3921,7 +4022,8 @@
             self._children_flag_and_nums.append((1, dataset_len))
 
     def parse(self, children=None):
-        return cde.ConcatNode(children, self._sampler, self._children_flag_and_nums, self._children_start_end_index_)
+        return cde.ConcatNode(children, self._sampler, self._children_flag_and_nums, self._children_start_end_index_,
+                              self._children_sizes)
 
     def use_sampler(self, sampler):
         """
@@ -3937,8 +4039,19 @@
             ValueError: If the parameter NumSamples of sampler is not None.
             ValueError: If num_shards <=0.
         """
-        if not isinstance(sampler, samplers.DistributedSampler):
-            raise TypeError("The parameter %s of concat must be DistributedSampler!" % sampler)
+        if not isinstance(sampler, (samplers.DistributedSampler, samplers.RandomSampler)):
+            raise TypeError("The parameter %s of concat must be DistributedSampler or RandomSampler!" % sampler)
+
+        if isinstance(sampler, samplers.RandomSampler):
+            if sampler.replacement:
+                raise ValueError("The parameter replacement of RandomSampler must be False!")
+
+            if sampler.get_num_samples() is not None:
+                raise ValueError("The parameter num_samples of RandomSampler is not supported to be set!")
+
+            self._sampler = sampler
+            self._children_sizes = [c.get_dataset_size() for c in self.children]
+            return
 
         if sampler.is_shuffled():
             raise ValueError("The parameter shuffle of DistributedSampler must be False!")
@@ -4041,7 +4154,12 @@ class _ToDevice:
         self._runtime_context = cde.PythonRuntimeContext()
         self._runtime_context.Init()
         self._to_device = cde.ToDevice(num_epochs)
-        self._to_device.Init(ir_tree)
+        if dataset.get_init_step() != 0:
+            init_step = dataset.get_init_step()
+            dataset_size = dataset.get_dataset_size()
+            self._to_device.Init(ir_tree, init_step, dataset_size)
+        else:
+            self._to_device.Init(ir_tree, 0, -1)
         self._runtime_context.AssignConsumer(self._to_device)
 
         ITERATORS_LIST.append(weakref.ref(self))
@@ -4068,6 +4186,14 @@
         """
         return self._to_device.GetDataInfo()
 
+    def get_send_info(self):
+        """
+        In sink mode, return the send information of the dataset at this moment.
+        Send information includes the number of batches sent, a time summary of fetching data
+        on the host and a time summary of sending data.
+        """
+        return self._to_device.GetSendInfo()
+
     def release(self):
         """
         Manually terminate Device Queue instead of relying on out of scope destruction.
@@ -4088,8 +4214,8 @@
             offload_model = GetOffloadModel(self._to_device, col_names)
         return offload_model
 
-    def _reset(self, step, epoch):
-        self._to_device.Reset(step, epoch)
+    def _reset(self, step, dataset_size):
+        self._to_device.Reset(step, dataset_size)
 
 
 class TransferDataset(Dataset):
@@ -4108,9 +4234,14 @@ class TransferDataset(Dataset):
         RuntimeError: If dataset is unknown.
     """
 
-    def __init__(self, input_dataset, send_epoch_end=True, create_data_info_queue=False):
+    def __init__(self, input_dataset, send_epoch_end=True, create_data_info_queue=False, queue_name=""):
         super().__init__(children=input_dataset)
-        self.queue_name = str(uuid.uuid1())
+        if queue_name == "":
+            self.queue_name = str(uuid.uuid1())
+            logger.info(f"queue_name is newly generated. value is {self.queue_name}")
+        else:
+            self.queue_name = queue_name
+            logger.info(f"queue_name is read from compile cache. value is {self.queue_name}")
         self.device_type = context.get_context("device_target") if context else "CPU"
         self.device_id = context.get_context("device_id") if context else 0
 
@@ -4169,6 +4300,16 @@
             return self._to_device.get_data_info()
         raise RuntimeError("Calling get_data_info with bad state.")
 
+    def get_send_info(self):
+        """
+        In sink mode, return the send information of the dataset at this moment.
+        Send information includes the number of batches sent, a time summary of fetching data
+        on the host and a time summary of sending data.
+        """
+        if self._to_device is not None:
+            return self._to_device.get_send_info()
+        raise RuntimeError("Calling get_send_info with bad state, data queue is not initialized.")
+
     def get_offload_model(self):
         if self._to_device is not None:
             return self._to_device.get_offload_model(self.column_name)
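A hedged sketch of querying the new send statistics in sink mode (the pipeline below is illustrative, and the exact structure of the returned object is not shown in this diff):

    import mindspore.dataset as ds

    data = ds.GeneratorDataset([i for i in range(10)], "column1")
    transfer = data.device_que()   # builds a TransferDataset
    transfer.send()                # start pushing batches to the device queue
    # After some batches have gone out, query the accumulated send statistics:
    send_info = transfer.get_send_info()  # batch count plus fetch/send time summaries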
@@ -4182,10 +4323,10 @@
         if self._to_device is not None:
             self._to_device.release()
 
-    def _reset(self, step, epoch):
+    def _reset(self, step, dataset_size):
         if self._to_device is not None:
-            logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(epoch))
-            self._to_device._reset(step, epoch)  # pylint: disable=protected-access
+            logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(step // dataset_size))
+            self._to_device._reset(step, dataset_size)  # pylint: disable=protected-access
 
 
 class Schema:
@@ -4193,10 +4334,7 @@
     Class to represent a schema of a dataset.
 
     Args:
-        schema_file(str): Path of the schema file. Default: ``None``.
-
-    Returns:
-        Schema object, schema info about dataset.
+        schema_file (str): Path of the schema file. Default: ``None``.
 
     Raises:
         RuntimeError: If schema file failed to load.
@@ -4306,7 +4444,7 @@
             >>> import json
             >>> from mindspore.dataset import Schema
             >>>
-            >>> with open("/path/to/schema_file") as file:
+            >>> with open("/path/to/schema_file", "r") as file:
             ...     json_obj = json.load(file)
             ...     schema = Schema()
             ...     schema.from_json(json_obj)