mindspore 2.4.10-cp311-cp311-win_amd64.whl → 2.6.0rc1-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (602)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +13 -6
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_check_jit_forbidden_api.py +3 -0
  9. mindspore/_checkparam.py +3 -38
  10. mindspore/_deprecated/__init__.py +17 -0
  11. mindspore/_deprecated/jit.py +198 -0
  12. mindspore/_extends/builtin_operations.py +1 -1
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +6 -7
  15. mindspore/_extends/parse/compile_config.py +83 -0
  16. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  17. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
  18. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  19. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  20. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  21. mindspore/_extends/parse/parser.py +46 -197
  22. mindspore/_extends/parse/resources.py +1 -5
  23. mindspore/_extends/parse/standard_method.py +217 -98
  24. mindspore/_extends/pijit/__init__.py +2 -2
  25. mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
  26. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  27. mindspore/_extends/utils.py +1 -1
  28. mindspore/amp.py +11 -5
  29. mindspore/atlprov.dll +0 -0
  30. mindspore/avcodec-59.dll +0 -0
  31. mindspore/avdevice-59.dll +0 -0
  32. mindspore/avfilter-8.dll +0 -0
  33. mindspore/avformat-59.dll +0 -0
  34. mindspore/avutil-57.dll +0 -0
  35. mindspore/boost/__init__.py +2 -2
  36. mindspore/boost/base.py +3 -7
  37. mindspore/boost/boost_cell_wrapper.py +138 -43
  38. mindspore/c1.dll +0 -0
  39. mindspore/c1xx.dll +0 -0
  40. mindspore/c2.dll +0 -0
  41. mindspore/common/__init__.py +6 -3
  42. mindspore/common/_grad_function.py +56 -0
  43. mindspore/common/_pijit_context.py +14 -5
  44. mindspore/common/_register_for_tensor.py +1 -2
  45. mindspore/common/_stub_tensor.py +30 -14
  46. mindspore/common/_tensor_cpp_method.py +17 -0
  47. mindspore/common/_tensor_docs.py +4760 -0
  48. mindspore/common/api.py +435 -371
  49. mindspore/common/auto_dynamic_shape.py +41 -44
  50. mindspore/common/dtype.py +39 -36
  51. mindspore/common/dump.py +9 -6
  52. mindspore/common/file_system.py +9 -1
  53. mindspore/common/generator.py +2 -0
  54. mindspore/common/hook_handle.py +6 -2
  55. mindspore/common/initializer.py +13 -10
  56. mindspore/common/jit_begin_end.py +94 -0
  57. mindspore/common/jit_config.py +6 -1
  58. mindspore/common/jit_context.py +76 -0
  59. mindspore/common/jit_trace.py +378 -0
  60. mindspore/common/lazy_inline.py +9 -3
  61. mindspore/common/mindir_util.py +10 -2
  62. mindspore/common/mutable.py +5 -4
  63. mindspore/common/parameter.py +135 -52
  64. mindspore/common/seed.py +2 -2
  65. mindspore/common/sparse_tensor.py +23 -17
  66. mindspore/common/tensor.py +951 -1992
  67. mindspore/communication/__init__.py +7 -5
  68. mindspore/communication/_comm_helper.py +52 -2
  69. mindspore/communication/comm_func.py +240 -181
  70. mindspore/communication/management.py +95 -26
  71. mindspore/context.py +314 -566
  72. mindspore/dataset/__init__.py +65 -37
  73. mindspore/dataset/audio/__init__.py +2 -8
  74. mindspore/dataset/audio/transforms.py +3 -17
  75. mindspore/dataset/callback/ds_callback.py +2 -1
  76. mindspore/dataset/core/config.py +87 -6
  77. mindspore/dataset/engine/cache_admin.py +3 -3
  78. mindspore/dataset/engine/cache_client.py +6 -5
  79. mindspore/dataset/engine/datasets.py +292 -267
  80. mindspore/dataset/engine/datasets_audio.py +22 -8
  81. mindspore/dataset/engine/datasets_standard_format.py +46 -27
  82. mindspore/dataset/engine/datasets_text.py +78 -48
  83. mindspore/dataset/engine/datasets_user_defined.py +182 -116
  84. mindspore/dataset/engine/datasets_vision.py +120 -44
  85. mindspore/dataset/engine/iterators.py +283 -63
  86. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  87. mindspore/dataset/engine/obs/util.py +8 -0
  88. mindspore/dataset/engine/queue.py +40 -0
  89. mindspore/dataset/engine/samplers.py +289 -43
  90. mindspore/dataset/engine/serializer_deserializer.py +3 -2
  91. mindspore/dataset/engine/validators.py +53 -11
  92. mindspore/dataset/text/__init__.py +7 -6
  93. mindspore/dataset/text/transforms.py +6 -5
  94. mindspore/dataset/text/utils.py +3 -3
  95. mindspore/dataset/transforms/__init__.py +0 -9
  96. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  97. mindspore/dataset/transforms/transforms.py +31 -14
  98. mindspore/dataset/utils/browse_dataset.py +1 -1
  99. mindspore/dataset/vision/__init__.py +2 -9
  100. mindspore/dataset/vision/transforms.py +202 -158
  101. mindspore/dataset/vision/utils.py +7 -5
  102. mindspore/dataset/vision/validators.py +1 -2
  103. mindspore/device_context/__init__.py +21 -0
  104. mindspore/device_context/ascend/__init__.py +25 -0
  105. mindspore/device_context/ascend/device.py +72 -0
  106. mindspore/device_context/ascend/op_debug.py +153 -0
  107. mindspore/device_context/ascend/op_precision.py +193 -0
  108. mindspore/device_context/ascend/op_tuning.py +123 -0
  109. mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
  110. mindspore/device_context/cpu/device.py +62 -0
  111. mindspore/device_context/cpu/op_tuning.py +43 -0
  112. mindspore/device_context/gpu/__init__.py +21 -0
  113. mindspore/device_context/gpu/device.py +70 -0
  114. mindspore/device_context/gpu/op_precision.py +67 -0
  115. mindspore/device_context/gpu/op_tuning.py +175 -0
  116. mindspore/device_manager.py +170 -0
  117. mindspore/dnnl.dll +0 -0
  118. mindspore/dpcmi.dll +0 -0
  119. mindspore/experimental/es/embedding_service.py +35 -27
  120. mindspore/experimental/llm_boost/__init__.py +1 -0
  121. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  122. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  123. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  124. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  125. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  126. mindspore/experimental/llm_boost/register.py +1 -0
  127. mindspore/experimental/map_parameter.py +4 -4
  128. mindspore/experimental/optim/adadelta.py +6 -6
  129. mindspore/experimental/optim/adagrad.py +4 -4
  130. mindspore/experimental/optim/adam.py +7 -0
  131. mindspore/experimental/optim/adamax.py +4 -4
  132. mindspore/experimental/optim/adamw.py +4 -0
  133. mindspore/experimental/optim/asgd.py +1 -1
  134. mindspore/experimental/optim/lr_scheduler.py +73 -46
  135. mindspore/experimental/optim/radam.py +34 -31
  136. mindspore/experimental/optim/rprop.py +1 -1
  137. mindspore/experimental/optim/sgd.py +1 -1
  138. mindspore/hal/contiguous_tensors_handle.py +6 -10
  139. mindspore/hal/device.py +55 -53
  140. mindspore/hal/event.py +52 -52
  141. mindspore/hal/memory.py +157 -117
  142. mindspore/hal/stream.py +150 -109
  143. mindspore/include/api/context.h +0 -1
  144. mindspore/include/dataset/constants.h +7 -4
  145. mindspore/include/dataset/execute.h +2 -2
  146. mindspore/jpeg62.dll +0 -0
  147. mindspore/log.py +50 -0
  148. mindspore/mindrecord/__init__.py +21 -8
  149. mindspore/mindrecord/config.py +17 -316
  150. mindspore/mindrecord/filereader.py +1 -9
  151. mindspore/mindrecord/filewriter.py +5 -15
  152. mindspore/mindrecord/mindpage.py +1 -9
  153. mindspore/mindspore_backend_common.dll +0 -0
  154. mindspore/mindspore_backend_manager.dll +0 -0
  155. mindspore/mindspore_common.dll +0 -0
  156. mindspore/mindspore_core.dll +0 -0
  157. mindspore/mindspore_dump.dll +0 -0
  158. mindspore/mindspore_frontend.dll +0 -0
  159. mindspore/mindspore_glog.dll +0 -0
  160. mindspore/mindspore_memory_pool.dll +0 -0
  161. mindspore/mindspore_ms_backend.dll +0 -0
  162. mindspore/mindspore_ops.dll +0 -0
  163. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  164. mindspore/mindspore_ops_kernel_common.dll +0 -0
  165. mindspore/mindspore_profiler.dll +0 -0
  166. mindspore/mindspore_pyboost.dll +0 -0
  167. mindspore/mindspore_pynative.dll +0 -0
  168. mindspore/mindspore_res_manager.dll +0 -0
  169. mindspore/mindspore_runtime_pipeline.dll +0 -0
  170. mindspore/mint/__init__.py +796 -759
  171. mindspore/mint/distributed/__init__.py +70 -4
  172. mindspore/mint/distributed/distributed.py +2679 -44
  173. mindspore/mint/linalg/__init__.py +8 -0
  174. mindspore/mint/nn/__init__.py +743 -22
  175. mindspore/mint/nn/functional.py +716 -23
  176. mindspore/mint/nn/layer/__init__.py +21 -4
  177. mindspore/mint/nn/layer/_functions.py +334 -0
  178. mindspore/mint/nn/layer/activation.py +276 -1
  179. mindspore/mint/nn/layer/basic.py +123 -0
  180. mindspore/mint/nn/layer/conv.py +921 -0
  181. mindspore/mint/nn/layer/normalization.py +223 -28
  182. mindspore/mint/nn/layer/padding.py +797 -0
  183. mindspore/mint/nn/layer/pooling.py +235 -0
  184. mindspore/mint/optim/__init__.py +3 -1
  185. mindspore/mint/optim/adam.py +223 -0
  186. mindspore/mint/optim/adamw.py +26 -19
  187. mindspore/mint/optim/sgd.py +171 -0
  188. mindspore/mint/special/__init__.py +2 -1
  189. mindspore/msobj140.dll +0 -0
  190. mindspore/mspdb140.dll +0 -0
  191. mindspore/mspdbcore.dll +0 -0
  192. mindspore/mspdbst.dll +0 -0
  193. mindspore/mspft140.dll +0 -0
  194. mindspore/msvcdis140.dll +0 -0
  195. mindspore/msvcp140_1.dll +0 -0
  196. mindspore/msvcp140_2.dll +0 -0
  197. mindspore/msvcp140_atomic_wait.dll +0 -0
  198. mindspore/msvcp140_codecvt_ids.dll +0 -0
  199. mindspore/multiprocessing/__init__.py +5 -0
  200. mindspore/nn/__init__.py +4 -1
  201. mindspore/nn/cell.py +1370 -189
  202. mindspore/nn/dynamic_lr.py +2 -1
  203. mindspore/nn/layer/activation.py +29 -27
  204. mindspore/nn/layer/basic.py +51 -35
  205. mindspore/nn/layer/channel_shuffle.py +3 -3
  206. mindspore/nn/layer/container.py +1 -1
  207. mindspore/nn/layer/conv.py +22 -17
  208. mindspore/nn/layer/embedding.py +12 -11
  209. mindspore/nn/layer/normalization.py +56 -49
  210. mindspore/nn/layer/padding.py +4 -3
  211. mindspore/nn/layer/pooling.py +120 -42
  212. mindspore/nn/layer/rnn_cells.py +1 -1
  213. mindspore/nn/layer/rnns.py +2 -1
  214. mindspore/nn/layer/timedistributed.py +5 -5
  215. mindspore/nn/layer/transformer.py +59 -36
  216. mindspore/nn/learning_rate_schedule.py +8 -4
  217. mindspore/nn/loss/loss.py +58 -55
  218. mindspore/nn/optim/ada_grad.py +7 -5
  219. mindspore/nn/optim/adadelta.py +11 -9
  220. mindspore/nn/optim/adafactor.py +1 -1
  221. mindspore/nn/optim/adam.py +17 -13
  222. mindspore/nn/optim/adamax.py +8 -7
  223. mindspore/nn/optim/adasum.py +5 -5
  224. mindspore/nn/optim/asgd.py +1 -1
  225. mindspore/nn/optim/ftrl.py +11 -9
  226. mindspore/nn/optim/lamb.py +1 -1
  227. mindspore/nn/optim/lars.py +1 -4
  228. mindspore/nn/optim/lazyadam.py +12 -10
  229. mindspore/nn/optim/momentum.py +7 -6
  230. mindspore/nn/optim/optimizer.py +3 -3
  231. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  232. mindspore/nn/optim/rmsprop.py +13 -12
  233. mindspore/nn/optim/rprop.py +11 -9
  234. mindspore/nn/optim/sgd.py +9 -6
  235. mindspore/nn/optim/tft_wrapper.py +5 -2
  236. mindspore/nn/optim/thor.py +2 -1
  237. mindspore/nn/probability/bijector/bijector.py +17 -11
  238. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  239. mindspore/nn/probability/bijector/invert.py +2 -2
  240. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  241. mindspore/nn/probability/bijector/softplus.py +3 -2
  242. mindspore/nn/probability/distribution/beta.py +3 -3
  243. mindspore/nn/probability/distribution/categorical.py +1 -1
  244. mindspore/nn/probability/distribution/cauchy.py +4 -2
  245. mindspore/nn/probability/distribution/exponential.py +6 -7
  246. mindspore/nn/probability/distribution/gamma.py +2 -2
  247. mindspore/nn/probability/distribution/gumbel.py +2 -2
  248. mindspore/nn/probability/distribution/half_normal.py +5 -3
  249. mindspore/nn/probability/distribution/logistic.py +5 -3
  250. mindspore/nn/probability/distribution/poisson.py +1 -1
  251. mindspore/nn/probability/distribution/uniform.py +5 -3
  252. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  253. mindspore/nn/reinforcement/tensor_array.py +1 -1
  254. mindspore/nn/utils/init.py +13 -11
  255. mindspore/nn/wrap/__init__.py +6 -6
  256. mindspore/nn/wrap/cell_wrapper.py +181 -122
  257. mindspore/nn/wrap/grad_reducer.py +45 -36
  258. mindspore/nn/wrap/loss_scale.py +6 -7
  259. mindspore/numpy/array_creations.py +63 -65
  260. mindspore/numpy/array_ops.py +149 -144
  261. mindspore/numpy/logic_ops.py +41 -42
  262. mindspore/numpy/math_ops.py +365 -363
  263. mindspore/numpy/utils.py +17 -18
  264. mindspore/numpy/utils_const.py +5 -6
  265. mindspore/opencv_core452.dll +0 -0
  266. mindspore/opencv_imgcodecs452.dll +0 -0
  267. mindspore/opencv_imgproc452.dll +0 -0
  268. mindspore/ops/__init__.py +5 -3
  269. mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
  270. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
  271. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  272. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  273. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  274. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  275. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  276. mindspore/ops/_register_for_op.py +0 -11
  277. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  278. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
  279. mindspore/ops/_vmap/vmap_array_ops.py +27 -25
  280. mindspore/ops/_vmap/vmap_base.py +0 -2
  281. mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
  282. mindspore/ops/_vmap/vmap_math_ops.py +15 -16
  283. mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
  284. mindspore/ops/auto_generate/__init__.py +4 -3
  285. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
  286. mindspore/ops/auto_generate/gen_extend_func.py +764 -124
  287. mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
  288. mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
  289. mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
  290. mindspore/ops/composite/__init__.py +2 -1
  291. mindspore/ops/composite/base.py +20 -25
  292. mindspore/ops/composite/math_ops.py +6 -16
  293. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  294. mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
  295. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  296. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  297. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  298. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  299. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  300. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  301. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  302. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  303. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  304. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  305. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  306. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  307. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  308. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  309. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  310. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  311. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  312. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  313. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  314. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  315. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  316. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  317. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  318. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  319. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  320. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
  321. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  322. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  323. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  324. mindspore/ops/function/__init__.py +40 -2
  325. mindspore/ops/function/_add_attr_func.py +58 -0
  326. mindspore/ops/function/array_func.py +2089 -2403
  327. mindspore/ops/function/clip_func.py +80 -23
  328. mindspore/ops/function/debug_func.py +57 -57
  329. mindspore/ops/function/grad/__init__.py +1 -0
  330. mindspore/ops/function/grad/grad_func.py +104 -71
  331. mindspore/ops/function/image_func.py +2 -2
  332. mindspore/ops/function/linalg_func.py +47 -78
  333. mindspore/ops/function/math_func.py +4501 -3802
  334. mindspore/ops/function/nn_func.py +1726 -620
  335. mindspore/ops/function/other_func.py +159 -1
  336. mindspore/ops/function/parameter_func.py +18 -84
  337. mindspore/ops/function/random_func.py +440 -387
  338. mindspore/ops/function/reshard_func.py +4 -70
  339. mindspore/ops/function/sparse_func.py +3 -3
  340. mindspore/ops/function/sparse_unary_func.py +6 -6
  341. mindspore/ops/function/spectral_func.py +25 -58
  342. mindspore/ops/function/vmap_func.py +24 -17
  343. mindspore/ops/functional.py +22 -7
  344. mindspore/ops/functional_overload.py +1440 -0
  345. mindspore/ops/op_info_register.py +32 -244
  346. mindspore/ops/operations/__init__.py +13 -7
  347. mindspore/ops/operations/_custom_ops_utils.py +247 -0
  348. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  349. mindspore/ops/operations/_grad_ops.py +2 -43
  350. mindspore/ops/operations/_infer_ops.py +2 -1
  351. mindspore/ops/operations/_inner_ops.py +43 -84
  352. mindspore/ops/operations/_ms_kernel.py +4 -10
  353. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  354. mindspore/ops/operations/_scalar_ops.py +3 -2
  355. mindspore/ops/operations/_sequence_ops.py +1 -1
  356. mindspore/ops/operations/_tensor_array.py +1 -1
  357. mindspore/ops/operations/array_ops.py +81 -324
  358. mindspore/ops/operations/comm_ops.py +154 -108
  359. mindspore/ops/operations/custom_ops.py +232 -78
  360. mindspore/ops/operations/debug_ops.py +153 -59
  361. mindspore/ops/operations/inner_ops.py +7 -5
  362. mindspore/ops/operations/linalg_ops.py +1 -57
  363. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  364. mindspore/ops/operations/manually_defined/ops_def.py +928 -180
  365. mindspore/ops/operations/math_ops.py +32 -234
  366. mindspore/ops/operations/nn_ops.py +210 -498
  367. mindspore/ops/operations/other_ops.py +62 -9
  368. mindspore/ops/operations/random_ops.py +13 -7
  369. mindspore/ops/operations/reshard_ops.py +1 -1
  370. mindspore/ops/operations/sparse_ops.py +2 -2
  371. mindspore/ops/primitive.py +66 -53
  372. mindspore/ops/tensor_method.py +1888 -0
  373. mindspore/ops_generate/__init__.py +0 -5
  374. mindspore/ops_generate/aclnn/__init__.py +0 -0
  375. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
  376. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
  377. mindspore/ops_generate/api/__init__.py +0 -0
  378. mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
  379. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
  380. mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
  381. mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
  382. mindspore/ops_generate/api/functions_cc_generator.py +237 -0
  383. mindspore/ops_generate/api/gen_api.py +103 -0
  384. mindspore/ops_generate/api/op_api_proto.py +235 -0
  385. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
  386. mindspore/ops_generate/common/__init__.py +0 -0
  387. mindspore/ops_generate/common/base_generator.py +11 -0
  388. mindspore/ops_generate/common/gen_constants.py +91 -0
  389. mindspore/ops_generate/common/gen_utils.py +348 -0
  390. mindspore/ops_generate/common/op_proto.py +473 -0
  391. mindspore/ops_generate/common/template.py +523 -0
  392. mindspore/ops_generate/gen_ops.py +22 -1069
  393. mindspore/ops_generate/op_def/__init__.py +0 -0
  394. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  395. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
  396. mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
  397. mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
  398. mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
  399. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  400. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  401. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  402. mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
  403. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
  404. mindspore/ops_generate/pyboost/__init__.py +0 -0
  405. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
  406. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
  407. mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
  408. mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
  409. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
  410. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
  411. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
  412. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
  413. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
  414. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
  415. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
  416. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
  417. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
  418. mindspore/ops_generate/resources/__init__.py +0 -0
  419. mindspore/ops_generate/resources/resource_list.py +30 -0
  420. mindspore/ops_generate/resources/resource_loader.py +36 -0
  421. mindspore/ops_generate/resources/resource_manager.py +64 -0
  422. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  423. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  424. mindspore/parallel/__init__.py +7 -3
  425. mindspore/parallel/_auto_parallel_context.py +152 -34
  426. mindspore/parallel/_cell_wrapper.py +130 -15
  427. mindspore/parallel/_parallel_serialization.py +107 -5
  428. mindspore/parallel/_ps_context.py +1 -1
  429. mindspore/parallel/_recovery_context.py +7 -2
  430. mindspore/parallel/_tensor.py +142 -18
  431. mindspore/parallel/_utils.py +199 -23
  432. mindspore/parallel/algo_parameter_config.py +4 -4
  433. mindspore/parallel/auto_parallel.py +732 -0
  434. mindspore/parallel/checkpoint_convert.py +159 -0
  435. mindspore/parallel/checkpoint_transform.py +698 -35
  436. mindspore/parallel/cluster/process_entity/_api.py +276 -50
  437. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  438. mindspore/parallel/cluster/run.py +21 -4
  439. mindspore/parallel/function/__init__.py +24 -0
  440. mindspore/parallel/function/reshard_func.py +259 -0
  441. mindspore/parallel/nn/__init__.py +25 -0
  442. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  443. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  444. mindspore/parallel/parameter_broadcast.py +25 -14
  445. mindspore/parallel/shard.py +137 -58
  446. mindspore/parallel/transform_safetensors.py +363 -305
  447. mindspore/pgodb140.dll +0 -0
  448. mindspore/pgort140.dll +0 -0
  449. mindspore/profiler/__init__.py +22 -5
  450. mindspore/profiler/analysis/__init__.py +0 -0
  451. mindspore/profiler/analysis/parser/__init__.py +0 -0
  452. mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
  453. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  454. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  455. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  456. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  457. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  458. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
  459. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  460. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
  461. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  462. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  463. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  464. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  465. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  466. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  467. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  468. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  469. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  470. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  471. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
  472. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  473. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  474. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  475. mindspore/profiler/analysis/task_manager.py +131 -0
  476. mindspore/profiler/analysis/time_converter.py +84 -0
  477. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  478. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
  479. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  480. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
  481. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
  482. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
  483. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
  484. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  485. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  486. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
  487. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  488. mindspore/profiler/analysis/work_flow.py +73 -0
  489. mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
  490. mindspore/profiler/common/command_executor.py +90 -0
  491. mindspore/profiler/common/constant.py +186 -3
  492. mindspore/profiler/common/file_manager.py +208 -0
  493. mindspore/profiler/common/log.py +130 -0
  494. mindspore/profiler/common/msprof_cmd_tool.py +221 -0
  495. mindspore/profiler/common/path_manager.py +395 -0
  496. mindspore/profiler/common/process_bar.py +168 -0
  497. mindspore/profiler/common/process_pool.py +9 -3
  498. mindspore/profiler/common/profiler_context.py +500 -0
  499. mindspore/profiler/common/profiler_info.py +304 -0
  500. mindspore/profiler/common/profiler_meta_data.py +74 -0
  501. mindspore/profiler/common/profiler_output_path.py +284 -0
  502. mindspore/profiler/common/profiler_parameters.py +251 -0
  503. mindspore/profiler/common/profiler_path_manager.py +179 -0
  504. mindspore/profiler/common/record_function.py +76 -0
  505. mindspore/profiler/common/tlv_decoder.py +76 -0
  506. mindspore/profiler/common/util.py +75 -2
  507. mindspore/profiler/dynamic_profiler.py +341 -75
  508. mindspore/profiler/envprofiler.py +163 -0
  509. mindspore/profiler/experimental_config.py +197 -0
  510. mindspore/profiler/mstx.py +242 -0
  511. mindspore/profiler/platform/__init__.py +21 -0
  512. mindspore/profiler/platform/base_profiler.py +40 -0
  513. mindspore/profiler/platform/cpu_profiler.py +124 -0
  514. mindspore/profiler/platform/gpu_profiler.py +74 -0
  515. mindspore/profiler/platform/npu_profiler.py +335 -0
  516. mindspore/profiler/profiler.py +1073 -90
  517. mindspore/profiler/profiler_action_controller.py +187 -0
  518. mindspore/profiler/profiler_interface.py +118 -0
  519. mindspore/profiler/schedule.py +243 -0
  520. mindspore/rewrite/api/node.py +15 -13
  521. mindspore/rewrite/api/symbol_tree.py +2 -3
  522. mindspore/run_check/_check_version.py +27 -20
  523. mindspore/run_check/run_check.py +1 -1
  524. mindspore/runtime/__init__.py +37 -0
  525. mindspore/runtime/device.py +27 -0
  526. mindspore/runtime/event.py +209 -0
  527. mindspore/runtime/executor.py +177 -0
  528. mindspore/runtime/memory.py +409 -0
  529. mindspore/runtime/stream.py +460 -0
  530. mindspore/runtime/thread_bind_core.py +401 -0
  531. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  532. mindspore/swresample-4.dll +0 -0
  533. mindspore/swscale-6.dll +0 -0
  534. mindspore/tbbmalloc.dll +0 -0
  535. mindspore/tinyxml2.dll +0 -0
  536. mindspore/train/__init__.py +8 -8
  537. mindspore/train/_utils.py +88 -25
  538. mindspore/train/amp.py +9 -5
  539. mindspore/train/callback/__init__.py +2 -2
  540. mindspore/train/callback/_callback.py +2 -16
  541. mindspore/train/callback/_checkpoint.py +53 -55
  542. mindspore/train/callback/_cluster_monitor.py +14 -18
  543. mindspore/train/callback/_early_stop.py +1 -1
  544. mindspore/train/callback/_flops_collector.py +103 -68
  545. mindspore/train/callback/_history.py +8 -5
  546. mindspore/train/callback/_lambda_callback.py +2 -2
  547. mindspore/train/callback/_landscape.py +0 -3
  548. mindspore/train/callback/_loss_monitor.py +2 -1
  549. mindspore/train/callback/_on_request_exit.py +6 -5
  550. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  551. mindspore/train/callback/_summary_collector.py +52 -19
  552. mindspore/train/callback/_time_monitor.py +2 -1
  553. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
  554. mindspore/train/data_sink.py +25 -2
  555. mindspore/train/dataset_helper.py +15 -16
  556. mindspore/train/loss_scale_manager.py +8 -7
  557. mindspore/train/metrics/accuracy.py +3 -3
  558. mindspore/train/metrics/confusion_matrix.py +9 -9
  559. mindspore/train/metrics/error.py +3 -3
  560. mindspore/train/metrics/hausdorff_distance.py +4 -4
  561. mindspore/train/metrics/mean_surface_distance.py +3 -3
  562. mindspore/train/metrics/metric.py +0 -12
  563. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  564. mindspore/train/metrics/precision.py +11 -10
  565. mindspore/train/metrics/recall.py +9 -9
  566. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  567. mindspore/train/mind_ir_pb2.py +174 -46
  568. mindspore/train/model.py +184 -113
  569. mindspore/train/serialization.py +622 -978
  570. mindspore/train/summary/_summary_adapter.py +2 -2
  571. mindspore/train/summary/summary_record.py +2 -3
  572. mindspore/train/train_thor/model_thor.py +1 -1
  573. mindspore/turbojpeg.dll +0 -0
  574. mindspore/utils/__init__.py +6 -3
  575. mindspore/utils/dryrun.py +140 -0
  576. mindspore/utils/hooks.py +81 -0
  577. mindspore/utils/runtime_execution_order_check.py +550 -0
  578. mindspore/utils/utils.py +138 -4
  579. mindspore/vcmeta.dll +0 -0
  580. mindspore/vcruntime140.dll +0 -0
  581. mindspore/vcruntime140_1.dll +0 -0
  582. mindspore/version.py +1 -1
  583. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
  584. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +587 -418
  585. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
  586. mindspore/_install_custom.py +0 -43
  587. mindspore/common/_register_for_adapter.py +0 -74
  588. mindspore/common/_tensor_overload.py +0 -139
  589. mindspore/mindspore_np_dtype.dll +0 -0
  590. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  591. mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
  592. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  593. mindspore/ops_generate/gen_aclnn_implement.py +0 -263
  594. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  595. mindspore/ops_generate/gen_pyboost_func.py +0 -1052
  596. mindspore/ops_generate/gen_utils.py +0 -209
  597. mindspore/ops_generate/op_proto.py +0 -145
  598. mindspore/ops_generate/template.py +0 -261
  599. mindspore/profiler/envprofiling.py +0 -254
  600. mindspore/profiler/profiling.py +0 -1926
  601. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
  602. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
@@ -35,11 +35,9 @@ import stat
 import subprocess
 import warnings
 
-import gc
 import time
 import uuid
 import multiprocessing
-from enum import Enum
 from importlib import import_module
 import sys
 import threading
@@ -53,18 +51,18 @@ import mindspore._c_dataengine as cde
 from mindspore._c_expression import typing
 
 from mindspore import log as logger
-from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _get_ps_context,\
-    _enable_distributed_mindrt
+from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _get_ps_context, \
+    _enable_distributed_mindrt
 from mindspore.dataset.engine.offload import GetOffloadModel
-
+from mindspore.communication.management import get_group_size
 import mindspore.dataset.transforms.c_transforms as c_transforms
 import mindspore.dataset.transforms.py_transforms as py_transforms
 import mindspore.dataset.transforms as transforms
 from mindspore.dataset.text.utils import SentencePieceModel, DE_C_INTER_SENTENCEPIECE_MODE
-from mindspore.parallel._utils import _get_device_num
 from mindspore.dataset.debug import DebugHook
 
 from mindspore.dataset.engine import samplers
+from mindspore.dataset.engine.samplers import Shuffle
 from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
     ITERATORS_LIST, _unset_iterator_cleanup, _cleanup_the_iterators_if_created
 from .queue import _SharedQueue, _Queue
@@ -74,13 +72,14 @@ from .validators import check_batch, check_shuffle, check_map, check_filter, che
     check_save, check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send, check_padded_batch, \
     check_total_batch, check_sync_update
 from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
-    get_enable_watchdog, get_seed, set_seed, get_debug_mode, get_multiprocessing_timeout_interval, _get_debug_hook_list
+    get_enable_watchdog, get_seed, set_seed, get_debug_mode, get_multiprocessing_timeout_interval, \
+    _get_debug_hook_list, get_multiprocessing_start_method
 from ..core.datatypes import mstype_to_detype
 from ..core.validator_helpers import replace_none
 from ..core.py_util_helpers import ExceptionHandler
 from ..transforms.py_transforms_util import FuncWrapper, Implementation
 from ..vision.transforms import ToNumpy
-from ...mindrecord.config import _get_enc_key, _get_enc_mode, _get_hash_mode, encrypt, append_hash_to_file
+from ...mindrecord.config import _get_enc_key, _get_enc_mode, encrypt
 
 try:
     context = import_module("mindspore.context")
@@ -136,71 +135,6 @@ def _reset_training_dataset(global_step, dataset_size):
         raise RuntimeError("Training dataset is not set.")
 
 
-class Shuffle(str, Enum):
-    """Specify the shuffle mode.
-
-    - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
-    - ``Shuffle.FILES`` : Shuffle files only.
-    - ``Shuffle.INFILE`` : Shuffle data within each file.
-    """
-    GLOBAL: str = "global"
-    FILES: str = "files"
-    INFILE: str = "infile"
-
-
-ShuffleToShuffleMode = {Shuffle.FILES: cde.ShuffleMode.FILES,
-                        Shuffle.GLOBAL: cde.ShuffleMode.GLOBAL,
-                        Shuffle.INFILE: cde.ShuffleMode.INFILE}
-
-
-def shuffle_to_shuffle_mode(shuffle):
-    """
-    Shuffle Enum to Shuffle Mode
-
-    Args:
-        shuffle (Shuffle): shuffle flag to shuffle mode in C layer
-
-    Returns:
-        ShuffleMode, shuffle mode
-    """
-    shuffle_mode = cde.ShuffleMode.GLOBAL  # Global shuffle
-    if not isinstance(shuffle, Shuffle):
-        if shuffle is None or shuffle:
-            shuffle_mode = cde.ShuffleMode.GLOBAL  # Global shuffle
-        else:
-            shuffle_mode = cde.ShuffleMode.FALSE  # No shuffle
-    else:
-        shuffle_mode = ShuffleToShuffleMode[shuffle]
-    return shuffle_mode
-
-
-def shuffle_to_bool(shuffle):
-    """
-    Shuffle Enum to bool
-
-    Args:
-        shuffle (Shuffle): shuffle flag to bool
-
-    Returns:
-        bool, True / False
-    """
-    if shuffle is not None and not isinstance(shuffle, (bool, Shuffle)):
-        raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like 'Shuffle.GLOBAL' or "
-                        "'Shuffle.FILES' or 'Shuffle.INFILE'.")
-
-    shuffle_bool = True
-    if not isinstance(shuffle, Shuffle):
-        if shuffle is None:
-            shuffle_bool = None
-        elif shuffle:
-            shuffle_bool = True
-        else:
-            shuffle_bool = False
-    else:
-        shuffle_bool = True
-    return shuffle_bool
-
-
 @check_zip
 def zip(datasets):
     """
@@ -403,6 +337,7 @@ class Dataset:
         parent = self.parent
         self.parent = []
         dataset = copy.deepcopy(self)
+        dataset = self.pre_process(dataset)
         global _OP_NAME
         _OP_NAME = Dataset._get_operator_id(dataset)
         ir_tree = dataset.parse_tree(getter_mode)
@@ -410,6 +345,19 @@ class Dataset:
             _init_device_info()
         return ir_tree, dataset
 
+    def pre_process(self, dataset):
+        """Insert batch operation for GeneratorDataset with batch_sampler."""
+        if hasattr(dataset, "has_batch_sampler") and dataset.has_batch_sampler:
+            original_parent = dataset.parent
+            dataset.parent = []
+            dataset = dataset.batch(batch_size=-1, num_parallel_workers=dataset.num_parallel_workers,
+                                    per_batch_map=dataset.collate_fn)
+            dataset.parent = original_parent
+        else:
+            for index in range(len(dataset.children)):
+                dataset.children[index] = self.pre_process(dataset.children[index])
+        return dataset
+
     def parse_tree(self, getter_mode=False):
         """
         Internal method to parse the API tree into an IR tree.
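The new `pre_process` hook implies that `GeneratorDataset` can now carry a batch sampler whose `collate_fn` is rewritten into an internal `batch(batch_size=-1, per_batch_map=...)` node. A hedged usage sketch; the `batch_sampler` parameter name is inferred from the hook above, not confirmed here:

    import mindspore.dataset as ds

    class IndexBatchSampler:
        """Hypothetical sampler yielding one list of row indices per batch."""
        def __init__(self, num_rows, batch_size):
            self.num_rows, self.batch_size = num_rows, batch_size

        def __iter__(self):
            for start in range(0, self.num_rows, self.batch_size):
                yield list(range(start, min(start + self.batch_size, self.num_rows)))

    data = [(i,) for i in range(10)]
    # has_batch_sampler becomes True, so create_ir_tree() inserts the batch node.
    dataset = ds.GeneratorDataset(data, column_names=["x"],
                                  batch_sampler=IndexBatchSampler(10, 4))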
@@ -495,8 +443,7 @@ class Dataset:
         .. image:: bucket_batch_by_length_en.png
 
         Note:
-            - When using `Data Sinking <https://www.mindspore.cn/docs/en/master/model_train/train_process/optimize/
-              sink_mode.html#data-sinking>`_ in Graph mode, the input shape of the network should keep consistent.
+            - When using Data Sinking in Graph mode, the input shape of the network should keep consistent.
               You should set `drop_remainder` to "True" to discard the last incomplete batch of data,
               or supplement/remove samples to ensure the dataset size is divisible by `batch_size`.
 
@@ -561,7 +508,7 @@ class Dataset:
     @check_batch
     def batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, **kwargs):
         """
-        Combine batch_size number of consecutive rows into batch which apply per_batch_map to the samples first.
+        Combine `batch_size` number of consecutive rows into batch which apply `per_batch_map` to the samples first.
 
         For any column, all the elements within that column must have the same shape.
 
@@ -572,8 +519,7 @@ class Dataset:
         Note:
             - The order of using repeat and batch reflects the number of batches and per_batch_map.
               It is recommended that the repeat operation applied after the batch operation finished.
-            - When using `Data Sinking <https://www.mindspore.cn/docs/en/master/model_train/train_process/optimize/
-              sink_mode.html#data-sinking>`_ in Graph mode, the input shape of the network should keep consistent.
+            - When using Data Sinking in Graph mode, the input shape of the network should keep consistent.
               You should set `drop_remainder` to "True" to discard the last incomplete batch of data,
               or supplement/remove samples to ensure the dataset size is divisible by `batch_size`.
 
@@ -615,13 +561,19 @@ class Dataset:
 
             - max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
               allocation to copy data between processes, the total occupied shared memory will increase as
-              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. If set
-              to -1, shared memory will be dynamically allocated with the actual size of data. This is only used if
-              ``python_multiprocessing`` is set to True. If it is an int value, it represents
-              ``input_columns`` and ``output_columns`` use this value as the unit to create shared memory.
-              If it is a list, the first element represents the ``input_columns`` use this value as the unit to
-              create shared memory, and the second element represents ``output_columns`` use this value as the unit
-              to create shared memory. Default: ``None`` , allocate shared memory dynamically.
+              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase.
+              This is only used if ``python_multiprocessing`` is set to ``True``.
+              Default: ``None`` , allocate shared memory dynamically (deprecated in future version).
+
+              - If set to ``-1`` / ``None``, shared memory will be dynamically allocated with the
+                actual size of data.
+
+              - If it is an int value, it represents ``input_columns`` and ``output_columns`` use this value as the
+                unit to create shared memory.
+
+              - If it is a list, represents the ``input_columns`` use the first element as the unit to
+                create shared memory, and represents ``output_columns`` use the second element as the
+                unit to create shared memory.
 
         Returns:
             Dataset, a new dataset with the above operation applied.
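To make the int/list distinction concrete, a short sketch, assuming a Linux host where static shared memory applies:

    import numpy as np
    import mindspore.dataset as ds

    def stack_batch(cols, batch_info):
        # per_batch_map receives each input column as a list of rows
        return (np.stack(cols),)

    dataset = ds.NumpySlicesDataset(np.ones((100, 28, 28), dtype=np.float32),
                                    column_names=["image"])
    # 16 MB units for input rows, 32 MB units for output rows; only takes
    # effect because python_multiprocessing=True.
    dataset = dataset.batch(batch_size=8,
                            input_columns=["image"],
                            per_batch_map=stack_batch,
                            python_multiprocessing=True,
                            num_parallel_workers=2,
                            max_rowsize=[16, 32])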
@@ -669,8 +621,7 @@ class Dataset:
         Note:
             - The order of using repeat and padded_batch reflects the number of batches.
               It is recommended that the repeat operation applied after the padded_batch operation finished.
-            - When using `Data Sinking <https://www.mindspore.cn/docs/en/master/model_train/train_process/optimize/
-              sink_mode.html#data-sinking>`_ in Graph mode, the input shape of the network should keep consistent.
+            - When using Data Sinking in Graph mode, the input shape of the network should keep consistent.
               You should set `drop_remainder` to "True" to discard the last incomplete batch of data,
               or supplement/remove samples to ensure the dataset size is divisible by `batch_size`.
 
@@ -724,9 +675,9 @@ class Dataset:
 
         Args:
             condition_name (str): The condition name that is used to toggle sending next row.
-            num_batch (int): the number of batches without blocking at the start of each epoch.
+            num_batch (int, optional): the number of batches without blocking at the start of each epoch.
                 Default: ``1``.
-            callback (function): The callback function that will be invoked when sync_update is called.
+            callback (function, optional): The callback function that will be invoked when sync_update is called.
                 Default: ``None``.
 
         Returns:
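`sync_wait` pairs with `sync_update`: the pipeline blocks after `num_batch` batches until the consumer releases more. A minimal sketch following the pattern these docs describe:

    import mindspore.dataset as ds

    dataset = ds.GeneratorDataset([(i,) for i in range(8)], column_names=["x"],
                                  shuffle=False)
    dataset = dataset.sync_wait(condition_name="policy", num_batch=1)
    dataset = dataset.batch(2)
    for _ in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
        # release the next batch once the current one has been consumed
        dataset.sync_update(condition_name="policy")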
@@ -911,15 +862,21 @@ class Dataset:
             - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes.
               This option could be beneficial if the Python operation is computational heavy. Default: ``False``.
 
-            - max_rowsize (Union[int, list[int]], optional): Maximum size of row in MB that is used for shared
-              memory allocation to copy data between processes, the total occupied shared memory will increase as
-              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. If set
-              to -1, shared memory will be dynamically allocated with the actual size of data. This is only used if
-              ``python_multiprocessing`` is set to True. If it is an int value, it represents
-              ``input_columns`` and ``output_columns`` use this value as the unit to create shared memory.
-              If it is a list, the first element represents the ``input_columns`` use this value as the unit to
-              create shared memory, and the second element represents ``output_columns`` use this value as the unit
-              to create shared memory. Default: ``None`` , allocate shared memory dynamically.
+            - max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
+              allocation to copy data between processes, the total occupied shared memory will increase as
+              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase.
+              This is only used if ``python_multiprocessing`` is set to ``True``.
+              Default: ``None`` , allocate shared memory dynamically (deprecated in future version).
+
+              - If set to ``-1`` / ``None``, shared memory will be dynamically allocated with the
+                actual size of data.
+
+              - If it is an int value, it represents ``input_columns`` and ``output_columns`` use this value as the
+                unit to create shared memory.
+
+              - If it is a list, the first element represents the ``input_columns`` use this value as the unit to
+                create shared memory, and the second element represents ``output_columns`` use this value as the
+                unit to create shared memory.
 
             - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
               Default: ``None``, which means no cache is used.
@@ -932,8 +889,29 @@ class Dataset:
         Note:
             - Input `operations` accepts TensorOperations defined in mindspore.dataset part, plus user-defined
               Python functions (PyFuncs).
-            - Do not add network computing operators from mindspore.nn and mindspore.ops or others into this
-              `operations` .
+            - Setting the start method of multiprocessing to `spawn` mode by
+              ds.config.set_multiprocessing_start_method("spawn") with `python_multiprocessing=True`
+              and `num_parallel_workers>1` supports adding network computing operators from mindspore.nn and
+              mindspore.ops or other network computing operators into this `operations` .
+              Otherwise, adding to `operations` is not supported.
+            - Currently only some scenarios support calling DVPP operators in Python functions passed in with the
+              `operations` parameter:
+
+              +---------------+----------------------------+----------------------------+----------------------------+
+              |               |                            |                     Multiprocessing                     |
+              |               |       Multithreading       +----------------------------+----------------------------+
+              |               |                            |           spawn            |            fork            |
+              +===============+============================+============================+============================+
+              |Independent    |Data Processing: support    |Data Processing: support    |Data Processing: support    |
+              |               |                            |                            |                            |
+              |process mode   |Data Processing + Network   |Data Processing + Network   |Data Processing + Network   |
+              |               |training: not support       |training: support           |training: not support       |
+              +---------------+----------------------------+----------------------------+----------------------------+
+              |Non-independent|Data Processing: support    |Data Processing: support    |Data Processing: support    |
+              |               |                            |                            |                            |
+              |process mode   |Data Processing + Network   |Data Processing + Network   |Data Processing + Network   |
+              |               |training: support           |training: support           |training: not support       |
+              +---------------+----------------------------+----------------------------+----------------------------+
 
         Returns:
             Dataset, a new dataset with the above operation applied.
1557
1535
  >>> d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
1558
1536
  >>> d1.save('/path/to/save_file')
1559
1537
  """
1560
- if (_get_enc_key() is not None or _get_hash_mode() is not None) and num_files > 1:
1561
- raise RuntimeError("When encode mode or hash check is enabled, " +
1538
+ if _get_enc_key() is not None and num_files > 1:
1539
+ raise RuntimeError("When encode mode is enabled, " +
1562
1540
  "the automatic sharding function is unavailable.")
1563
1541
 
1564
1542
  ir_tree, api_tree = self.create_ir_tree()
@@ -1571,10 +1549,6 @@ class Dataset:
1571
1549
 
1572
1550
  consumer.Save()
1573
1551
 
1574
- if _get_hash_mode() is not None:
1575
- append_hash_to_file(file_name)
1576
- append_hash_to_file(file_name + ".db")
1577
-
1578
1552
  if _get_enc_key() is not None:
1579
1553
  encrypt(file_name, _get_enc_key(), _get_enc_mode())
1580
1554
  encrypt(file_name + ".db", _get_enc_key(), _get_enc_mode())
@@ -1761,7 +1735,7 @@ class Dataset:
1761
1735
  Get the shapes of output data.
1762
1736
 
1763
1737
  Args:
1764
- estimate (bool): If `estimate` is ``False`` , will return the shapes of first data row.
1738
+ estimate (bool, optional): If `estimate` is ``False`` , will return the shapes of first data row.
1765
1739
  Otherwise, will iterate the whole dataset and return the estimated shapes of data row,
1766
1740
  where dynamic shape is marked as None (used in dynamic data shapes scenario).
1767
1741
  Default: ``False`` .
@@ -2338,10 +2312,10 @@ class SourceDataset(Dataset):
2338
2312
  self.shard_id = replace_none(shard_id, 0)
2339
2313
 
2340
2314
  if shuffle is not None and not isinstance(shuffle, (bool, Shuffle)):
2341
- raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like 'Shuffle.GLOBAL' or "
2342
- "'Shuffle.FILES' or 'Shuffle.INFILE'.")
2315
+ raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like 'Shuffle.ADAPTIVE' or "
2316
+ "'Shuffle.GLOBAL' or 'Shuffle.PARTIAL' or 'Shuffle.FILES' or 'Shuffle.INFILE'.")
2343
2317
 
2344
- self.shuffle_flag = 2 # Global shuffle
2318
+ self.shuffle_flag = 5 # Adaptive shuffle
2345
2319
  if not isinstance(shuffle, Shuffle):
2346
2320
  if shuffle is None or shuffle:
2347
2321
  self.shuffle_flag = 2 # Global shuffle
@@ -2354,6 +2328,10 @@ class SourceDataset(Dataset):
2354
2328
  self.shuffle_flag = 1 # Files shuffle
2355
2329
  elif shuffle == Shuffle.INFILE:
2356
2330
  self.shuffle_flag = 3 # Infile shuffle
2331
+ elif shuffle == Shuffle.ADAPTIVE:
2332
+ self.shuffle_flag = 5
2333
+ elif shuffle == Shuffle.PARTIAL:
2334
+ self.shuffle_flag = 4
2357
2335
 
2358
2336
  def parse(self, children=None):
2359
2337
  raise NotImplementedError("Dataset has to implement parse method.")
@@ -2410,15 +2388,23 @@ class MappableDataset(SourceDataset):
2410
2388
  def __init__(self, num_parallel_workers=None, sampler=None, num_samples=None, shuffle=None, num_shards=None,
2411
2389
  shard_id=None, cache=None):
2412
2390
  num_shards, shard_id = self._update_data_shard(num_shards, shard_id)
2391
+ if sampler is None:
2392
+ if shuffle is None or shuffle is True:
2393
+ shuffle = Shuffle.GLOBAL
2394
+ elif shuffle is False:
2395
+ shuffle = Shuffle.FALSE
2413
2396
  super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
2414
2397
  num_shards=num_shards, shard_id=shard_id, cache=cache)
2415
- self.shuffle_flag = replace_none(shuffle, True)
2416
2398
  self.sampler = samplers.select_sampler(num_samples, sampler, shuffle, num_shards, shard_id)
2417
2399
 
2418
2400
  def add_sampler(self, new_sampler):
2419
2401
  """
2420
2402
  Add a child sampler for the current dataset.
2421
2403
 
2404
+ Note:
2405
+ - If the sampler is added and it has a shuffle option, its value must be ``Shuffle.GLOBAL`` .
2406
+ Additionally, the original sampler's shuffle value cannot be ``Shuffle.PARTIAL`` .
2407
+
2422
2408
  Args:
2423
2409
  new_sampler (Sampler): The child sampler to be added.
2424
2410
 
@@ -2432,6 +2418,16 @@ class MappableDataset(SourceDataset):
2432
2418
  # Note: By adding a sampler, the sampled IDs will flow to the new_sampler
2433
2419
  # after first passing through the current samplers attached to this dataset.
2434
2420
  self.dataset_size = None
2421
+
2422
+ if self.sampler is not None and self.sampler.get_shuffle_mode() == Shuffle.PARTIAL:
2423
+ raise RuntimeError("When multiple samplers are used, ensure that the shuffle of the current sampler "
2424
+ "must not be Shuffle.PARTIAL.")
2425
+
2426
+ if new_sampler.get_shuffle_mode() != Shuffle.GLOBAL and new_sampler.get_shuffle_mode() != Shuffle.FALSE:
2427
+ raise RuntimeError("When multiple samplers are used, ensure that the shuffle of the input sampler "
2428
+ "must be Shuffle.FALSE or Shuffle.GLOBAL, but got: {}."
2429
+ .format(new_sampler.get_shuffle_mode()))
2430
+
2435
2431
  new_sampler.add_child(self.sampler)
2436
2432
  self.sampler = new_sampler
2437
2433
 
@@ -2594,7 +2590,7 @@ def _check_shm_usage(num_worker, queue_size, in_rowsize, out_rowsize):
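A sketch of the new constraint, assuming `RandomSampler` reports `Shuffle.GLOBAL` and `SequentialSampler` reports `Shuffle.FALSE` from the `get_shuffle_mode()` used above:

    import mindspore.dataset as ds

    dataset = ds.ImageFolderDataset("/path/to/images",
                                    sampler=ds.SequentialSampler())
    # allowed: the added sampler shuffles globally; a PARTIAL-shuffling
    # sampler on either side now raises RuntimeError
    dataset.add_sampler(ds.RandomSampler())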
2594
2590
  threshold_ratio = 0.8
2595
2591
  # Verify available size only when using static shared memory on Linux
2596
2592
  if platform.system().lower() not in {"windows", "darwin"} and in_rowsize != -1 and out_rowsize != -1:
2597
- device_num = _get_device_num()
2593
+ device_num = get_group_size()
2598
2594
  # In the cluster, _get_device_num indicates the number of the entire cluster. The maximum number of cards
2599
2595
  # on the ascend server is 8.
2600
2596
  if device_num > 1:
@@ -2680,11 +2676,6 @@ class BatchDataset(UnionBaseDataset):
2680
2676
  else:
2681
2677
  self.max_rowsize = [max_rowsize[0] * self.batch_size, max_rowsize[1] * self.batch_size]
2682
2678
 
2683
- def __del__(self):
2684
- if hasattr(self, "process_pool") and self.process_pool is not None:
2685
- self.process_pool.terminate()
2686
- del self.process_pool
2687
-
2688
2679
  def parse(self, children=None):
2689
2680
  return cde.BatchNode(children[0], self.batch_size, self.drop_remainder, False, self.input_columns,
2690
2681
  self.output_columns, self.batch_size_func, self.per_batch_map, {},
@@ -2747,8 +2738,8 @@ class BatchDataset(UnionBaseDataset):
2747
2738
  if self.num_parallel_workers is None:
2748
2739
  self.num_parallel_workers = get_num_parallel_workers()
2749
2740
 
2750
- self.process_pool = _PythonMultiprocessing(str(self), self.num_parallel_workers, [self.per_batch_map],
2751
- self.max_rowsize)
2741
+ self.process_pool = _PythonMultiprocessing(get_multiprocessing_start_method(), self.num_parallel_workers,
2742
+ str(self), [self.per_batch_map], self.max_rowsize)
2752
2743
  # Wrap per_batch_map into _PythonCallable
2753
2744
  self.per_batch_map = _PythonCallable(self.per_batch_map, 0, self.process_pool)
2754
2745
  else:
@@ -3023,7 +3014,7 @@ class SyncWaitDataset(UnionBaseDataset):
3023
3014
 
3024
3015
  class ShuffleDataset(UnionBaseDataset):
3025
3016
  """
3026
- The result of applying Shuffle operation to the input Dataset.
3017
+ The result of applying shuffle operation to the input Dataset.
3027
3018
 
3028
3019
  Args:
3029
3020
  input_dataset (Dataset): Input Dataset to be shuffled.
@@ -3200,9 +3191,21 @@ def _worker_loop(operations, pipe, worker_id):
3200
3191
 
3201
3192
 
3202
3193
  def worker_target(operations, worker_id):
3194
+ logger.info("Multiprocessing start method: {}".format(multiprocessing.get_start_method()))
3203
3195
  return lambda pipe: _worker_loop(operations, pipe, worker_id)
3204
3196
 
3205
3197
 
3198
+ class WorkerTarget:
3199
+ def __init__(self, operations, pipe, worker_id):
3200
+ self.operations = operations
3201
+ self.pipe = pipe
3202
+ self.worker_id = worker_id
3203
+ logger.info("Multiprocessing start method: {}".format(multiprocessing.get_start_method()))
3204
+
3205
+ def __call__(self):
3206
+ return _worker_loop(self.operations, self.pipe, self.worker_id)
3207
+
3208
+
3206
3209
  class _MPWorker(multiprocessing.Process):
3207
3210
  """
3208
3211
  Worker process for multiprocessing.
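`WorkerTarget` exists because spawn-mode workers receive their target by pickling it into a fresh interpreter, and the lambda returned by `worker_target` cannot be pickled; a top-level class with `__call__` can. The same pattern in isolation (illustrative names only):

    import multiprocessing

    class Target:
        """Picklable replacement for a worker closure."""
        def __init__(self, worker_id):
            self.worker_id = worker_id

        def __call__(self):
            print("worker", self.worker_id, "running")

    if __name__ == "__main__":
        multiprocessing.set_start_method("spawn", force=True)
        p = multiprocessing.Process(target=Target(0), daemon=True)
        p.start()
        p.join()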
@@ -3257,6 +3260,12 @@ class _MPWorker(multiprocessing.Process):
 
             logger.info(f"Closing worker with PID: {self.pid}")
             self.pipe.master_close()
+
+            process_dir = os.path.join('/proc', str(self.pid))
+            while self.is_alive() and os.path.exists(process_dir):
+                logger.info("Waiting for worker {} to close ...".format(self.pid))
+                time.sleep(0.001)
+
             # delete the handles held by the master
             del self.pipe.in_queue
             del self.pipe.res_queue
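The added loop polls `/proc/<pid>` so the close call only returns after the worker has actually exited; `/proc` exists only on Linux, which matches the fork-mode code path. The idiom in isolation:

    import os
    import time

    def wait_until_exited(pid, interval=0.001):
        # /proc/<pid> disappears once the process has exited and been reaped (Linux only).
        while os.path.exists(os.path.join("/proc", str(pid))):
            time.sleep(interval)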
@@ -3276,6 +3285,41 @@ class _MPWorker(multiprocessing.Process):
         return False
 
 
+def worker_is_alive(worker):
+    """Check whether a spawn-mode subprocess worker is alive."""
+    try:
+        return worker.is_alive()
+    except ValueError:
+        return False
+
+
+def close_worker(worker, pipe):
+    """Close a subprocess worker in spawn mode."""
+    try:
+        if worker_is_alive(worker):
+            # release the eager executor which is used by the current process
+            transforms.transforms.clean_unused_executors()
+
+            logger.info(f"Closing worker with PID: {worker.pid}")
+            pipe.master_close()
+
+            process_dir = os.path.join('/proc', str(worker.pid))
+            while worker_is_alive(worker) and os.path.exists(process_dir):
+                logger.info("Waiting for worker {} to close ...".format(worker.pid))
+                time.sleep(0.5)
+
+            # delete the handles held by the master
+            del pipe.in_queue
+            del pipe.res_queue
+            worker.terminate()
+            worker.join()
+            worker.close()
+    except ValueError:
+        # Process has been closed already
+        return
+
+
 class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
     """
     A wrapper around multiprocessing.pool that performs cleanup and ensures proper termination of forked processes.
@@ -3302,10 +3346,11 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             self.origin_hook(ex_type, value, tb)
             self.mp_pool_exit_preprocess()
 
-    def __init__(self, op_name, num_parallel_workers, operations, max_rowsize=(-1, -1)):
+    def __init__(self, start_method, num_parallel_workers, op_name, operations, max_rowsize=(-1, -1)):
         super(_PythonMultiprocessing, self).__init__()
-        self.op_name = op_name
+        self.start_method = start_method  # Python multiprocessing start method: fork / spawn
         self.num_parallel_workers = num_parallel_workers
+        self.op_name = op_name
         self.operations = operations
         self.max_rowsize = max_rowsize
 
@@ -3316,14 +3361,14 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         self.queues_map = {}
         self.next_queue = 0
 
-        self.eot = None
-        self.watch_dog = None
+        self.cleaning_process = None
         self.ppid = None
         self.hook = None
         self.warning_ctl = None
         # cache thread (get_ident()) to worker_id mapping in Python layer
         self.python_threads_to_workers = {}
         self.eof = None
+        self.running = False
 
     def __del__(self):
         try:
@@ -3331,60 +3376,6 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         except TypeError:
             pass
 
-    # This wait function is for cleaning up zombie subprocesses
-    @staticmethod
-    def wait_pid():
-        """
-        This function is used by the main process to release subprocess resources.
-        """
-        try:
-            while True:
-                child_pid, _ = os.waitpid(-1, os.WNOHANG)
-                if child_pid == 0:
-                    break
-        except OSError:
-            # waitpid may fail for some reason, so we ignore this error
-            pass
-
-    # Dataset needs a watch_dog thread to monitor fork multiprocessing,
-    # and the thread can't be a member function, otherwise Python won't collect and release resources.
-    @staticmethod
-    def _watch_dog(eot, workers):
-        """
-        This thread is for monitoring subprocesses forked by GeneratorDataset/map/batch
-        """
-        if not isinstance(workers, list):
-            raise TypeError("[Internal Error] The 2nd parameter of watch dog thread should be a list of processes, "
-                            "but got {}.".format(type(workers)))
-
-        while not eot.is_set():
-            # Monitor and count how many subprocesses have already exited
-            clear_subprocess_timeout = _PythonMultiprocessing._monitor_subprocess_exit(workers)
-            # If a subprocess exit is found, we will wait for 30s and do some waitpid operations
-            if clear_subprocess_timeout > 0:
-                start = time.time()
-                while time.time() - start < clear_subprocess_timeout:
-                    # We need to distinguish get_dataset_size or train finishing normally from the hang scenario.
-                    # If get_dataset_size or train finished normally, _stop_subprocess can be executed and
-                    # self.need_abort can be set to True. If the main process hangs in get(), self.need_abort
-                    # will never be set to True, then we wait for 30s and kill the main process
-                    if eot.is_set():
-                        return
-                    # Sometimes a subprocess may be a zombie, so in these 30s we can wait and do useful tasks (waitpid).
-                    _PythonMultiprocessing.wait_pid()
-                # multiprocessing.queue may hang in .get() forever when the put() process was killed.
-                # We have to exit the main process, otherwise it will hang.
-                _PythonMultiprocessing._terminate_processes(workers)
-                logger.critical("The subprocess of dataset may exit unexpectedly or be killed, "
-                                "main process will exit. If this is not an artificial operation, you can use "
-                                "ds.config.set_enable_watchdog(False) to block this error.")
-                os.kill(os.getpid(), signal.SIGTERM)
-            # sleep to release GIL
-            time.sleep(1)
-
-        # release the workers
-        del workers
-
     @staticmethod
     def _terminate_processes(processes):
         """Terminate subprocesses"""
@@ -3401,45 +3392,12 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             # We don't use w.join because join can only be used in the main process, or it will raise an error.
             p._popen.wait()  # pylint: disable=W0212
 
-    # Monitor the exit number of subprocesses
-    @staticmethod
-    def _monitor_subprocess_exit(workers):
-        """
-        To monitor whether a process has exited.
-
-        Args:
-            workers (list of multiprocessing.Process): multiprocessing.Process.
-
-        Returns:
-            int, the timeout (in seconds) when a process exits.
-        """
-        for w in workers:
-            try:
-                exit_code = w.exitcode
-                if exit_code is not None:
-                    # For kill -9, we can exit quickly
-                    if exit_code == -9:
-                        return 1
-                    # For kill -15, we still exit after 30s
-                    if exit_code == -15:
-                        return 30
-                # In some cases the subprocess has been killed but the exitcode is still None.
-                # So we use os.kill(pid, 0) to check if it is alive.
-                subprocess_alive = _PythonMultiprocessing.is_process_alive(w.pid)
-                if not subprocess_alive:
-                    # Like kill -15, we wait 30s before exit
-                    return 30
-            except ValueError:
-                # process has been closed already
-                return 0
-        return 0
-
     @staticmethod
     def is_process_alive(pid):
         """
         Check if the process is alive or not.
         Note: We hit a deadlock when we use psutil or w.exitcode to check whether a process is alive.
-        Instead we use os.kill(ppid, 0).
+        Instead, we use os.kill(ppid, 0).
 
         Args:
             pid: pid of the process to be checked
@@ -3466,6 +3424,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             quit_signal: The flag of quit.
         """
         signal.signal(signal.SIGINT, signal.SIG_IGN)
+        # Initialize C++ side signal handlers
+        cde.register_worker_handlers()
         while _PythonMultiprocessing.is_process_alive(ppid):
             if quit_signal.is_set():
                 return
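Ignoring SIGINT in helper processes keeps Ctrl+C routed to the main process alone; the standard-library idiom used above is simply:

    import signal

    # In a worker/cleaner process: let the main process own KeyboardInterrupt.
    signal.signal(signal.SIGINT, signal.SIG_IGN)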
@@ -3477,6 +3437,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
 
             time.sleep(0.1)
 
+        logger.info("Clean process detected that the main process {} has exited; begin terminating the "
+                    "worker process(es): {}".format(ppid, [worker.pid for worker in workers]))
         _PythonMultiprocessing._terminate_processes(workers)
         del workers
         os.kill(os.getpid(), signal.SIGTERM)
@@ -3493,10 +3455,10 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         """
         self.python_threads_to_workers = {}
         self.op_id = op_id
-        logger.info("Launching new Python Multiprocessing pool for Op:" + str(self.op_id))
+        logger.info("Launching new Python multiprocessing pool for Op: " + str(self.op_id))
         if self.is_mp_enabled():
             message = "Launching a new Python multiprocessing pool while a pool already exists!" + \
-                " The existing pool will be terminated first."
+                      " The existing pool will be terminated first."
             logger.warning(message)
             self.terminate()
             self.reset()
@@ -3515,32 +3477,52 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         if self.workers is not None:
             raise Exception("Pool was already created, close it first.")
 
-        # Let gc collect unreferenced memory so that child processes in the pool don't have to do it
-        gc.collect()
-
-        # Construct python worker processes
         self.workers = []
+        self.pipes = []
+        self.check_interval = get_multiprocessing_timeout_interval()
         self.warning_ctl = multiprocessing.Value('i', 0)
-        for worker_id in range(self.num_parallel_workers):
-            worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, worker_id)
-            worker.start()
-            self.workers.append(worker)
+        if self.start_method == "fork":
+            # Construct python worker processes
+            for worker_id in range(self.num_parallel_workers):
+                worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, worker_id)
+                worker.start()
+                self.workers.append(worker)
+        else:
+            multiprocessing.set_start_method(self.start_method, True)
+
+            # Construct python worker processes
+            for worker_id in range(self.num_parallel_workers):
+                shared_memory = get_enable_shared_mem()
+                pipe = Pipe(self.warning_ctl, shared_memory=shared_memory, max_rowsize=self.max_rowsize)
+                self.check_interval = get_multiprocessing_timeout_interval()
+                worker = multiprocessing.Process(target=WorkerTarget(self.operations, pipe, worker_id),
+                                                 name="MapWorker" + str(worker_id), daemon=True)
+                self.workers.append(worker)
+                self.pipes.append(pipe)
+                worker.start()
+
+            multiprocessing.set_start_method("fork", True)
 
-        logger.info("Op: " + str(self.op_id) + " Python multiprocessing pool workers' PIDs: " + str(self.get_pids()))
+        logger.info("Launch worker process(es): {}".format(self.get_pids()))
 
         self.hook = _PythonMultiprocessing._ExceptHookHandler()
 
-        # The op (Map, Batch, etc) multiprocessing will launch a watch dog thread for monitoring sub processes
-        self._launch_watch_dog()
+        # Launch a clean process and register worker processes to be monitored by the watch dog.
+        self._launch_monitor()
+        self.running = True
 
-        atexit.register(self.terminate)
+        # Register the termination function via a weak reference so the pool object
+        # can still be garbage collected and destruct properly.
+        atexit.register(lambda cleanup: cleanup()() if cleanup() is not None else None,
+                        weakref.WeakMethod(self.terminate))
 
     def terminate(self):
-        # close watch dog first and then close all the workers
-        self.abort_watchdog()
-        self.close_all_workers()
-        if hasattr(self, "warning_ctl"):
-            del self.warning_ctl
+        if self.running:
+            # abort the monitor first and then close all the workers
+            self._abort_monitor()
+            self.close_all_workers()
+            if hasattr(self, "warning_ctl"):
+                del self.warning_ctl
+            self.running = False
 
     def get_pids(self):
         """
@@ -3596,15 +3578,48 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
 
         # todo check_iterator_cleanup
         if self.is_running() and check_iterator_cleanup() is False:
-            return self.workers[worker_id].execute(idx, *args)
+            if self.start_method == "fork":
+                return self.workers[worker_id].execute(idx, *args)
+            # spawn mode
+            self.pipes[worker_id].master_send(idx, args)
+            time_s = time.time()
+            wait_count = 1
+            while True:
+                cost_time = time.time() - time_s
+                if cost_time / self.check_interval >= wait_count:
+                    wait_count += 1
+                    logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
+                                   "worker of the map operation is hanging. "
+                                   "Check whether the user-defined data transform is too slow or the "
+                                   "output data is too large. You can also set the timeout interval via "
+                                   "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
+                                   "of this log.")
+                    pid = self.workers[worker_id].pid
+                    logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
+                    install_status, _ = subprocess.getstatusoutput("py-spy --version")
+                    if install_status == 0:
+                        stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
+                        logger.warning("Map worker subprocess stack:\n{}".format(stack))
+                    else:
+                        logger.warning("Please `pip install py-spy` to get the stacks of the stuck process.")
+                try:
+                    res = self.pipes[worker_id].master_receive()
+                except queue.Empty:
+                    continue
+                if res is None:
+                    # receive finish signal
+                    return None
+                if isinstance(res, ExceptionHandler):
+                    res.reraise()
+                return res
 
         return None
 
-    def _launch_watch_dog(self):
+    def _launch_monitor(self):
         """
-        We will launch a watchdog thread and a clean process to clean up subprocesses when a process is killed.
-        The watchdog thread will clean up subprocesses and the main process when one of the subprocesses is killed.
-        The cleaning subprocess will clean up subprocesses when the main process is killed.
+        Launch a clean process and register subprocesses to be monitored by the watch dog.
+        The clean process cleans up subprocesses when the main process exits.
+        The watch dog cleans up subprocesses and the main process when any subprocess exits.
         """
         if platform.system().lower() != 'windows':
             self.eof = multiprocessing.Event()
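The hang warning in the spawn-mode `execute` path above repeats once per `check_interval`; the interval is user-tunable through the setter named in the warning text itself:

    import mindspore.dataset as ds

    # Warn about a stuck worker every 600 s instead of the default interval.
    ds.config.set_multiprocessing_timeout_interval(600)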
@@ -3613,38 +3628,45 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                                                       args=(self.ppid, self.workers, self.eof),
                                                       daemon=True)
             self.cleaning_process.start()
+            logger.info("Launch clean process {} to monitor worker "
+                        "process(es): {}".format(self.cleaning_process.pid, self.get_pids()))
 
             if get_enable_watchdog():
-                self.eot = threading.Event()
-                self.watch_dog = threading.Thread(target=self._watch_dog,
-                                                  name="MapWatchDog",
-                                                  args=(self.eot, self.workers + [self.cleaning_process]),
-                                                  daemon=True)
-                self.watch_dog.start()
-
-    def _abort_watchdog(self):
-        if not self.eot.is_set():
-            self.eot.set()
-
-    def abort_watchdog(self):
-        if hasattr(self, 'watch_dog') and self.watch_dog is not None and hasattr(self, 'eot') and self.eot is not None:
-            self._abort_watchdog()
+                worker_ids = [worker.pid for worker in self.workers]
+                worker_ids.append(self.cleaning_process.pid)
+                cde.register_worker_pids(id(self), set(worker_ids))
+
+    def _abort_monitor(self):
+        """Deregister workers monitored by the watch dog and join the clean process."""
+        if get_enable_watchdog():
+            cde.deregister_worker_pids(id(self))
+        if hasattr(self, 'eof') and self.eof is not None:
+            self.eof.set()
         if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
-            if hasattr(self, 'eof') and self.eof is not None and not self.eof.is_set():
-                self.eof.set()
-            _PythonMultiprocessing._terminate_processes([self.cleaning_process])
+            # let the quit event notify the cleaning process to exit
+            self.cleaning_process.join(timeout=5)
+            if self.cleaning_process.is_alive():
+                # if the cleaning process did not exit, it may be hanging; try to terminate it
+                _PythonMultiprocessing._terminate_processes([self.cleaning_process])
             del self.cleaning_process
 
     def is_running(self):
         if hasattr(self, 'workers') and self.workers is not None:
-            return all([w.is_alive() for w in self.workers])
+            if self.start_method == "fork":
+                return all([w.is_alive() for w in self.workers])
+            return all([worker_is_alive(w) for w in self.workers])
         return False
 
     def close_all_workers(self):
         """Close all the subprocess workers"""
         if hasattr(self, 'workers') and self.workers is not None:
-            for w in self.workers:
-                w.close()
+            if self.start_method == "fork":
+                for w in self.workers:
+                    w.close()
+            else:
+                for i, w in enumerate(self.workers):
+                    close_worker(w, self.pipes[i])
+
             check_interval = get_multiprocessing_timeout_interval()
             for w in self.workers:
                 try:
@@ -3660,8 +3682,12 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                         continue
                     raise e
                 try:
-                    if w.is_alive():
-                        os.close(subprocess_file_descriptor)
+                    if self.start_method == "fork":
+                        if w.is_alive():
+                            os.close(subprocess_file_descriptor)
+                    else:
+                        if worker_is_alive(w):
+                            os.close(subprocess_file_descriptor)
                 except OSError as e:
                     # Maybe the file descriptor had been released, so ignore the 'Bad file descriptor'
                     if "Bad file descriptor" not in str(e):
@@ -3670,6 +3696,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             # use clear to release the handle, which is better than self.workers = None
             self.workers.clear()
             self.workers = None
+            self.pipes.clear()
+            self.pipes = None
             self.pids = None
 
 
@@ -3782,11 +3810,6 @@ class MapDataset(UnionBaseDataset):
     def __deepcopy__(self, memodict):
         return self.__safe_deepcopy__(memodict, exclude=("operations", "callbacks", "__transfer_dataset__"))
 
-    def __del__(self):
-        if hasattr(self, "process_pool") and self.process_pool is not None:
-            self.process_pool.terminate()
-            del self.process_pool
-
     @staticmethod
     def __parse_op_name(op):
         """
@@ -3915,8 +3938,9 @@ class MapDataset(UnionBaseDataset):
                 callable_list.append(op)
 
             if callable_list:
-                self.process_pool = _PythonMultiprocessing(str(self), self.num_parallel_workers, callable_list,
-                                                           self.max_rowsize)
+                self.process_pool = _PythonMultiprocessing(get_multiprocessing_start_method(),
+                                                           self.num_parallel_workers, str(self),
+                                                           callable_list, self.max_rowsize)
                 # Pass #2
                 idx = 0
                 for op in self.operations:
@@ -4142,6 +4166,7 @@ class ConcatDataset(UnionBaseDataset):
                 if isinstance(c, ConcatDataset):
                     c.use_sampler(sampler)
                     set_child(c)
+
         set_child(self)
 
         return
@@ -4242,7 +4267,7 @@ class _ToDevice:
         if get_debug_mode():
             logger.error("MindData debugger cannot be used in dataset sink mode. Please manually turn off "
                          "sink mode and try the debugger again.")
-        ir_tree, self.api_tree = dataset.create_ir_tree()
+        ir_tree, _ = dataset.create_ir_tree()
 
         self._runtime_context = cde.PythonRuntimeContext()
         self._runtime_context.Init()
@@ -4442,7 +4467,7 @@ class Schema:
     Class to represent a schema of a dataset.
 
     Args:
-        schema_file (str): Path of the schema file. Default: ``None``.
+        schema_file (str, optional): Path of the schema file. Default: ``None``.
 
     Raises:
         RuntimeError: If the schema file fails to load.
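With `schema_file` now documented as optional, a schema can also be built programmatically; a minimal sketch:

    import mindspore.dataset as ds

    schema = ds.Schema()  # no schema file needed
    schema.add_column(name="image", de_type="uint8", shape=[-1])
    schema.add_column(name="label", de_type="int32")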