mindspore 2.4.10__cp39-cp39-win_amd64.whl → 2.6.0rc1__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic; see the registry's advisory page for more details.

Files changed (577)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +13 -6
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_check_jit_forbidden_api.py +3 -0
  7. mindspore/_checkparam.py +3 -38
  8. mindspore/_deprecated/__init__.py +17 -0
  9. mindspore/_deprecated/jit.py +198 -0
  10. mindspore/_extends/builtin_operations.py +1 -1
  11. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  12. mindspore/_extends/parse/__init__.py +6 -7
  13. mindspore/_extends/parse/compile_config.py +83 -0
  14. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  15. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
  16. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  17. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  18. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  19. mindspore/_extends/parse/parser.py +46 -197
  20. mindspore/_extends/parse/resources.py +1 -5
  21. mindspore/_extends/parse/standard_method.py +217 -98
  22. mindspore/_extends/pijit/__init__.py +2 -2
  23. mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
  24. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  25. mindspore/_extends/utils.py +1 -1
  26. mindspore/amp.py +11 -5
  27. mindspore/avcodec-59.dll +0 -0
  28. mindspore/avdevice-59.dll +0 -0
  29. mindspore/avfilter-8.dll +0 -0
  30. mindspore/avformat-59.dll +0 -0
  31. mindspore/avutil-57.dll +0 -0
  32. mindspore/boost/__init__.py +2 -2
  33. mindspore/boost/base.py +3 -7
  34. mindspore/boost/boost_cell_wrapper.py +138 -43
  35. mindspore/common/__init__.py +6 -3
  36. mindspore/common/_grad_function.py +56 -0
  37. mindspore/common/_pijit_context.py +14 -5
  38. mindspore/common/_register_for_tensor.py +1 -2
  39. mindspore/common/_stub_tensor.py +30 -14
  40. mindspore/common/_tensor_cpp_method.py +17 -0
  41. mindspore/common/_tensor_docs.py +4760 -0
  42. mindspore/common/api.py +435 -371
  43. mindspore/common/auto_dynamic_shape.py +41 -44
  44. mindspore/common/dtype.py +39 -36
  45. mindspore/common/dump.py +9 -6
  46. mindspore/common/file_system.py +9 -1
  47. mindspore/common/generator.py +2 -0
  48. mindspore/common/hook_handle.py +6 -2
  49. mindspore/common/initializer.py +13 -10
  50. mindspore/common/jit_begin_end.py +94 -0
  51. mindspore/common/jit_config.py +6 -1
  52. mindspore/common/jit_context.py +76 -0
  53. mindspore/common/jit_trace.py +378 -0
  54. mindspore/common/lazy_inline.py +9 -3
  55. mindspore/common/mindir_util.py +10 -2
  56. mindspore/common/mutable.py +5 -4
  57. mindspore/common/parameter.py +135 -52
  58. mindspore/common/seed.py +2 -2
  59. mindspore/common/sparse_tensor.py +23 -17
  60. mindspore/common/tensor.py +951 -1992
  61. mindspore/communication/__init__.py +7 -5
  62. mindspore/communication/_comm_helper.py +52 -2
  63. mindspore/communication/comm_func.py +240 -181
  64. mindspore/communication/management.py +95 -26
  65. mindspore/context.py +314 -566
  66. mindspore/dataset/__init__.py +65 -37
  67. mindspore/dataset/audio/__init__.py +2 -8
  68. mindspore/dataset/audio/transforms.py +3 -17
  69. mindspore/dataset/callback/ds_callback.py +2 -1
  70. mindspore/dataset/core/config.py +87 -6
  71. mindspore/dataset/engine/cache_admin.py +3 -3
  72. mindspore/dataset/engine/cache_client.py +6 -5
  73. mindspore/dataset/engine/datasets.py +292 -267
  74. mindspore/dataset/engine/datasets_audio.py +22 -8
  75. mindspore/dataset/engine/datasets_standard_format.py +46 -27
  76. mindspore/dataset/engine/datasets_text.py +78 -48
  77. mindspore/dataset/engine/datasets_user_defined.py +182 -116
  78. mindspore/dataset/engine/datasets_vision.py +120 -44
  79. mindspore/dataset/engine/iterators.py +283 -63
  80. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  81. mindspore/dataset/engine/obs/util.py +8 -0
  82. mindspore/dataset/engine/queue.py +40 -0
  83. mindspore/dataset/engine/samplers.py +289 -43
  84. mindspore/dataset/engine/serializer_deserializer.py +3 -2
  85. mindspore/dataset/engine/validators.py +53 -11
  86. mindspore/dataset/text/__init__.py +7 -6
  87. mindspore/dataset/text/transforms.py +6 -5
  88. mindspore/dataset/text/utils.py +3 -3
  89. mindspore/dataset/transforms/__init__.py +0 -9
  90. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  91. mindspore/dataset/transforms/transforms.py +31 -14
  92. mindspore/dataset/utils/browse_dataset.py +1 -1
  93. mindspore/dataset/vision/__init__.py +2 -9
  94. mindspore/dataset/vision/transforms.py +202 -158
  95. mindspore/dataset/vision/utils.py +7 -5
  96. mindspore/dataset/vision/validators.py +1 -2
  97. mindspore/device_context/__init__.py +21 -0
  98. mindspore/device_context/ascend/__init__.py +25 -0
  99. mindspore/device_context/ascend/device.py +72 -0
  100. mindspore/device_context/ascend/op_debug.py +153 -0
  101. mindspore/device_context/ascend/op_precision.py +193 -0
  102. mindspore/device_context/ascend/op_tuning.py +123 -0
  103. mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
  104. mindspore/device_context/cpu/device.py +62 -0
  105. mindspore/device_context/cpu/op_tuning.py +43 -0
  106. mindspore/device_context/gpu/__init__.py +21 -0
  107. mindspore/device_context/gpu/device.py +70 -0
  108. mindspore/device_context/gpu/op_precision.py +67 -0
  109. mindspore/device_context/gpu/op_tuning.py +175 -0
  110. mindspore/device_manager.py +170 -0
  111. mindspore/experimental/es/embedding_service.py +35 -27
  112. mindspore/experimental/llm_boost/__init__.py +1 -0
  113. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  114. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  115. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  116. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  117. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  118. mindspore/experimental/llm_boost/register.py +1 -0
  119. mindspore/experimental/map_parameter.py +4 -4
  120. mindspore/experimental/optim/adadelta.py +6 -6
  121. mindspore/experimental/optim/adagrad.py +4 -4
  122. mindspore/experimental/optim/adam.py +7 -0
  123. mindspore/experimental/optim/adamax.py +4 -4
  124. mindspore/experimental/optim/adamw.py +4 -0
  125. mindspore/experimental/optim/asgd.py +1 -1
  126. mindspore/experimental/optim/lr_scheduler.py +73 -46
  127. mindspore/experimental/optim/radam.py +34 -31
  128. mindspore/experimental/optim/rprop.py +1 -1
  129. mindspore/experimental/optim/sgd.py +1 -1
  130. mindspore/hal/contiguous_tensors_handle.py +6 -10
  131. mindspore/hal/device.py +55 -53
  132. mindspore/hal/event.py +52 -52
  133. mindspore/hal/memory.py +157 -117
  134. mindspore/hal/stream.py +150 -109
  135. mindspore/include/api/context.h +0 -1
  136. mindspore/include/dataset/constants.h +7 -4
  137. mindspore/include/dataset/execute.h +2 -2
  138. mindspore/jpeg62.dll +0 -0
  139. mindspore/log.py +50 -0
  140. mindspore/mindrecord/__init__.py +21 -8
  141. mindspore/mindrecord/config.py +17 -316
  142. mindspore/mindrecord/filereader.py +1 -9
  143. mindspore/mindrecord/filewriter.py +5 -15
  144. mindspore/mindrecord/mindpage.py +1 -9
  145. mindspore/mindspore_backend_common.dll +0 -0
  146. mindspore/mindspore_backend_manager.dll +0 -0
  147. mindspore/mindspore_common.dll +0 -0
  148. mindspore/mindspore_core.dll +0 -0
  149. mindspore/mindspore_dump.dll +0 -0
  150. mindspore/mindspore_frontend.dll +0 -0
  151. mindspore/mindspore_memory_pool.dll +0 -0
  152. mindspore/mindspore_ms_backend.dll +0 -0
  153. mindspore/mindspore_ops.dll +0 -0
  154. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  155. mindspore/mindspore_ops_kernel_common.dll +0 -0
  156. mindspore/mindspore_profiler.dll +0 -0
  157. mindspore/mindspore_pyboost.dll +0 -0
  158. mindspore/mindspore_pynative.dll +0 -0
  159. mindspore/mindspore_res_manager.dll +0 -0
  160. mindspore/mindspore_runtime_pipeline.dll +0 -0
  161. mindspore/mint/__init__.py +796 -759
  162. mindspore/mint/distributed/__init__.py +70 -4
  163. mindspore/mint/distributed/distributed.py +2679 -44
  164. mindspore/mint/linalg/__init__.py +8 -0
  165. mindspore/mint/nn/__init__.py +743 -22
  166. mindspore/mint/nn/functional.py +716 -23
  167. mindspore/mint/nn/layer/__init__.py +21 -4
  168. mindspore/mint/nn/layer/_functions.py +334 -0
  169. mindspore/mint/nn/layer/activation.py +276 -1
  170. mindspore/mint/nn/layer/basic.py +123 -0
  171. mindspore/mint/nn/layer/conv.py +921 -0
  172. mindspore/mint/nn/layer/normalization.py +223 -28
  173. mindspore/mint/nn/layer/padding.py +797 -0
  174. mindspore/mint/nn/layer/pooling.py +235 -0
  175. mindspore/mint/optim/__init__.py +3 -1
  176. mindspore/mint/optim/adam.py +223 -0
  177. mindspore/mint/optim/adamw.py +26 -19
  178. mindspore/mint/optim/sgd.py +171 -0
  179. mindspore/mint/special/__init__.py +2 -1
  180. mindspore/multiprocessing/__init__.py +5 -0
  181. mindspore/nn/__init__.py +4 -1
  182. mindspore/nn/cell.py +1370 -189
  183. mindspore/nn/dynamic_lr.py +2 -1
  184. mindspore/nn/layer/activation.py +29 -27
  185. mindspore/nn/layer/basic.py +51 -35
  186. mindspore/nn/layer/channel_shuffle.py +3 -3
  187. mindspore/nn/layer/container.py +1 -1
  188. mindspore/nn/layer/conv.py +22 -17
  189. mindspore/nn/layer/embedding.py +12 -11
  190. mindspore/nn/layer/normalization.py +56 -49
  191. mindspore/nn/layer/padding.py +4 -3
  192. mindspore/nn/layer/pooling.py +120 -42
  193. mindspore/nn/layer/rnn_cells.py +1 -1
  194. mindspore/nn/layer/rnns.py +2 -1
  195. mindspore/nn/layer/timedistributed.py +5 -5
  196. mindspore/nn/layer/transformer.py +59 -36
  197. mindspore/nn/learning_rate_schedule.py +8 -4
  198. mindspore/nn/loss/loss.py +58 -55
  199. mindspore/nn/optim/ada_grad.py +7 -5
  200. mindspore/nn/optim/adadelta.py +11 -9
  201. mindspore/nn/optim/adafactor.py +1 -1
  202. mindspore/nn/optim/adam.py +17 -13
  203. mindspore/nn/optim/adamax.py +8 -7
  204. mindspore/nn/optim/adasum.py +5 -5
  205. mindspore/nn/optim/asgd.py +1 -1
  206. mindspore/nn/optim/ftrl.py +11 -9
  207. mindspore/nn/optim/lamb.py +1 -1
  208. mindspore/nn/optim/lars.py +1 -4
  209. mindspore/nn/optim/lazyadam.py +12 -10
  210. mindspore/nn/optim/momentum.py +7 -6
  211. mindspore/nn/optim/optimizer.py +3 -3
  212. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  213. mindspore/nn/optim/rmsprop.py +13 -12
  214. mindspore/nn/optim/rprop.py +11 -9
  215. mindspore/nn/optim/sgd.py +9 -6
  216. mindspore/nn/optim/tft_wrapper.py +5 -2
  217. mindspore/nn/optim/thor.py +2 -1
  218. mindspore/nn/probability/bijector/bijector.py +17 -11
  219. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  220. mindspore/nn/probability/bijector/invert.py +2 -2
  221. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  222. mindspore/nn/probability/bijector/softplus.py +3 -2
  223. mindspore/nn/probability/distribution/beta.py +3 -3
  224. mindspore/nn/probability/distribution/categorical.py +1 -1
  225. mindspore/nn/probability/distribution/cauchy.py +4 -2
  226. mindspore/nn/probability/distribution/exponential.py +6 -7
  227. mindspore/nn/probability/distribution/gamma.py +2 -2
  228. mindspore/nn/probability/distribution/gumbel.py +2 -2
  229. mindspore/nn/probability/distribution/half_normal.py +5 -3
  230. mindspore/nn/probability/distribution/logistic.py +5 -3
  231. mindspore/nn/probability/distribution/poisson.py +1 -1
  232. mindspore/nn/probability/distribution/uniform.py +5 -3
  233. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  234. mindspore/nn/reinforcement/tensor_array.py +1 -1
  235. mindspore/nn/utils/init.py +13 -11
  236. mindspore/nn/wrap/__init__.py +6 -6
  237. mindspore/nn/wrap/cell_wrapper.py +181 -122
  238. mindspore/nn/wrap/grad_reducer.py +45 -36
  239. mindspore/nn/wrap/loss_scale.py +6 -7
  240. mindspore/numpy/array_creations.py +63 -65
  241. mindspore/numpy/array_ops.py +149 -144
  242. mindspore/numpy/logic_ops.py +41 -42
  243. mindspore/numpy/math_ops.py +365 -363
  244. mindspore/numpy/utils.py +17 -18
  245. mindspore/numpy/utils_const.py +5 -6
  246. mindspore/opencv_core452.dll +0 -0
  247. mindspore/opencv_imgcodecs452.dll +0 -0
  248. mindspore/opencv_imgproc452.dll +0 -0
  249. mindspore/ops/__init__.py +5 -3
  250. mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
  251. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
  252. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  253. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  254. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  255. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  256. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  257. mindspore/ops/_register_for_op.py +0 -11
  258. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  259. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
  260. mindspore/ops/_vmap/vmap_array_ops.py +27 -25
  261. mindspore/ops/_vmap/vmap_base.py +0 -2
  262. mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
  263. mindspore/ops/_vmap/vmap_math_ops.py +15 -16
  264. mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
  265. mindspore/ops/auto_generate/__init__.py +4 -3
  266. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
  267. mindspore/ops/auto_generate/gen_extend_func.py +764 -124
  268. mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
  269. mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
  270. mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
  271. mindspore/ops/composite/__init__.py +2 -1
  272. mindspore/ops/composite/base.py +20 -25
  273. mindspore/ops/composite/math_ops.py +6 -16
  274. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  275. mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
  276. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  277. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  278. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  279. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  280. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  281. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  282. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  283. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  284. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  285. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  286. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  287. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  288. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  289. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  290. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  291. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  292. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  293. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  294. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  295. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  296. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  297. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  298. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  299. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  300. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  301. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
  302. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  303. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  304. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  305. mindspore/ops/function/__init__.py +40 -2
  306. mindspore/ops/function/_add_attr_func.py +58 -0
  307. mindspore/ops/function/array_func.py +2089 -2403
  308. mindspore/ops/function/clip_func.py +80 -23
  309. mindspore/ops/function/debug_func.py +57 -57
  310. mindspore/ops/function/grad/__init__.py +1 -0
  311. mindspore/ops/function/grad/grad_func.py +104 -71
  312. mindspore/ops/function/image_func.py +2 -2
  313. mindspore/ops/function/linalg_func.py +47 -78
  314. mindspore/ops/function/math_func.py +4501 -3802
  315. mindspore/ops/function/nn_func.py +1726 -620
  316. mindspore/ops/function/other_func.py +159 -1
  317. mindspore/ops/function/parameter_func.py +18 -84
  318. mindspore/ops/function/random_func.py +440 -387
  319. mindspore/ops/function/reshard_func.py +4 -70
  320. mindspore/ops/function/sparse_func.py +3 -3
  321. mindspore/ops/function/sparse_unary_func.py +6 -6
  322. mindspore/ops/function/spectral_func.py +25 -58
  323. mindspore/ops/function/vmap_func.py +24 -17
  324. mindspore/ops/functional.py +22 -7
  325. mindspore/ops/functional_overload.py +1440 -0
  326. mindspore/ops/op_info_register.py +32 -244
  327. mindspore/ops/operations/__init__.py +13 -7
  328. mindspore/ops/operations/_custom_ops_utils.py +247 -0
  329. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  330. mindspore/ops/operations/_grad_ops.py +2 -43
  331. mindspore/ops/operations/_infer_ops.py +2 -1
  332. mindspore/ops/operations/_inner_ops.py +43 -84
  333. mindspore/ops/operations/_ms_kernel.py +4 -10
  334. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  335. mindspore/ops/operations/_scalar_ops.py +3 -2
  336. mindspore/ops/operations/_sequence_ops.py +1 -1
  337. mindspore/ops/operations/_tensor_array.py +1 -1
  338. mindspore/ops/operations/array_ops.py +81 -324
  339. mindspore/ops/operations/comm_ops.py +154 -108
  340. mindspore/ops/operations/custom_ops.py +232 -78
  341. mindspore/ops/operations/debug_ops.py +153 -59
  342. mindspore/ops/operations/inner_ops.py +7 -5
  343. mindspore/ops/operations/linalg_ops.py +1 -57
  344. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  345. mindspore/ops/operations/manually_defined/ops_def.py +928 -180
  346. mindspore/ops/operations/math_ops.py +32 -234
  347. mindspore/ops/operations/nn_ops.py +210 -498
  348. mindspore/ops/operations/other_ops.py +62 -9
  349. mindspore/ops/operations/random_ops.py +13 -7
  350. mindspore/ops/operations/reshard_ops.py +1 -1
  351. mindspore/ops/operations/sparse_ops.py +2 -2
  352. mindspore/ops/primitive.py +66 -53
  353. mindspore/ops/tensor_method.py +1888 -0
  354. mindspore/ops_generate/__init__.py +0 -5
  355. mindspore/ops_generate/aclnn/__init__.py +0 -0
  356. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
  357. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
  358. mindspore/ops_generate/api/__init__.py +0 -0
  359. mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
  360. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
  361. mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
  362. mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
  363. mindspore/ops_generate/api/functions_cc_generator.py +237 -0
  364. mindspore/ops_generate/api/gen_api.py +103 -0
  365. mindspore/ops_generate/api/op_api_proto.py +235 -0
  366. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
  367. mindspore/ops_generate/common/__init__.py +0 -0
  368. mindspore/ops_generate/common/base_generator.py +11 -0
  369. mindspore/ops_generate/common/gen_constants.py +91 -0
  370. mindspore/ops_generate/common/gen_utils.py +348 -0
  371. mindspore/ops_generate/common/op_proto.py +473 -0
  372. mindspore/ops_generate/common/template.py +523 -0
  373. mindspore/ops_generate/gen_ops.py +22 -1069
  374. mindspore/ops_generate/op_def/__init__.py +0 -0
  375. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  376. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
  377. mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
  378. mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
  379. mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
  380. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  381. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  382. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  383. mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
  384. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
  385. mindspore/ops_generate/pyboost/__init__.py +0 -0
  386. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
  387. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
  388. mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
  389. mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
  390. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
  391. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
  392. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
  393. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
  394. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
  395. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
  396. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
  397. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
  398. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
  399. mindspore/ops_generate/resources/__init__.py +0 -0
  400. mindspore/ops_generate/resources/resource_list.py +30 -0
  401. mindspore/ops_generate/resources/resource_loader.py +36 -0
  402. mindspore/ops_generate/resources/resource_manager.py +64 -0
  403. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  404. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  405. mindspore/parallel/__init__.py +7 -3
  406. mindspore/parallel/_auto_parallel_context.py +152 -34
  407. mindspore/parallel/_cell_wrapper.py +130 -15
  408. mindspore/parallel/_parallel_serialization.py +107 -5
  409. mindspore/parallel/_ps_context.py +1 -1
  410. mindspore/parallel/_recovery_context.py +7 -2
  411. mindspore/parallel/_tensor.py +142 -18
  412. mindspore/parallel/_utils.py +199 -23
  413. mindspore/parallel/algo_parameter_config.py +4 -4
  414. mindspore/parallel/auto_parallel.py +732 -0
  415. mindspore/parallel/checkpoint_convert.py +159 -0
  416. mindspore/parallel/checkpoint_transform.py +698 -35
  417. mindspore/parallel/cluster/process_entity/_api.py +276 -50
  418. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  419. mindspore/parallel/cluster/run.py +21 -4
  420. mindspore/parallel/function/__init__.py +24 -0
  421. mindspore/parallel/function/reshard_func.py +259 -0
  422. mindspore/parallel/nn/__init__.py +25 -0
  423. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  424. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  425. mindspore/parallel/parameter_broadcast.py +25 -14
  426. mindspore/parallel/shard.py +137 -58
  427. mindspore/parallel/transform_safetensors.py +363 -305
  428. mindspore/profiler/__init__.py +22 -5
  429. mindspore/profiler/analysis/__init__.py +0 -0
  430. mindspore/profiler/analysis/parser/__init__.py +0 -0
  431. mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
  432. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  433. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  434. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  435. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  436. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  437. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
  438. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  439. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
  440. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  441. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  442. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  443. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  444. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  445. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  446. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  447. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  448. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  449. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  450. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
  451. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  452. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  453. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  454. mindspore/profiler/analysis/task_manager.py +131 -0
  455. mindspore/profiler/analysis/time_converter.py +84 -0
  456. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  457. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
  458. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  459. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
  460. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
  461. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
  462. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
  463. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  464. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  465. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
  466. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  467. mindspore/profiler/analysis/work_flow.py +73 -0
  468. mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
  469. mindspore/profiler/common/command_executor.py +90 -0
  470. mindspore/profiler/common/constant.py +186 -3
  471. mindspore/profiler/common/file_manager.py +208 -0
  472. mindspore/profiler/common/log.py +130 -0
  473. mindspore/profiler/common/msprof_cmd_tool.py +221 -0
  474. mindspore/profiler/common/path_manager.py +395 -0
  475. mindspore/profiler/common/process_bar.py +168 -0
  476. mindspore/profiler/common/process_pool.py +9 -3
  477. mindspore/profiler/common/profiler_context.py +500 -0
  478. mindspore/profiler/common/profiler_info.py +304 -0
  479. mindspore/profiler/common/profiler_meta_data.py +74 -0
  480. mindspore/profiler/common/profiler_output_path.py +284 -0
  481. mindspore/profiler/common/profiler_parameters.py +251 -0
  482. mindspore/profiler/common/profiler_path_manager.py +179 -0
  483. mindspore/profiler/common/record_function.py +76 -0
  484. mindspore/profiler/common/tlv_decoder.py +76 -0
  485. mindspore/profiler/common/util.py +75 -2
  486. mindspore/profiler/dynamic_profiler.py +341 -75
  487. mindspore/profiler/envprofiler.py +163 -0
  488. mindspore/profiler/experimental_config.py +197 -0
  489. mindspore/profiler/mstx.py +242 -0
  490. mindspore/profiler/platform/__init__.py +21 -0
  491. mindspore/profiler/platform/base_profiler.py +40 -0
  492. mindspore/profiler/platform/cpu_profiler.py +124 -0
  493. mindspore/profiler/platform/gpu_profiler.py +74 -0
  494. mindspore/profiler/platform/npu_profiler.py +335 -0
  495. mindspore/profiler/profiler.py +1073 -90
  496. mindspore/profiler/profiler_action_controller.py +187 -0
  497. mindspore/profiler/profiler_interface.py +118 -0
  498. mindspore/profiler/schedule.py +243 -0
  499. mindspore/rewrite/api/node.py +15 -13
  500. mindspore/rewrite/api/symbol_tree.py +2 -3
  501. mindspore/run_check/_check_version.py +27 -20
  502. mindspore/run_check/run_check.py +1 -1
  503. mindspore/runtime/__init__.py +37 -0
  504. mindspore/runtime/device.py +27 -0
  505. mindspore/runtime/event.py +209 -0
  506. mindspore/runtime/executor.py +177 -0
  507. mindspore/runtime/memory.py +409 -0
  508. mindspore/runtime/stream.py +460 -0
  509. mindspore/runtime/thread_bind_core.py +401 -0
  510. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  511. mindspore/swresample-4.dll +0 -0
  512. mindspore/swscale-6.dll +0 -0
  513. mindspore/tinyxml2.dll +0 -0
  514. mindspore/train/__init__.py +8 -8
  515. mindspore/train/_utils.py +88 -25
  516. mindspore/train/amp.py +9 -5
  517. mindspore/train/callback/__init__.py +2 -2
  518. mindspore/train/callback/_callback.py +2 -16
  519. mindspore/train/callback/_checkpoint.py +53 -55
  520. mindspore/train/callback/_cluster_monitor.py +14 -18
  521. mindspore/train/callback/_early_stop.py +1 -1
  522. mindspore/train/callback/_flops_collector.py +103 -68
  523. mindspore/train/callback/_history.py +8 -5
  524. mindspore/train/callback/_lambda_callback.py +2 -2
  525. mindspore/train/callback/_landscape.py +0 -3
  526. mindspore/train/callback/_loss_monitor.py +2 -1
  527. mindspore/train/callback/_on_request_exit.py +6 -5
  528. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  529. mindspore/train/callback/_summary_collector.py +52 -19
  530. mindspore/train/callback/_time_monitor.py +2 -1
  531. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
  532. mindspore/train/data_sink.py +25 -2
  533. mindspore/train/dataset_helper.py +15 -16
  534. mindspore/train/loss_scale_manager.py +8 -7
  535. mindspore/train/metrics/accuracy.py +3 -3
  536. mindspore/train/metrics/confusion_matrix.py +9 -9
  537. mindspore/train/metrics/error.py +3 -3
  538. mindspore/train/metrics/hausdorff_distance.py +4 -4
  539. mindspore/train/metrics/mean_surface_distance.py +3 -3
  540. mindspore/train/metrics/metric.py +0 -12
  541. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  542. mindspore/train/metrics/precision.py +11 -10
  543. mindspore/train/metrics/recall.py +9 -9
  544. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  545. mindspore/train/mind_ir_pb2.py +174 -46
  546. mindspore/train/model.py +184 -113
  547. mindspore/train/serialization.py +622 -978
  548. mindspore/train/summary/_summary_adapter.py +2 -2
  549. mindspore/train/summary/summary_record.py +2 -3
  550. mindspore/train/train_thor/model_thor.py +1 -1
  551. mindspore/turbojpeg.dll +0 -0
  552. mindspore/utils/__init__.py +6 -3
  553. mindspore/utils/dryrun.py +140 -0
  554. mindspore/utils/hooks.py +81 -0
  555. mindspore/utils/runtime_execution_order_check.py +550 -0
  556. mindspore/utils/utils.py +138 -4
  557. mindspore/version.py +1 -1
  558. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
  559. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +562 -393
  560. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
  561. mindspore/_install_custom.py +0 -43
  562. mindspore/common/_register_for_adapter.py +0 -74
  563. mindspore/common/_tensor_overload.py +0 -139
  564. mindspore/mindspore_np_dtype.dll +0 -0
  565. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  566. mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
  567. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  568. mindspore/ops_generate/gen_aclnn_implement.py +0 -263
  569. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  570. mindspore/ops_generate/gen_pyboost_func.py +0 -1052
  571. mindspore/ops_generate/gen_utils.py +0 -209
  572. mindspore/ops_generate/op_proto.py +0 -145
  573. mindspore/ops_generate/template.py +0 -261
  574. mindspore/profiler/envprofiling.py +0 -254
  575. mindspore/profiler/profiling.py +0 -1926
  576. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
  577. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2019-2024 Huawei Technologies Co., Ltd
1
+ # Copyright 2019-2025 Huawei Technologies Co., Ltd
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -18,37 +18,37 @@ You can define your own dataset loading class, and use GeneratorDataset to help
18
18
  After declaring the dataset object, you can further apply dataset operations
19
19
  (e.g. filter, skip, concat, map, batch) on it.
20
20
  """
21
+ import atexit
21
22
  import builtins
22
23
  import copy
23
24
  import errno
24
25
  import itertools
25
26
  import math
26
- import os
27
- import signal
28
- import time
29
- from types import GeneratorType
30
27
  import multiprocessing
31
- from multiprocessing.util import Finalize
28
+ import os
29
+ import platform
32
30
  import queue
33
- from functools import partial
31
+ import signal
34
32
  import subprocess
35
33
  import threading
34
+ import time
36
35
  import weakref
37
- import platform
38
- import psutil
36
+ from functools import partial
37
+ from types import GeneratorType
38
+
39
+ import dill
39
40
  import numpy as np
41
+ import psutil
40
42
 
41
43
  import mindspore._c_dataengine as cde
42
-
43
- from mindspore.common import Tensor
44
44
  from mindspore import log as logger
45
-
46
- from .datasets import UnionBaseDataset, MappableDataset, Schema, to_list, _PythonMultiprocessing, _check_shm_usage
45
+ from mindspore.common import Tensor
47
46
  from . import samplers
47
+ from .datasets import UnionBaseDataset, MappableDataset, Schema, to_list, _PythonMultiprocessing, _check_shm_usage
48
48
  from .queue import _SharedQueue
49
- from .validators import check_generatordataset, check_numpyslicesdataset, check_paddeddataset
49
+ from .validators import check_generator_dataset, check_numpy_slices_dataset, check_padded_dataset
50
50
  from ..core.config import get_enable_shared_mem, get_prefetch_size, get_multiprocessing_timeout_interval, \
51
- get_enable_watchdog, get_debug_mode, get_seed, set_seed
51
+ get_enable_watchdog, get_debug_mode, get_seed, set_seed, get_multiprocessing_start_method
52
52
  from ..core.datatypes import mstypelist_to_detypelist
53
53
  from ..core.py_util_helpers import ExceptionHandler
54
54
  from ..core.validator_helpers import type_check
@@ -221,33 +221,31 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
221
221
  self.ppid = os.getpid()
222
222
  self.pids = []
223
223
  self.check_interval = get_multiprocessing_timeout_interval() # the interval of check queue's size
224
- self._final_join = True
225
224
 
226
- # Event for end of epoch
227
225
  if self.multi_process is True:
226
+ multiprocessing.set_start_method(get_multiprocessing_start_method(), True)
227
+ # Event for end of epoch
228
228
  try:
229
229
  self.eof = multiprocessing.Event()
230
230
  except Exception:
231
231
  raise RuntimeError("Init multiprocessing.Event() failed, This might be caused by insufficient shm,"
232
232
  + " and the recommended shm size is at least 5 GB.")
233
- else:
234
- self.eof = threading.Event()
235
- # Create workers
236
-
237
- # get default queue size and adjust queue size per worker if there are large # workers
238
- queue_size = get_prefetch_size()
239
- queue_size = min(queue_size, queue_size * 4 // self.num_worker)
240
- queue_size = max(2, queue_size)
241
-
242
- if self.multi_process and get_enable_shared_mem():
243
- # generator dataset use idx_queue and res_queue to transfer data between main and subprocess
244
- # idx_queue is used multiprocess.Queue which is not shared memory, so it's size is 0.
245
- # res_queue is used shared memory, so its size is max_rowsize which is defined by user.
246
- _check_shm_usage(self.num_worker, queue_size, 0, self.max_rowsize)
247
- self.count = multiprocessing.Value('i', 0)
248
- for worker_id in range(self.num_worker):
249
- if self.multi_process is True:
233
+
234
+ # Create workers
235
+ # get default queue size and adjust queue size per worker if there are large # workers
236
+ queue_size = get_prefetch_size()
237
+ queue_size = min(queue_size, queue_size * 4 // self.num_worker)
238
+ queue_size = max(2, queue_size)
239
+
240
+ if get_enable_shared_mem():
241
+ # generator dataset use idx_queue and res_queue to transfer data between main and subprocess
242
+ # idx_queue is used multiprocess.Queue which is not shared memory, so it's size is 0.
243
+ # res_queue is used shared memory, so its size is max_rowsize which is defined by user.
244
+ _check_shm_usage(self.num_worker, queue_size, 0, self.max_rowsize)
245
+ self.count = multiprocessing.Value('i', 0)
246
+ for worker_id in range(self.num_worker):
250
247
  try:
248
+ logger.info("Multiprocessing start method: {}".format(multiprocessing.get_start_method()))
251
249
  worker = _GeneratorWorkerMp(self.dataset, self.eof, self.max_rowsize, queue_size, self.ppid,
252
250
  self.count, worker_id)
253
251
  worker.daemon = True
@@ -267,13 +265,26 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
267
265
  raise RuntimeError("Failed to launch multiprocessing of GeneratorDataset: {0}".format(e))
268
266
  self.pids.append(worker.pid)
269
267
  self.need_join = True
270
- else:
268
+ self.workers.append(worker)
269
+ multiprocessing.set_start_method("fork", True)
270
+
271
+ logger.info("Launch generator worker process(es): {}".format([worker.pid for worker in self.workers]))
272
+ if platform.system().lower() != 'windows':
273
+ self._launch_monitor()
274
+ else:
275
+ self.eof = threading.Event()
276
+ for worker_id in range(self.num_worker):
271
277
  worker = _GeneratorWorkerMt(self.dataset, self.eof, worker_id)
272
278
  worker.daemon = True
273
279
  self.need_join = True
274
- self.workers.append(worker)
275
- if self.multi_process and platform.system().lower() != 'windows':
276
- self._launch_cleanup_worker()
280
+ self.workers.append(worker)
281
+
282
+ # Register a termination function using weakref to avoid the object from unable to properly destruct.
283
+ atexit.register(lambda cleanup: cleanup()() if cleanup() is not None else None,
284
+ weakref.WeakMethod(self.terminate))
285
+
286
+ def terminate(self):
287
+ self._stop_subprocess()
277
288
 
278
289
  def _interval_log(self, i, start_time, wait_count):
279
290
  cost_time = int(time.time()) - start_time
@@ -394,9 +405,11 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
394
405
  "the `mindspore.dataset.config.set_multiprocessing_timeout_interval` interface."
395
406
  logger.warning(warning_message)
396
407
 
397
- def _launch_cleanup_worker(self):
408
+ def _launch_monitor(self):
398
409
  """
399
- We need a extra thread and process if main process or subprocess was killed.
410
+ Launch a clean process and register subprocess to be monitored by the watch dog.
411
+ The clean process will clean up subprocesses when main process exited.
412
+ The watch dog will clean up subprocesses and main process when any subprocess exited.
400
413
  """
401
414
  _clean_worker_func = _PythonMultiprocessing._clean_process # pylint: disable=W0212
402
415
  self.cleaning_process = multiprocessing.Process(target=_clean_worker_func,
@@ -404,21 +417,13 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
404
417
  args=(self.ppid, self.workers, self.eof))
405
418
  self.cleaning_process.daemon = True
406
419
  self.cleaning_process.start()
420
+ logger.info("Launch clean process {} to monitor worker "
421
+ "process(es): {}".format(self.cleaning_process.pid, [worker.pid for worker in self.workers]))
407
422
 
408
423
  if get_enable_watchdog():
409
- self.eot = threading.Event()
410
- self.watch_dog = threading.Thread(target=_PythonMultiprocessing._watch_dog, # pylint: disable=W0212
411
- name="GeneratorWatchDog",
412
- args=(self.eot, self.workers + [self.cleaning_process]))
413
- self.watch_dog.daemon = True
414
- self.watch_dog.start()
415
-
416
- if self._final_join is True:
417
- self._jointhread = Finalize(
418
- self.watch_dog, self._finalize_join,
419
- args=(weakref.ref(self.watch_dog), self.eot),
420
- exitpriority=-5
421
- )
424
+ worker_ids = [worker.pid for worker in self.workers]
425
+ worker_ids.append(self.cleaning_process.pid)
426
+ cde.register_worker_pids(id(self), set(worker_ids))
422
427
 
423
428
  def _release_fd(self):
424
429
  """Release the file descriptor by subprocess"""
@@ -454,16 +459,9 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
454
459
  def _stop_subprocess(self):
455
460
  """Only the main process can call join. All the sub-process / sub-thread will be stopped."""
456
461
  if self.need_join is True and self.ppid == os.getpid():
457
- # the sub-process / sub-thread will stop by self.eof.set()
458
- if hasattr(self, 'eof') and self.eof is not None:
459
- try:
460
- self.eof.set()
461
- except AttributeError: # maybe occur "'NoneType' object has no attribute 'maxsize'"
462
- pass
463
-
464
- # close the watch dog first
465
- self._abort_watchdog()
466
462
  self.need_join = False
463
+ # abort the monitor first
464
+ self._abort_monitor()
467
465
 
468
466
  # waiting for the sub-process stop
469
467
  for w in self.workers:
@@ -488,11 +486,17 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
488
486
 
489
487
  self.workers.clear()
490
488
  self.workers = None
489
+ # Under independent processes, the GeneratorDataset pulls up multiple processes in a spawn manner, and
490
+ # after the use case exits normally, there will be a warning: UserWarning: resource_tracker: There appear
491
+ # to be %d leaked semaphore objects to clean up at shutdown.
492
+ self.eof = None
491
493
 
492
- def _abort_watchdog(self):
493
- """Let watchdog quit."""
494
- if hasattr(self, 'eot') and self.eot is not None and not self.eot.is_set():
495
- self.eot.set()
494
+ def _abort_monitor(self):
495
+ """Deregister workers monitored by the watch dog and join clean process."""
496
+ if get_enable_watchdog():
497
+ cde.deregister_worker_pids(id(self))
498
+ if hasattr(self, 'eof') and self.eof is not None:
499
+ self.eof.set()
496
500
  if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
497
501
  # let the quit event notify the cleaning process to exit
498
502
  self.cleaning_process.join(timeout=5)
@@ -503,14 +507,6 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
503
507
  if hasattr(self, 'count'):
504
508
  del self.count
505
509
 
506
- @classmethod
507
- def _finalize_join(cls, twr, eot):
508
- thread = twr()
509
- if thread is not None:
510
- if eot is not None and not eot.is_set():
511
- eot.set()
512
- thread.join()
513
-
514
510
  def __del__(self):
515
511
  try:
516
512
  self._stop_subprocess()
@@ -521,10 +517,6 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
521
517
  self.__init__(self.dataset, self.num_worker, self.multi_process, self.max_rowsize)
522
518
 
523
519
 
524
- def _subprocess_handle(eof, signum, frame):
525
- threading.Thread(target=eof.set()).start()
526
-
527
-
528
520
  def _ignore_sigint(is_multiprocessing):
529
521
  """
530
522
  We need to ignore sigint signal here so subprocesses can exit normally and clear.
@@ -554,8 +546,7 @@ def _generator_worker_loop(dataset, idx_queue, result_queue, eof, is_multiproces
554
546
  cde.register_worker_handlers()
555
547
 
556
548
  if is_multiprocessing:
557
- result_queue.cancel_join_thread() # Ensure that the process does not hung when exiting
558
- signal.signal(signal.SIGTERM, partial(_subprocess_handle, eof))
549
+ result_queue.cancel_join_thread() # Ensure that the process does not hang when exiting
559
550
 
560
551
  # init the random seed and np.random seed for the subprocess
561
552
  if get_seed() != 5489:
@@ -694,6 +685,7 @@ class _GeneratorWorkerMp(multiprocessing.Process):
694
685
 
695
686
  class _GeneratorWrapper:
696
687
  """Wrapper the generator so that it can be iterated multiple times in GeneratorDataset."""
688
+
697
689
  def __init__(self, generator):
698
690
  self.generator = generator
699
691
  self.generator_new, self.generator = itertools.tee(self.generator)
@@ -706,6 +698,25 @@ class _GeneratorWrapper:
706
698
  return next(self.generator_new)
707
699
 
708
700
 
701
+ class _PickleGeneratorSource:
702
+ """Starting multiple processes in spawn mode requires pickling source object in GeneratorDataset."""
703
+ def __init__(self, dataset):
704
+ self.dataset = dataset
705
+
706
+ def __getitem__(self, index):
707
+ return self.dataset[index]
708
+
709
+ def __len__(self):
710
+ return len(self.dataset)
711
+
712
+ def __getstate__(self):
713
+ state = dill.dumps(self.dataset)
714
+ return state
715
+
716
+ def __setstate__(self, state):
717
+ self.dataset = dill.loads(state)
718
+
719
+
709
720
  class GeneratorDataset(MappableDataset, UnionBaseDataset):
710
721
  """
711
722
  A source dataset that generates data from Python by invoking Python data source each epoch.
@@ -713,20 +724,29 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
713
724
  The column names and column types of generated dataset depend on Python data defined by users.
714
725
 
715
726
  Args:
716
- source (Union[Callable, Iterable, Random Accessible]):
717
- A generator callable object, an iterable Python object or a random accessible Python object.
718
- Callable source is required to return a tuple of NumPy arrays as a row of the dataset on source().next().
719
- Iterable source is required to return a tuple of NumPy arrays as a row of the dataset on
720
- iter(source).next().
721
- Random accessible source is required to return a tuple of NumPy arrays as a row of the dataset on
722
- source[idx].
727
+ source (Union[Random Accessible, Iterable]): A custom dataset from which to load the data.
728
+ MindSpore supports the following types of datasets:
729
+
730
+ - Random-accessible (map-style) datasets: A dataset object that implements the `__getitem__()`
731
+ and `__len__()` methods, represents a mapping from indexes/keys to data samples.
732
+ For example, such a dataset `source`, when accessed with `source[idx]`, can read the idx-th sample
733
+ from disk, see `Random-accessible dataset example <https://www.mindspore.cn/tutorials/en/master/
734
+ beginner/dataset.html#random-accessible-dataset>`_ for details.
735
+
736
+ - Iterable-style dataset: An iterable dataset object that implements `__iter__()` and `__next__()` methods,
737
+ represents an iterable over data samples. This type of dataset is suitable for situations where
738
+ random reads are costly or even impossible, and where batch sizes depend on the data being acquired.
739
+ For example, such a dataset `source`, when accessed `iter(source)`, can return a stream of data reading
740
+ from a database or remote server, see `Iterable-style dataset example
741
+ <https://www.mindspore.cn/tutorials/en/master/beginner/dataset.html#iterable-dataset>`_ for details.
742
+
723
743
  column_names (Union[str, list[str]], optional): List of column names of the dataset. Default: ``None`` .
724
744
  Users are required to provide either column_names or schema.
725
745
  column_types (list[mindspore.dtype], optional): List of column data types of the dataset. Default: ``None`` .
726
- If provided, sanity check will be performed on generator output.
746
+ If provided, sanity check will be performed on generator output (deprecated in future version).
727
747
  schema (Union[str, Schema], optional): Data format policy, which specifies the data types and shapes of the data
728
748
  column to be read. Both JSON file path and objects constructed by :class:`mindspore.dataset.Schema` are
729
- acceptable. Default: ``None`` .
749
+ acceptable (deprecated in future version). Default: ``None`` .
730
750
  num_samples (int, optional): The number of samples to be included in the dataset.
731
751
  Default: ``None`` , all images.
732
752
  num_parallel_workers (int, optional): Number of worker threads/subprocesses used to
@@ -737,7 +757,8 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
737
757
  input is required. Default: ``None`` , expected order behavior shown in the table below.
738
758
  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
739
759
  Random accessible input is required. When this argument is specified, `num_samples` reflects the maximum
740
- sample number of per shard.
760
+ sample number of per shard. Used in `data parallel training <https://www.mindspore.cn/tutorials/en/master/
761
+ parallel/data_parallel.html#loading-datasets>`_ .
741
762
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` .
742
763
  This argument must be specified only when `num_shards` is also specified.
743
764
  Random accessible input is required.
@@ -745,9 +766,15 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
745
766
  option could be beneficial if the Python operation is computational heavy. Default: ``True``.
746
767
  max_rowsize(int, optional): Maximum size of data (in MB) that is used for shared memory
747
768
  allocation to copy data between processes, the total occupied shared memory will increase as
748
- ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. If set to -1,
769
+ ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. If set to ``-1``,
749
770
  shared memory will be dynamically allocated with the actual size of data. This is only used if
750
- ``python_multiprocessing`` is set to True. Default: ``None`` , allocate shared memory dynamically.
771
+ ``python_multiprocessing`` is set to ``True``. Default: ``None`` , allocate shared memory dynamically
772
+ (deprecated in future version).
773
+ batch_sampler (Iterable, optional): Similar to `sampler` , but returns a batch of indices at a time, the
774
+ corresponding data will be combined into a batch. Mutually exclusive with `num_samples` , `shuffle` ,
775
+ `num_shards` , `shard_id` and `sampler` . Default: ``None`` , do not use batch sampler.
776
+ collate_fn (Callable[List[numpy.ndarray]], optional): Define how to merge a list of data into a batch.
777
+ Only valid if `batch_sampler` is used. Default: ``None`` , do not use collation function.
751
778
 
752
779
  Raises:
753
780
  RuntimeError: If source raises an exception during execution.
@@ -758,6 +785,11 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
758
785
  ValueError: If `num_shards` is specified but shard_id is None.
759
786
  ValueError: If shard_id is specified but `num_shards` is None.
760
787
  ValueError: If `shard_id` is not in range of [0, `num_shards` ).
788
+ TypeError: If `batch_sampler` is not iterable.
789
+ ValueError: If `batch_sampler` is specified together with `num_samples` ,
790
+ `shuffle` , `num_shards` , `shard_id` and `sampler`.
791
+ TypeError: If `collate_fn` is not callable.
792
+ ValueError: If `collate_fn` is specified while `batch_sampler` is None.
761
793
 
762
794
  Tutorial Examples:
763
795
  - `Load & Process Data With Dataset Pipeline
@@ -777,15 +809,35 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
777
809
  (such as Pandas, Numpy or PyArrow objects) for member variables, or load less metadata in member variables,
778
810
  or configure `python_multiprocessing=False` to use multi-threading mode.
779
811
 
780
- There are several classes/functions that can help you reduce the size of member variables, and you can choose
781
- to use them:
812
+ You can use the following classes/functions to reduce the size of member variables:
782
813
 
783
- 1. :class:`mindspore.dataset.utils.LineReader`: Use this class to initialize your text file object in the
814
+ :class:`mindspore.dataset.utils.LineReader`: Use this class to initialize your text file object in the
784
815
  `__init__` function. Then read the file content based on the line number of the object with the `__getitem__`
785
816
  function.
786
817
 
787
- - Input `source` accepts user-defined Python functions (PyFuncs), Do not add network computing operators from
788
- mindspore.nn and mindspore.ops or others into this `source` .
818
+ - Input `source` accepts user-defined Python functions (PyFuncs), and sets the multiprocessing start method
819
+ to `spawn` mode by ds.config.set_multiprocessing_start_method("spawn") with `python_ multiprocessing=True`
820
+ and `num_parallel_workers>1` supports adding network computing operators from mindspore.nn and mindspore.ops
821
+ or others into this `source`, otherwise adding to the `source` is not supported.
822
+ - When the user defined dataset by `source` calls the DVPP operator during dataset loading and processing,
823
+ the supported scenarios are as follows:
824
+
825
+ +---------------+----------------------------+----------------------------+----------------------------+
826
+ | | | Multiprocessing |
827
+ | | Multithreading +----------------------------+----------------------------+
828
+ | | | spawn | fork |
829
+ +===============+============================+============================+============================+
830
+ |Independent |Data Processing: support |Data Processing: support |Data Processing: support |
831
+ | | | | |
832
+ |process mode |Data Processing + Network |Data Processing + Network |Data Processing + Network |
833
+ | |training: not support |training: support |training: not support |
834
+ +---------------+----------------------------+----------------------------+----------------------------+
835
+ |Non-independent|Data Processing: support |Data Processing: support |Data Processing: support |
836
+ | | | | |
837
+ |process mode |Data Processing + Network |Data Processing + Network |Data Processing + Network |
838
+ | |training: support |training: support |training: not support |
839
+ +---------------+----------------------------+----------------------------+----------------------------+
840
+
789
841
  - The parameters `num_samples` , `shuffle` , `num_shards` , `shard_id` can be used to control the sampler
790
842
  used in the dataset, and their effects when combined with parameter `sampler` are as follows.
791
843
 
@@ -851,10 +903,10 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
851
903
  >>> dataset = ds.GeneratorDataset(source=[(np.array(0),), (np.array(1),), (np.array(2),)], column_names=["col"])
852
904
  """
853
905
 
854
- @check_generatordataset
906
+ @check_generator_dataset
855
907
  def __init__(self, source, column_names=None, column_types=None, schema=None, num_samples=None,
856
908
  num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None, shard_id=None,
857
- python_multiprocessing=True, max_rowsize=None):
909
+ python_multiprocessing=True, max_rowsize=None, batch_sampler=None, collate_fn=None):
858
910
  super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
859
911
  shuffle=shuffle, num_shards=num_shards, shard_id=shard_id)
860
912
  if isinstance(source, builtins.zip):
@@ -868,7 +920,8 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
868
920
  self.source = _GeneratorWrapper(self.source)
869
921
 
870
922
  self.prepared_source = None # source to be sent to C++
871
- if hasattr(self, 'operator_mixed') and getattr(self, 'operator_mixed') is True:
923
+ if hasattr(self, 'operator_mixed') and getattr(self, 'operator_mixed') is True and \
924
+ get_multiprocessing_start_method() == "fork":
872
925
  self.num_parallel_workers = 1
873
926
  logger.warning(
874
927
  "Input 'source' of 'GeneratorDataset' includes network computing operators like in mindspore.nn, "
@@ -895,18 +948,23 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
895
948
  self.schema = schema
896
949
  if not isinstance(schema, Schema):
897
950
  self.schema = Schema(schema)
951
+
952
+ self.has_batch_sampler = False
953
+ if batch_sampler is not None:
954
+ self.has_batch_sampler = True
955
+ if not isinstance(batch_sampler, samplers.BuiltinSampler):
956
+ self.sampler = samplers.IterSampler(batch_sampler)
957
+ else:
958
+ self.sampler = batch_sampler
959
+
898
960
  # Move get dataset_size by len from parse to here, because self.source will
899
961
  # lose attribution of '__len__' after deepcopy.
900
- self.source_len = -1 # unknown
901
- if hasattr(self.source, "__len__"):
902
- self.source_len = len(self.source)
903
-
904
- # if user defined sampler, update the self.source_len
905
- if isinstance(self.sampler, samplers.Sampler) or hasattr(self.sampler, "__iter__"):
906
- self.source_len = len(list(sampler))
962
+ self.source_len = len(self.source) if hasattr(self.source, "__len__") else -1
907
963
 
908
964
  self.max_rowsize = max_rowsize if max_rowsize is not None else -1
909
965
  self.sample_fn = None
966
+ # Ignore batch_info in the input parameter.
967
+ self.collate_fn = (lambda *args: collate_fn(*args[:-1])) if collate_fn is not None else None
910
968
 
911
969
  def __deepcopy__(self, memodict):
912
970
  if id(self) in memodict:
@@ -917,18 +975,20 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
917
975
  type_check(index, (int, np.number), "index")
918
976
  if not hasattr(self.source, "__getitem__"):
919
977
  raise RuntimeError("Dataset don't support randomized access.")
978
+ if self.has_batch_sampler:
979
+ raise RuntimeError("GeneratorDataset with batch_sampler does not support random access.")
920
980
  if not hasattr(self, "generator_op"):
921
981
  dataset = copy.deepcopy(self)
922
982
  self.prepared_source = _generator_fn_wrapper(_cpp_sampler_fn, self.source)
923
983
  if self.schema is None:
924
984
  dataset.generator_node = cde.GeneratorNode(self.prepared_source, self.column_names, self.column_types,
925
- self.source_len, self.sampler, 1, None)
985
+ self.source_len, self.sampler, 1, None, False)
926
986
  else:
927
987
  schema = self.schema
928
988
  if isinstance(schema, Schema):
929
989
  schema = self.schema.cpp_schema
930
990
  dataset.generator_node = cde.GeneratorNode(self.prepared_source, schema, self.source_len,
931
- self.sampler, 1, None)
991
+ self.sampler, 1, None, False)
932
992
  self.generator_op = dataset.generator_node.Build()
933
993
  sample_id = self.generator_op.GetMappedIndex(index)
934
994
  return self.source[sample_id]
@@ -945,9 +1005,11 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
945
1005
 
946
1006
  def split(self, sizes, randomize=True):
947
1007
  if hasattr(self.source, "__getitem__"):
948
- # If the source has __getitem__ attribute, call the split method of MappableDataset.
949
- # Otherwise, call the split method of Dataset.
950
- return super().split(sizes, randomize)
1008
+ if not self.has_batch_sampler:
1009
+ # If the source has __getitem__ attribute, call the split method of MappableDataset.
1010
+ # Otherwise, call the split method of Dataset.
1011
+ return super().split(sizes, randomize)
1012
+ logger.warning("The performance of split will be degraded since batch_sampler is detected.")
951
1013
  return super(MappableDataset, self).split(sizes, randomize)
952
1014
 
953
1015
  def prepare_multiprocessing(self):
@@ -961,6 +1023,8 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
961
1023
 
962
1024
  if self.num_parallel_workers > 1 and not get_debug_mode():
963
1025
  self.__validate_memory_usage()
1026
+ # Starting multiple processes in spawn mode requires pickling source object
1027
+ self.source = _PickleGeneratorSource(self.source)
964
1028
 
965
1029
  sample_fn = SamplerFn(self.source, self.num_parallel_workers, self.python_multiprocessing,
966
1030
  self.max_rowsize)
@@ -984,12 +1048,12 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
984
1048
  self.prepare_multiprocessing()
985
1049
  if self.schema is None:
986
1050
  return cde.GeneratorNode(self.prepared_source, self.column_names, self.column_types, self.source_len,
987
- self.sampler, self.num_parallel_workers, self.sample_fn)
1051
+ self.sampler, self.num_parallel_workers, self.sample_fn, self.has_batch_sampler)
988
1052
  schema = self.schema
989
1053
  if isinstance(schema, Schema):
990
1054
  schema = self.schema.cpp_schema
991
1055
  return cde.GeneratorNode(self.prepared_source, schema, self.source_len, self.sampler,
992
- self.num_parallel_workers, self.sample_fn)
1056
+ self.num_parallel_workers, self.sample_fn, self.has_batch_sampler)
993
1057
 
994
1058
  def __validate_memory_usage(self):
995
1059
  """
@@ -1107,6 +1171,8 @@ class NumpySlicesDataset(GeneratorDataset):
1107
1171
  Default: ``None`` , expected order behavior shown in the table below.
1108
1172
  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1109
1173
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
1174
+ Used in `data parallel training <https://www.mindspore.cn/tutorials/en/master/
1175
+ parallel/data_parallel.html#loading-datasets>`_ .
1110
1176
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This argument must be
1111
1177
  specified only when `num_shards` is also specified.
1112
1178
 
@@ -1149,7 +1215,7 @@ class NumpySlicesDataset(GeneratorDataset):
1149
1215
  >>> dataset = ds.NumpySlicesDataset(data=dict(df), shuffle=False)
1150
1216
  """
1151
1217
 
1152
- @check_numpyslicesdataset
1218
+ @check_numpy_slices_dataset
1153
1219
  def __init__(self, data, column_names=None, num_samples=None, num_parallel_workers=1, shuffle=None, sampler=None,
1154
1220
  num_shards=None, shard_id=None):
1155
1221
  dataset = _NumpySlicesDataset(data, column_names)
@@ -1202,7 +1268,7 @@ class PaddedDataset(GeneratorDataset):
1202
1268
  >>> dataset = ds.PaddedDataset(padded_samples=data)
1203
1269
  """
1204
1270
 
1205
- @check_paddeddataset
1271
+ @check_padded_dataset
1206
1272
  def __init__(self, padded_samples):
1207
1273
  dataset = _PaddedDataset(padded_samples)
1208
1274
  super().__init__(dataset, column_names=dataset.column_names, num_shards=None, shard_id=None, shuffle=False)