mindspore 2.4.10__cp311-cp311-win_amd64.whl → 2.5.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (366)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +8 -3
  3. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +0 -5
  7. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  8. mindspore/_extends/parse/compile_config.py +64 -0
  9. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  10. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
  11. mindspore/_extends/parse/parser.py +23 -5
  12. mindspore/_extends/parse/standard_method.py +123 -27
  13. mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
  14. mindspore/amp.py +7 -1
  15. mindspore/avcodec-59.dll +0 -0
  16. mindspore/avdevice-59.dll +0 -0
  17. mindspore/avfilter-8.dll +0 -0
  18. mindspore/avformat-59.dll +0 -0
  19. mindspore/avutil-57.dll +0 -0
  20. mindspore/boost/boost_cell_wrapper.py +136 -41
  21. mindspore/common/__init__.py +3 -1
  22. mindspore/common/_register_for_tensor.py +0 -1
  23. mindspore/common/_stub_tensor.py +25 -4
  24. mindspore/common/_tensor_cpp_method.py +17 -0
  25. mindspore/common/_tensor_docs.py +6132 -0
  26. mindspore/common/api.py +98 -21
  27. mindspore/common/dtype.py +34 -34
  28. mindspore/common/dump.py +2 -1
  29. mindspore/common/file_system.py +8 -3
  30. mindspore/common/generator.py +2 -0
  31. mindspore/common/hook_handle.py +3 -1
  32. mindspore/common/initializer.py +3 -4
  33. mindspore/common/lazy_inline.py +8 -2
  34. mindspore/common/mindir_util.py +10 -2
  35. mindspore/common/parameter.py +31 -15
  36. mindspore/common/tensor.py +713 -1337
  37. mindspore/communication/__init__.py +1 -1
  38. mindspore/communication/_comm_helper.py +5 -0
  39. mindspore/communication/comm_func.py +215 -173
  40. mindspore/communication/management.py +23 -20
  41. mindspore/context.py +285 -191
  42. mindspore/dataset/__init__.py +23 -19
  43. mindspore/dataset/callback/ds_callback.py +2 -1
  44. mindspore/dataset/core/config.py +84 -3
  45. mindspore/dataset/engine/cache_admin.py +3 -3
  46. mindspore/dataset/engine/cache_client.py +5 -4
  47. mindspore/dataset/engine/datasets.py +192 -149
  48. mindspore/dataset/engine/datasets_audio.py +14 -0
  49. mindspore/dataset/engine/datasets_standard_format.py +11 -11
  50. mindspore/dataset/engine/datasets_text.py +38 -1
  51. mindspore/dataset/engine/datasets_user_defined.py +100 -66
  52. mindspore/dataset/engine/datasets_vision.py +81 -8
  53. mindspore/dataset/engine/iterators.py +281 -63
  54. mindspore/dataset/engine/obs/util.py +8 -0
  55. mindspore/dataset/engine/queue.py +40 -0
  56. mindspore/dataset/engine/samplers.py +26 -2
  57. mindspore/dataset/engine/serializer_deserializer.py +1 -1
  58. mindspore/dataset/engine/validators.py +43 -11
  59. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  60. mindspore/dataset/transforms/transforms.py +29 -12
  61. mindspore/dataset/vision/validators.py +1 -2
  62. mindspore/device_context/__init__.py +21 -0
  63. mindspore/device_context/ascend/__init__.py +25 -0
  64. mindspore/device_context/ascend/device.py +72 -0
  65. mindspore/device_context/ascend/op_debug.py +94 -0
  66. mindspore/device_context/ascend/op_precision.py +193 -0
  67. mindspore/device_context/ascend/op_tuning.py +127 -0
  68. mindspore/device_context/cpu/__init__.py +25 -0
  69. mindspore/device_context/cpu/device.py +62 -0
  70. mindspore/device_context/cpu/op_tuning.py +43 -0
  71. mindspore/device_context/gpu/__init__.py +21 -0
  72. mindspore/device_context/gpu/device.py +70 -0
  73. mindspore/device_context/gpu/op_precision.py +67 -0
  74. mindspore/device_context/gpu/op_tuning.py +175 -0
  75. mindspore/device_manager.py +134 -0
  76. mindspore/dnnl.dll +0 -0
  77. mindspore/experimental/llm_boost/__init__.py +1 -0
  78. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  79. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  80. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  81. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  82. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  83. mindspore/experimental/llm_boost/register.py +1 -0
  84. mindspore/experimental/optim/adadelta.py +26 -22
  85. mindspore/experimental/optim/adam.py +3 -0
  86. mindspore/experimental/optim/lr_scheduler.py +33 -24
  87. mindspore/experimental/optim/radam.py +33 -30
  88. mindspore/hal/device.py +28 -0
  89. mindspore/hal/event.py +17 -0
  90. mindspore/hal/memory.py +94 -3
  91. mindspore/hal/stream.py +91 -6
  92. mindspore/include/api/context.h +0 -1
  93. mindspore/jpeg62.dll +0 -0
  94. mindspore/log.py +12 -0
  95. mindspore/mindrecord/__init__.py +1 -1
  96. mindspore/mindrecord/config.py +17 -316
  97. mindspore/mindrecord/filereader.py +1 -9
  98. mindspore/mindrecord/filewriter.py +5 -15
  99. mindspore/mindrecord/mindpage.py +1 -9
  100. mindspore/mindspore_backend.dll +0 -0
  101. mindspore/mindspore_common.dll +0 -0
  102. mindspore/mindspore_core.dll +0 -0
  103. mindspore/mindspore_glog.dll +0 -0
  104. mindspore/mindspore_ops.dll +0 -0
  105. mindspore/mint/__init__.py +824 -218
  106. mindspore/mint/distributed/__init__.py +66 -4
  107. mindspore/mint/distributed/distributed.py +2594 -44
  108. mindspore/mint/linalg/__init__.py +6 -0
  109. mindspore/mint/nn/__init__.py +473 -14
  110. mindspore/mint/nn/functional.py +486 -11
  111. mindspore/mint/nn/layer/__init__.py +17 -4
  112. mindspore/mint/nn/layer/_functions.py +330 -0
  113. mindspore/mint/nn/layer/activation.py +169 -1
  114. mindspore/mint/nn/layer/basic.py +123 -0
  115. mindspore/mint/nn/layer/conv.py +727 -0
  116. mindspore/mint/nn/layer/normalization.py +215 -19
  117. mindspore/mint/nn/layer/padding.py +797 -0
  118. mindspore/mint/nn/layer/pooling.py +170 -0
  119. mindspore/mint/optim/__init__.py +2 -1
  120. mindspore/mint/optim/adam.py +223 -0
  121. mindspore/mint/optim/adamw.py +26 -19
  122. mindspore/mint/special/__init__.py +2 -1
  123. mindspore/multiprocessing/__init__.py +5 -0
  124. mindspore/nn/cell.py +126 -19
  125. mindspore/nn/dynamic_lr.py +2 -1
  126. mindspore/nn/layer/activation.py +6 -6
  127. mindspore/nn/layer/basic.py +35 -25
  128. mindspore/nn/layer/channel_shuffle.py +3 -3
  129. mindspore/nn/layer/embedding.py +3 -3
  130. mindspore/nn/layer/normalization.py +8 -7
  131. mindspore/nn/layer/padding.py +4 -3
  132. mindspore/nn/layer/pooling.py +47 -13
  133. mindspore/nn/layer/rnn_cells.py +1 -1
  134. mindspore/nn/layer/rnns.py +2 -1
  135. mindspore/nn/layer/timedistributed.py +5 -5
  136. mindspore/nn/layer/transformer.py +48 -26
  137. mindspore/nn/learning_rate_schedule.py +5 -3
  138. mindspore/nn/loss/loss.py +31 -36
  139. mindspore/nn/optim/ada_grad.py +1 -0
  140. mindspore/nn/optim/adadelta.py +2 -2
  141. mindspore/nn/optim/adam.py +1 -1
  142. mindspore/nn/optim/lars.py +1 -4
  143. mindspore/nn/optim/optimizer.py +1 -1
  144. mindspore/nn/optim/rprop.py +2 -2
  145. mindspore/nn/optim/thor.py +2 -1
  146. mindspore/nn/utils/init.py +13 -11
  147. mindspore/nn/wrap/cell_wrapper.py +4 -6
  148. mindspore/nn/wrap/loss_scale.py +3 -4
  149. mindspore/numpy/array_creations.py +60 -62
  150. mindspore/numpy/array_ops.py +148 -143
  151. mindspore/numpy/logic_ops.py +41 -42
  152. mindspore/numpy/math_ops.py +361 -359
  153. mindspore/numpy/utils.py +16 -16
  154. mindspore/numpy/utils_const.py +4 -4
  155. mindspore/opencv_core452.dll +0 -0
  156. mindspore/opencv_imgcodecs452.dll +0 -0
  157. mindspore/opencv_imgproc452.dll +0 -0
  158. mindspore/ops/__init__.py +2 -1
  159. mindspore/ops/_grad_experimental/grad_comm_ops.py +94 -13
  160. mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
  161. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  162. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  163. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  164. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  165. mindspore/ops/_vmap/vmap_array_ops.py +20 -19
  166. mindspore/ops/_vmap/vmap_base.py +0 -2
  167. mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
  168. mindspore/ops/_vmap/vmap_math_ops.py +11 -9
  169. mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
  170. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
  171. mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
  172. mindspore/ops/auto_generate/gen_extend_func.py +554 -60
  173. mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
  174. mindspore/ops/auto_generate/gen_ops_prim.py +8024 -3409
  175. mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
  176. mindspore/ops/composite/base.py +1 -1
  177. mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
  178. mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
  179. mindspore/ops/function/__init__.py +12 -0
  180. mindspore/ops/function/array_func.py +561 -159
  181. mindspore/ops/function/clip_func.py +64 -0
  182. mindspore/ops/function/debug_func.py +28 -20
  183. mindspore/ops/function/image_func.py +1 -1
  184. mindspore/ops/function/linalg_func.py +5 -4
  185. mindspore/ops/function/math_func.py +1659 -290
  186. mindspore/ops/function/nn_func.py +988 -317
  187. mindspore/ops/function/parameter_func.py +3 -56
  188. mindspore/ops/function/random_func.py +243 -33
  189. mindspore/ops/function/sparse_unary_func.py +1 -1
  190. mindspore/ops/functional.py +18 -5
  191. mindspore/ops/functional_overload.py +897 -0
  192. mindspore/ops/operations/__init__.py +3 -2
  193. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  194. mindspore/ops/operations/_grad_ops.py +2 -34
  195. mindspore/ops/operations/_infer_ops.py +2 -1
  196. mindspore/ops/operations/_inner_ops.py +38 -8
  197. mindspore/ops/operations/array_ops.py +45 -303
  198. mindspore/ops/operations/comm_ops.py +19 -16
  199. mindspore/ops/operations/custom_ops.py +11 -55
  200. mindspore/ops/operations/debug_ops.py +42 -47
  201. mindspore/ops/operations/inner_ops.py +6 -4
  202. mindspore/ops/operations/linalg_ops.py +3 -2
  203. mindspore/ops/operations/manually_defined/ops_def.py +185 -104
  204. mindspore/ops/operations/math_ops.py +11 -216
  205. mindspore/ops/operations/nn_ops.py +146 -308
  206. mindspore/ops/primitive.py +23 -21
  207. mindspore/ops/tensor_method.py +1669 -0
  208. mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
  209. mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
  210. mindspore/ops_generate/arg_handler.py +0 -61
  211. mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
  212. mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
  213. mindspore/ops_generate/base_generator.py +11 -0
  214. mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
  215. mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
  216. mindspore/ops_generate/functional_overload_py_generator.py +110 -0
  217. mindspore/ops_generate/functions_cc_generator.py +233 -0
  218. mindspore/ops_generate/gen_aclnn_implement.py +110 -114
  219. mindspore/ops_generate/gen_constants.py +157 -3
  220. mindspore/ops_generate/gen_ops.py +245 -990
  221. mindspore/ops_generate/gen_pyboost_func.py +97 -998
  222. mindspore/ops_generate/gen_utils.py +119 -33
  223. mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
  224. mindspore/ops_generate/op_api_proto.py +206 -0
  225. mindspore/ops_generate/op_def_py_generator.py +131 -0
  226. mindspore/ops_generate/op_prim_py_generator.py +480 -0
  227. mindspore/ops_generate/op_proto.py +373 -108
  228. mindspore/ops_generate/op_template_parser.py +436 -0
  229. mindspore/ops_generate/ops_def_cc_generator.py +288 -0
  230. mindspore/ops_generate/ops_def_h_generator.py +74 -0
  231. mindspore/ops_generate/ops_name_h_generator.py +68 -0
  232. mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
  233. mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
  234. mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
  235. mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
  236. mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
  237. mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
  238. mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
  239. mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
  240. mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
  241. mindspore/ops_generate/pyboost_utils.py +92 -33
  242. mindspore/ops_generate/template.py +294 -44
  243. mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
  244. mindspore/parallel/__init__.py +3 -3
  245. mindspore/parallel/_auto_parallel_context.py +24 -33
  246. mindspore/parallel/_parallel_serialization.py +13 -2
  247. mindspore/parallel/_utils.py +4 -1
  248. mindspore/parallel/algo_parameter_config.py +1 -1
  249. mindspore/parallel/checkpoint_transform.py +44 -0
  250. mindspore/parallel/cluster/process_entity/_api.py +131 -37
  251. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  252. mindspore/parallel/cluster/run.py +20 -3
  253. mindspore/parallel/parameter_broadcast.py +1 -1
  254. mindspore/parallel/shard.py +3 -0
  255. mindspore/parallel/transform_safetensors.py +119 -253
  256. mindspore/profiler/__init__.py +17 -4
  257. mindspore/profiler/analysis/__init__.py +0 -0
  258. mindspore/profiler/analysis/parser/__init__.py +0 -0
  259. mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
  260. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  261. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  262. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  263. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  264. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  265. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
  266. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  267. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
  268. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  269. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  270. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  271. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  272. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  273. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  274. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  275. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  276. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  277. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  278. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
  279. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  280. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  281. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  282. mindspore/profiler/analysis/task_manager.py +131 -0
  283. mindspore/profiler/analysis/time_converter.py +84 -0
  284. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  285. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
  286. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  287. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
  288. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
  289. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
  290. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
  291. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  292. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  293. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
  294. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  295. mindspore/profiler/analysis/work_flow.py +73 -0
  296. mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
  297. mindspore/profiler/common/command_executor.py +90 -0
  298. mindspore/profiler/common/constant.py +174 -3
  299. mindspore/profiler/common/file_manager.py +208 -0
  300. mindspore/profiler/common/log.py +130 -0
  301. mindspore/profiler/common/msprof_cmd_tool.py +202 -0
  302. mindspore/profiler/common/path_manager.py +371 -0
  303. mindspore/profiler/common/process_bar.py +168 -0
  304. mindspore/profiler/common/process_pool.py +9 -3
  305. mindspore/profiler/common/profiler_context.py +476 -0
  306. mindspore/profiler/common/profiler_info.py +304 -0
  307. mindspore/profiler/common/profiler_output_path.py +284 -0
  308. mindspore/profiler/common/profiler_parameters.py +210 -0
  309. mindspore/profiler/common/profiler_path_manager.py +120 -0
  310. mindspore/profiler/common/record_function.py +76 -0
  311. mindspore/profiler/common/tlv_decoder.py +76 -0
  312. mindspore/profiler/common/util.py +75 -2
  313. mindspore/profiler/dynamic_profiler.py +270 -37
  314. mindspore/profiler/envprofiler.py +138 -0
  315. mindspore/profiler/mstx.py +199 -0
  316. mindspore/profiler/platform/__init__.py +21 -0
  317. mindspore/profiler/platform/base_profiler.py +40 -0
  318. mindspore/profiler/platform/cpu_profiler.py +124 -0
  319. mindspore/profiler/platform/gpu_profiler.py +74 -0
  320. mindspore/profiler/platform/npu_profiler.py +309 -0
  321. mindspore/profiler/profiler.py +580 -93
  322. mindspore/profiler/profiler_action_controller.py +187 -0
  323. mindspore/profiler/profiler_interface.py +114 -0
  324. mindspore/profiler/schedule.py +208 -0
  325. mindspore/rewrite/api/symbol_tree.py +1 -2
  326. mindspore/run_check/_check_version.py +2 -6
  327. mindspore/runtime/__init__.py +37 -0
  328. mindspore/runtime/device.py +27 -0
  329. mindspore/runtime/event.py +209 -0
  330. mindspore/runtime/executor.py +148 -0
  331. mindspore/runtime/memory.py +392 -0
  332. mindspore/runtime/stream.py +460 -0
  333. mindspore/runtime/thread_bind_core.py +401 -0
  334. mindspore/swresample-4.dll +0 -0
  335. mindspore/swscale-6.dll +0 -0
  336. mindspore/tinyxml2.dll +0 -0
  337. mindspore/train/__init__.py +2 -2
  338. mindspore/train/_utils.py +53 -18
  339. mindspore/train/amp.py +8 -4
  340. mindspore/train/callback/_checkpoint.py +32 -18
  341. mindspore/train/callback/_early_stop.py +1 -1
  342. mindspore/train/callback/_flops_collector.py +105 -69
  343. mindspore/train/callback/_history.py +1 -1
  344. mindspore/train/callback/_summary_collector.py +44 -6
  345. mindspore/train/callback/_tft_register.py +31 -10
  346. mindspore/train/dataset_helper.py +11 -11
  347. mindspore/train/metrics/precision.py +4 -5
  348. mindspore/train/mind_ir_pb2.py +167 -46
  349. mindspore/train/model.py +13 -15
  350. mindspore/train/serialization.py +462 -76
  351. mindspore/train/summary/summary_record.py +1 -2
  352. mindspore/train/train_thor/model_thor.py +1 -1
  353. mindspore/turbojpeg.dll +0 -0
  354. mindspore/utils/__init__.py +4 -2
  355. mindspore/utils/dryrun.py +138 -0
  356. mindspore/utils/runtime_execution_order_check.py +550 -0
  357. mindspore/version.py +1 -1
  358. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/METADATA +2 -3
  359. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/RECORD +362 -238
  360. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
  361. mindspore/common/_tensor_overload.py +0 -139
  362. mindspore/mindspore_np_dtype.dll +0 -0
  363. mindspore/profiler/envprofiling.py +0 -254
  364. mindspore/profiler/profiling.py +0 -1926
  365. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
  366. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0

mindspore/dataset/engine/datasets_standard_format.py

@@ -34,7 +34,7 @@ from .datasets_user_defined import GeneratorDataset
  from .obs.obs_mindrecord_dataset import MindRecordFromOBS
  from .validators import check_csvdataset, check_minddataset, check_tfrecorddataset, check_obsminddataset
  from ..core.validator_helpers import type_check
- from ...mindrecord.config import _get_enc_key, _get_dec_mode, _get_hash_mode, decrypt, verify_file_hash
+ from ...mindrecord.config import _get_enc_key, _get_dec_mode, decrypt


  from ..core.validator_helpers import replace_none
@@ -75,6 +75,8 @@ class CSVDataset(SourceDataset, UnionBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None``. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
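
The `num_shards` / `shard_id` pair documented above is how a dataset pipeline is split across data-parallel workers. A minimal sketch of the pattern the new doc link describes, assuming a distributed job has already been launched (e.g. with msrun) and a hypothetical file ./data/train.csv:

    import mindspore.dataset as ds
    from mindspore.communication import init, get_rank, get_group_size

    init()                         # initialize the communication backend for this process
    rank_id = get_rank()           # index of this worker
    rank_size = get_group_size()   # total number of data-parallel workers

    # Each worker loads only its own 1/rank_size shard of the data.
    dataset = ds.CSVDataset("./data/train.csv",
                            num_shards=rank_size,
                            shard_id=rank_id,
                            shuffle=True)
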
@@ -143,6 +145,8 @@ class MindDataset(MappableDataset, UnionBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  sampler (Sampler, optional): Object used to choose samples from the
@@ -255,19 +259,15 @@ class MindDataset(MappableDataset, UnionBaseDataset):

  # do decrypt & integrity check
  if not isinstance(self.dataset_files, list):
- if _get_enc_key() is not None or _get_hash_mode() is not None:
+ if _get_enc_key() is not None:
  logger.warning("When a single mindrecord file which is generated by " +
  "`mindspore.mindrecord.FileWriter` with `shard_num` > 1 is used as the input, " +
- "enabling decryption/integrity check may fail. Please use file list as the input.")
+ "enabling decryption check may fail. Please use file list as the input.")

  # decrypt the data file and index file
  index_file_name = self.dataset_files + ".db"
  self.dataset_files = decrypt(self.dataset_files, _get_enc_key(), _get_dec_mode())
  decrypt(index_file_name, _get_enc_key(), _get_dec_mode())
-
- # verify integrity check
- verify_file_hash(self.dataset_files)
- verify_file_hash(self.dataset_files + ".db")
  else:
  file_tuple = []
  for item in self.dataset_files:
@@ -276,10 +276,6 @@ class MindDataset(MappableDataset, UnionBaseDataset):
  decrypt_filename = decrypt(item, _get_enc_key(), _get_dec_mode())
  file_tuple.append(decrypt_filename)
  decrypt(index_file_name, _get_enc_key(), _get_dec_mode())
-
- # verify integrity check
- verify_file_hash(decrypt_filename)
- verify_file_hash(decrypt_filename + ".db")
  self.dataset_files = file_tuple

  self.columns_list = replace_none(columns_list, [])
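
The hunks above drop the MindRecord integrity (hash) check while keeping the decryption path. For context, reading an encrypted MindRecord file still requires the key and cipher mode to be configured before MindDataset is built. A rough sketch, assuming the public setters set_enc_key / set_dec_mode sit behind the internal _get_enc_key / _get_dec_mode getters used here (verify the exact names and key format against the 2.5.0 mindrecord API), and a hypothetical file ./data/train.mindrecord:

    import mindspore.dataset as ds
    from mindspore.mindrecord import set_enc_key, set_dec_mode  # assumed public setters

    # Configure the same key/mode that FileWriter used when the file was written.
    set_enc_key("0123456789012345")   # assumed 16-character key
    set_dec_mode("AES-GCM")           # assumed cipher mode

    # Pass a file list; per the warning above, a single multi-shard file may fail to decrypt.
    dataset = ds.MindDataset(dataset_files=["./data/train.mindrecord"])
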
@@ -361,6 +357,8 @@ class TFRecordDataset(SourceDataset, UnionBaseDataset):
  num_shards (int, optional): Number of shards that the dataset will be divided
  into. Default: ``None`` . When this argument is specified, `num_samples` reflects
  the maximum sample number per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  shard_equal_rows (bool, optional): Get equal rows for all shards. Default: ``False``. If `shard_equal_rows`
@@ -476,6 +474,8 @@ class OBSMindDataset(GeneratorDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided
  into. Default: ``None`` .
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within num_shards. Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  shard_equal_rows (bool, optional): Get equal rows for all shards. Default: ``True``. If shard_equal_rows

mindspore/dataset/engine/datasets_text.py

@@ -67,6 +67,8 @@ class AGNewsDataset(SourceDataset, TextBaseDataset):
  num_shards (int, optional): Number of shards that the dataset will be divided into.
  Default: ``None``. When this argument is specified, `num_samples` reflects the
  max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . This
  argument can only be specified when `num_shards` is also specified. Default: ``None``.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -175,6 +177,8 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -273,6 +277,8 @@ class CLUEDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -515,7 +521,8 @@ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into.
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
- Default: ``None`` .
+ Default: ``None`` . Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . This
  argument can only be specified when `num_shards` is also specified. Default: ``None`` .
  num_parallel_workers (int, optional): Number of worker threads to read the data.
@@ -618,6 +625,8 @@ class DBpediaDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -717,6 +726,8 @@ class EnWik9Dataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -805,6 +816,8 @@ class IMDBDataset(MappableDataset, TextBaseDataset):
  num_shards (int, optional): Number of shards that the dataset will be divided
  into. Default: ``None`` . When this argument is specified, `num_samples` reflects
  the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -941,6 +954,8 @@ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  num_parallel_workers (int, optional): Number of worker threads to read the data.
@@ -1073,6 +1088,8 @@ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  num_parallel_workers (int, optional): Number of worker threads to read the data.
@@ -1181,6 +1198,8 @@ class Multi30kDataset(SourceDataset, TextBaseDataset):
  num_shards (int, optional): Number of shards that the dataset will be divided
  into. Default: ``None`` . When this argument is specified, `num_samples` reflects
  the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -1290,6 +1309,8 @@ class PennTreebankDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -1389,6 +1410,8 @@ class SogouNewsDataset(SourceDataset, TextBaseDataset):
  - ``Shuffle.FILES`` : Shuffle files only.
  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  num_parallel_workers (int, optional): Number of worker threads to read the data.
@@ -1490,6 +1513,8 @@ class SQuADDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -1608,6 +1633,8 @@ class SST2Dataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards`. This argument can only be specified when
  `num_shards` is also specified. Default: ``None`` .
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -1711,6 +1738,8 @@ class TextFileDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -1775,6 +1804,8 @@ class UDPOSDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  num_parallel_workers (int, optional): Number of worker threads to read the data.
@@ -1861,6 +1892,8 @@ class WikiTextDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -1958,6 +1991,8 @@ class YahooAnswersDataset(SourceDataset, TextBaseDataset):

  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
@@ -2058,6 +2093,8 @@ class YelpReviewDataset(SourceDataset, TextBaseDataset):
  - ``Shuffle.FILES`` : Shuffle files only.
  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
  argument can only be specified when `num_shards` is also specified.
  num_parallel_workers (int, optional): Number of worker threads to read the data.

mindspore/dataset/engine/datasets_user_defined.py

@@ -28,12 +28,10 @@ import signal
  import time
  from types import GeneratorType
  import multiprocessing
- from multiprocessing.util import Finalize
  import queue
  from functools import partial
  import subprocess
  import threading
- import weakref
  import platform
  import psutil
  import numpy as np
@@ -46,7 +44,7 @@ from mindspore import log as logger
  from .datasets import UnionBaseDataset, MappableDataset, Schema, to_list, _PythonMultiprocessing, _check_shm_usage
  from . import samplers
  from .queue import _SharedQueue
- from .validators import check_generatordataset, check_numpyslicesdataset, check_paddeddataset
+ from .validators import check_generator_dataset, check_numpy_slices_dataset, check_padded_dataset
  from ..core.config import get_enable_shared_mem, get_prefetch_size, get_multiprocessing_timeout_interval, \
  get_enable_watchdog, get_debug_mode, get_seed, set_seed
  from ..core.datatypes import mstypelist_to_detypelist
@@ -221,7 +219,6 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
  self.ppid = os.getpid()
  self.pids = []
  self.check_interval = get_multiprocessing_timeout_interval() # the interval of check queue's size
- self._final_join = True

  # Event for end of epoch
  if self.multi_process is True:
@@ -272,8 +269,14 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
  worker.daemon = True
  self.need_join = True
  self.workers.append(worker)
- if self.multi_process and platform.system().lower() != 'windows':
- self._launch_cleanup_worker()
+
+ if self.multi_process:
+ logger.info("Launch generator worker process(es): {}".format([worker.pid for worker in self.workers]))
+ if platform.system().lower() != 'windows':
+ self._launch_monitor()
+
+ def terminate(self):
+ self._stop_subprocess()

  def _interval_log(self, i, start_time, wait_count):
  cost_time = int(time.time()) - start_time
@@ -394,9 +397,11 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
  "the `mindspore.dataset.config.set_multiprocessing_timeout_interval` interface."
  logger.warning(warning_message)

- def _launch_cleanup_worker(self):
+ def _launch_monitor(self):
  """
- We need a extra thread and process if main process or subprocess was killed.
+ Launch a clean process and register subprocess to be monitored by the watch dog.
+ The clean process will clean up subprocesses when main process exited.
+ The watch dog will clean up subprocesses and main process when any subprocess exited.
  """
  _clean_worker_func = _PythonMultiprocessing._clean_process # pylint: disable=W0212
  self.cleaning_process = multiprocessing.Process(target=_clean_worker_func,
@@ -404,21 +409,13 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
  args=(self.ppid, self.workers, self.eof))
  self.cleaning_process.daemon = True
  self.cleaning_process.start()
+ logger.info("Launch clean process {} to monitor worker "
+ "process(es): {}".format(self.cleaning_process.pid, [worker.pid for worker in self.workers]))

  if get_enable_watchdog():
- self.eot = threading.Event()
- self.watch_dog = threading.Thread(target=_PythonMultiprocessing._watch_dog, # pylint: disable=W0212
- name="GeneratorWatchDog",
- args=(self.eot, self.workers + [self.cleaning_process]))
- self.watch_dog.daemon = True
- self.watch_dog.start()
-
- if self._final_join is True:
- self._jointhread = Finalize(
- self.watch_dog, self._finalize_join,
- args=(weakref.ref(self.watch_dog), self.eot),
- exitpriority=-5
- )
+ worker_ids = [worker.pid for worker in self.workers]
+ worker_ids.append(self.cleaning_process.pid)
+ cde.register_worker_pids(id(self), set(worker_ids))

  def _release_fd(self):
  """Release the file descriptor by subprocess"""
@@ -454,15 +451,8 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
  def _stop_subprocess(self):
  """Only the main process can call join. All the sub-process / sub-thread will be stopped."""
  if self.need_join is True and self.ppid == os.getpid():
- # the sub-process / sub-thread will stop by self.eof.set()
- if hasattr(self, 'eof') and self.eof is not None:
- try:
- self.eof.set()
- except AttributeError: # maybe occur "'NoneType' object has no attribute 'maxsize'"
- pass
-
- # close the watch dog first
- self._abort_watchdog()
+ # abort the monitor first
+ self._abort_monitor()
  self.need_join = False

  # waiting for the sub-process stop
@@ -489,10 +479,12 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
  self.workers.clear()
  self.workers = None

- def _abort_watchdog(self):
- """Let watchdog quit."""
- if hasattr(self, 'eot') and self.eot is not None and not self.eot.is_set():
- self.eot.set()
+ def _abort_monitor(self):
+ """Deregister workers monitored by the watch dog and join clean process."""
+ if get_enable_watchdog():
+ cde.deregister_worker_pids(id(self))
+ if hasattr(self, 'eof') and self.eof is not None:
+ self.eof.set()
  if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
  # let the quit event notify the cleaning process to exit
  self.cleaning_process.join(timeout=5)
@@ -503,14 +495,6 @@ class SamplerFn(cde.PythonMultiprocessingRuntime):
  if hasattr(self, 'count'):
  del self.count

- @classmethod
- def _finalize_join(cls, twr, eot):
- thread = twr()
- if thread is not None:
- if eot is not None and not eot.is_set():
- eot.set()
- thread.join()
-
  def __del__(self):
  try:
  self._stop_subprocess()
@@ -554,7 +538,7 @@ def _generator_worker_loop(dataset, idx_queue, result_queue, eof, is_multiproces
  cde.register_worker_handlers()

  if is_multiprocessing:
- result_queue.cancel_join_thread() # Ensure that the process does not hung when exiting
+ result_queue.cancel_join_thread() # Ensure that the process does not hang when exiting
  signal.signal(signal.SIGTERM, partial(_subprocess_handle, eof))

  # init the random seed and np.random seed for the subprocess
@@ -694,6 +678,7 @@ class _GeneratorWorkerMp(multiprocessing.Process):

  class _GeneratorWrapper:
  """Wrapper the generator so that it can be iterated multiple times in GeneratorDataset."""
+
  def __init__(self, generator):
  self.generator = generator
  self.generator_new, self.generator = itertools.tee(self.generator)
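
The _GeneratorWrapper shown above leans on itertools.tee to make a one-shot generator re-iterable across epochs. A standalone sketch of the same idea (plain Python, not MindSpore code):

    import itertools

    class ReiterableGenerator:
        """Make a single-use generator reusable by splitting it with itertools.tee."""

        def __init__(self, generator):
            self.generator = generator

        def __iter__(self):
            # Hand out one copy of the remaining stream and keep the other for later passes.
            fresh, self.generator = itertools.tee(self.generator)
            return fresh

    data = ReiterableGenerator(x * x for x in range(3))
    print(list(data))  # [0, 1, 4]
    print(list(data))  # [0, 1, 4] again; the wrapper was not exhausted
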
@@ -713,13 +698,22 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
  The column names and column types of generated dataset depend on Python data defined by users.

  Args:
- source (Union[Callable, Iterable, Random Accessible]):
- A generator callable object, an iterable Python object or a random accessible Python object.
- Callable source is required to return a tuple of NumPy arrays as a row of the dataset on source().next().
- Iterable source is required to return a tuple of NumPy arrays as a row of the dataset on
- iter(source).next().
- Random accessible source is required to return a tuple of NumPy arrays as a row of the dataset on
- source[idx].
+ source (Union[Random Accessible, Iterable]): A custom dataset from which to load the data.
+ MindSpore supports the following types of datasets:
+
+ - Random-accessible (map-style) datasets: A dataset object that implements the `__getitem__()`
+ and `__len__()` methods, represents a mapping from indexes/keys to data samples.
+ For example, such a dataset `source`, when accessed with `source[idx]`, can read the idx-th sample
+ from disk, see `Random-accessible dataset example <https://www.mindspore.cn/tutorials/en/master/
+ beginner/dataset.html#random-accessible-dataset>`_ for details.
+
+ - Iterable-style dataset: An iterable dataset object that implements `__iter__()` and `__next__()` methods,
+ represents an iterable over data samples. This type of dataset is suitable for situations where
+ random reads are costly or even impossible, and where batch sizes depend on the data being acquired.
+ For example, such a dataset `source`, when accessed `iter(source)`, can return a stream of data reading
+ from a database or remote server, see `Iterable-style dataset example
+ <https://www.mindspore.cn/tutorials/en/master/beginner/dataset.html#iterable-dataset>`_ for details.
+
  column_names (Union[str, list[str]], optional): List of column names of the dataset. Default: ``None`` .
  Users are required to provide either column_names or schema.
  column_types (list[mindspore.dtype], optional): List of column data types of the dataset. Default: ``None`` .
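
The rewritten `source` docstring above distinguishes the two supported dataset styles. A minimal sketch of both, with made-up in-memory data, fed to GeneratorDataset:

    import numpy as np
    import mindspore.dataset as ds

    class RandomAccessSource:
        """Map-style source: implements __getitem__ and __len__."""
        def __init__(self):
            self._data = np.arange(10, dtype=np.float32).reshape(5, 2)
        def __getitem__(self, index):
            return (self._data[index],)
        def __len__(self):
            return len(self._data)

    class IterableSource:
        """Iterable-style source: implements __iter__ and __next__."""
        def __init__(self):
            self._data = np.arange(10, dtype=np.float32).reshape(5, 2)
            self._index = 0
        def __iter__(self):
            self._index = 0
            return self
        def __next__(self):
            if self._index >= len(self._data):
                raise StopIteration
            item = (self._data[self._index],)
            self._index += 1
            return item

    map_style = ds.GeneratorDataset(RandomAccessSource(), column_names=["col"])
    iter_style = ds.GeneratorDataset(IterableSource(), column_names=["col"], shuffle=False)
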
@@ -737,7 +731,8 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
  input is required. Default: ``None`` , expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  Random accessible input is required. When this argument is specified, `num_samples` reflects the maximum
- sample number of per shard.
+ sample number of per shard. Used in `data parallel training <https://www.mindspore.cn/docs/en/master/
+ model_train/parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` .
  This argument must be specified only when `num_shards` is also specified.
  Random accessible input is required.
@@ -748,6 +743,11 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
  ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. If set to -1,
  shared memory will be dynamically allocated with the actual size of data. This is only used if
  ``python_multiprocessing`` is set to True. Default: ``None`` , allocate shared memory dynamically.
+ batch_sampler (Iterable, optional): Similar to `sampler` , but returns a batch of indices at a time, the
+ corresponding data will be combined into a batch. Mutually exclusive with `num_samples` , `shuffle` ,
+ `num_shards` , `shard_id` and `sampler` . Default: ``None`` , do not use batch sampler.
+ collate_fn (Callable[List[numpy.ndarray]], optional): Define how to merge a list of data into a batch.
+ Only valid if `batch_sampler` is used. Default: ``None`` , do not use collation function.

  Raises:
  RuntimeError: If source raises an exception during execution.
@@ -758,6 +758,11 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
  ValueError: If `num_shards` is specified but shard_id is None.
  ValueError: If shard_id is specified but `num_shards` is None.
  ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+ TypeError: If `batch_sampler` is not iterable.
+ ValueError: If `batch_sampler` is specified together with `num_samples` ,
+ `shuffle` , `num_shards` , `shard_id` and `sampler`.
+ TypeError: If `collate_fn` is not callable.
+ ValueError: If `collate_fn` is specified while `batch_sampler` is None.

  Tutorial Examples:
  - `Load & Process Data With Dataset Pipeline
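
`batch_sampler` and `collate_fn` are new parameters in this release (the __init__ signature change follows below). A hedged sketch of how they compose, based only on the documented semantics above: the batch sampler yields one list of indices per batch and `collate_fn` merges the fetched rows; the exact argument and return conventions of `collate_fn` are assumptions to check against the 2.5.0 documentation.

    import numpy as np
    import mindspore.dataset as ds

    class Source:
        """Map-style source of 10 rows with one column."""
        def __init__(self):
            self._data = np.arange(20, dtype=np.float32).reshape(10, 2)
        def __getitem__(self, index):
            return (self._data[index],)
        def __len__(self):
            return len(self._data)

    # A plain iterable of index lists; each inner list becomes one batch.
    batches = [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]

    def stack_batch(samples):
        # Assumed convention: merge the list of per-sample arrays into one batch array.
        return np.stack(samples)

    dataset = ds.GeneratorDataset(Source(), column_names=["col"],
                                  batch_sampler=batches, collate_fn=stack_batch)
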
@@ -851,10 +856,10 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
  >>> dataset = ds.GeneratorDataset(source=[(np.array(0),), (np.array(1),), (np.array(2),)], column_names=["col"])
  """

- @check_generatordataset
+ @check_generator_dataset
  def __init__(self, source, column_names=None, column_types=None, schema=None, num_samples=None,
  num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None, shard_id=None,
- python_multiprocessing=True, max_rowsize=None):
+ python_multiprocessing=True, max_rowsize=None, batch_sampler=None, collate_fn=None):
  super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
  shuffle=shuffle, num_shards=num_shards, shard_id=shard_id)
  if isinstance(source, builtins.zip):
@@ -895,18 +900,41 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
  self.schema = schema
  if not isinstance(schema, Schema):
  self.schema = Schema(schema)
+
+ self.has_batch_sampler = False
+ if batch_sampler is not None:
+ self.has_batch_sampler = True
+ if not isinstance(batch_sampler, samplers.BuiltinSampler):
+ self.sampler = samplers.IterSampler(batch_sampler)
+ else:
+ self.sampler = batch_sampler
+
  # Move get dataset_size by len from parse to here, because self.source will
  # lose attribution of '__len__' after deepcopy.
+ self._calculate_source_length()
+
+ self.max_rowsize = max_rowsize if max_rowsize is not None else -1
+ self.sample_fn = None
+ # Ignore batch_info in the input parameter.
+ self.collate_fn = (lambda *args: collate_fn(*args[:-1])) if collate_fn is not None else None
+
+ def _calculate_source_length(self):
+ """Calculate the source length according to the source and sampler."""
  self.source_len = -1 # unknown
  if hasattr(self.source, "__len__"):
  self.source_len = len(self.source)

  # if user defined sampler, update the self.source_len
  if isinstance(self.sampler, samplers.Sampler) or hasattr(self.sampler, "__iter__"):
- self.source_len = len(list(sampler))
-
- self.max_rowsize = max_rowsize if max_rowsize is not None else -1
- self.sample_fn = None
+ if self.sampler.child_sampler is not None:
+ raise RuntimeError("GeneratorDataset does not support user defined sampler with child sampler yet.")
+ if self.sampler.num_samples is not None:
+ self.source_len = self.sampler.num_samples
+ elif hasattr(self.sampler, "__len__"):
+ self.source_len = len(self.sampler)
+ else:
+ # counting on a copied sampler to prevent changing the random state of the original one
+ self.source_len = len(list(copy.deepcopy(self.sampler)))

  def __deepcopy__(self, memodict):
  if id(self) in memodict:
@@ -917,18 +945,20 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
  type_check(index, (int, np.number), "index")
  if not hasattr(self.source, "__getitem__"):
  raise RuntimeError("Dataset don't support randomized access.")
+ if self.has_batch_sampler:
+ raise RuntimeError("GeneratorDataset with batch_sampler does not support random access.")
  if not hasattr(self, "generator_op"):
  dataset = copy.deepcopy(self)
  self.prepared_source = _generator_fn_wrapper(_cpp_sampler_fn, self.source)
  if self.schema is None:
  dataset.generator_node = cde.GeneratorNode(self.prepared_source, self.column_names, self.column_types,
- self.source_len, self.sampler, 1, None)
+ self.source_len, self.sampler, 1, None, False)
  else:
  schema = self.schema
  if isinstance(schema, Schema):
  schema = self.schema.cpp_schema
  dataset.generator_node = cde.GeneratorNode(self.prepared_source, schema, self.source_len,
- self.sampler, 1, None)
+ self.sampler, 1, None, False)
  self.generator_op = dataset.generator_node.Build()
  sample_id = self.generator_op.GetMappedIndex(index)
  return self.source[sample_id]
@@ -945,9 +975,11 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):

  def split(self, sizes, randomize=True):
  if hasattr(self.source, "__getitem__"):
- # If the source has __getitem__ attribute, call the split method of MappableDataset.
- # Otherwise, call the split method of Dataset.
- return super().split(sizes, randomize)
+ if not self.has_batch_sampler:
+ # If the source has __getitem__ attribute, call the split method of MappableDataset.
+ # Otherwise, call the split method of Dataset.
+ return super().split(sizes, randomize)
+ logger.warning("The performance of split will be degraded since batch_sampler is detected.")
  return super(MappableDataset, self).split(sizes, randomize)

  def prepare_multiprocessing(self):
@@ -984,12 +1016,12 @@ class GeneratorDataset(MappableDataset, UnionBaseDataset):
  self.prepare_multiprocessing()
  if self.schema is None:
  return cde.GeneratorNode(self.prepared_source, self.column_names, self.column_types, self.source_len,
- self.sampler, self.num_parallel_workers, self.sample_fn)
+ self.sampler, self.num_parallel_workers, self.sample_fn, self.has_batch_sampler)
  schema = self.schema
  if isinstance(schema, Schema):
  schema = self.schema.cpp_schema
  return cde.GeneratorNode(self.prepared_source, schema, self.source_len, self.sampler,
- self.num_parallel_workers, self.sample_fn)
+ self.num_parallel_workers, self.sample_fn, self.has_batch_sampler)

  def __validate_memory_usage(self):
  """
@@ -1107,6 +1139,8 @@ class NumpySlicesDataset(GeneratorDataset):
  Default: ``None`` , expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ Used in `data parallel training <https://www.mindspore.cn/docs/en/master/model_train/
+ parallel/data_parallel.html#data-parallel-mode-loads-datasets>`_ .
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This argument must be
  specified only when `num_shards` is also specified.

@@ -1149,7 +1183,7 @@ class NumpySlicesDataset(GeneratorDataset):
  >>> dataset = ds.NumpySlicesDataset(data=dict(df), shuffle=False)
  """

- @check_numpyslicesdataset
+ @check_numpy_slices_dataset
  def __init__(self, data, column_names=None, num_samples=None, num_parallel_workers=1, shuffle=None, sampler=None,
  num_shards=None, shard_id=None):
  dataset = _NumpySlicesDataset(data, column_names)
@@ -1202,7 +1236,7 @@ class PaddedDataset(GeneratorDataset):
  >>> dataset = ds.PaddedDataset(padded_samples=data)
  """

- @check_paddeddataset
+ @check_padded_dataset
  def __init__(self, padded_samples):
  dataset = _PaddedDataset(padded_samples)
  super().__init__(dataset, column_names=dataset.column_names, num_shards=None, shard_id=None, shuffle=False)