mindspore 2.6.0__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (455)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +64 -83
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +47 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +177 -52
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +338 -208
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +2 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +84 -133
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +47 -38
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +69 -23
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +1 -0
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +4 -44
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +425 -19
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +125 -101
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +488 -620
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +86 -85
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +2 -4
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/lamb.py +1 -3
  188. mindspore/nn/optim/optimizer.py +1 -1
  189. mindspore/nn/optim/tft_wrapper.py +2 -3
  190. mindspore/nn/optim/thor.py +2 -2
  191. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  192. mindspore/nn/probability/distribution/exponential.py +2 -1
  193. mindspore/nn/probability/distribution/poisson.py +2 -1
  194. mindspore/nn/sparse/sparse.py +3 -3
  195. mindspore/nn/wrap/cell_wrapper.py +73 -42
  196. mindspore/nn/wrap/grad_reducer.py +37 -52
  197. mindspore/nn/wrap/loss_scale.py +72 -74
  198. mindspore/numpy/array_creations.py +7 -7
  199. mindspore/numpy/fft.py +1 -1
  200. mindspore/numpy/math_ops.py +1 -1
  201. mindspore/numpy/utils_const.py +1 -1
  202. mindspore/opencv_core452.dll +0 -0
  203. mindspore/opencv_imgcodecs452.dll +0 -0
  204. mindspore/opencv_imgproc452.dll +0 -0
  205. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  206. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  207. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  208. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  209. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  210. mindspore/ops/_vmap/vmap_array_ops.py +6 -13
  211. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  212. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
  213. mindspore/ops/auto_generate/gen_extend_func.py +5 -55
  214. mindspore/ops/auto_generate/gen_ops_def.py +753 -273
  215. mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
  216. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  217. mindspore/ops/composite/__init__.py +10 -0
  218. mindspore/ops/composite/base.py +9 -5
  219. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  220. mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
  221. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  222. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  223. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  224. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  225. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  226. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  227. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  228. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  229. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  230. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  231. mindspore/ops/function/__init__.py +4 -1
  232. mindspore/ops/function/_add_attr_func.py +11 -6
  233. mindspore/ops/function/array_func.py +17 -100
  234. mindspore/ops/function/debug_func.py +8 -5
  235. mindspore/ops/function/grad/grad_func.py +5 -13
  236. mindspore/ops/function/math_func.py +65 -399
  237. mindspore/ops/function/nn_func.py +44 -61
  238. mindspore/ops/function/other_func.py +4 -1
  239. mindspore/ops/function/random_func.py +31 -4
  240. mindspore/ops/functional.py +2 -3
  241. mindspore/ops/functional_overload.py +486 -18
  242. mindspore/ops/op_info_register.py +21 -0
  243. mindspore/ops/operations/__init__.py +5 -2
  244. mindspore/ops/operations/_custom_ops_utils.py +675 -8
  245. mindspore/ops/operations/_inner_ops.py +14 -18
  246. mindspore/ops/operations/_sequence_ops.py +1 -1
  247. mindspore/ops/operations/array_ops.py +4 -50
  248. mindspore/ops/operations/comm_ops.py +186 -41
  249. mindspore/ops/operations/custom_ops.py +244 -175
  250. mindspore/ops/operations/debug_ops.py +55 -4
  251. mindspore/ops/operations/image_ops.py +13 -13
  252. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  253. mindspore/ops/operations/math_ops.py +8 -9
  254. mindspore/ops/operations/nn_ops.py +6 -7
  255. mindspore/ops/primitive.py +9 -20
  256. mindspore/ops/tensor_method.py +52 -11
  257. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  258. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  259. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  260. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  261. mindspore/ops_generate/common/base_generator.py +14 -0
  262. mindspore/ops_generate/common/gen_constants.py +7 -2
  263. mindspore/ops_generate/common/gen_utils.py +0 -19
  264. mindspore/ops_generate/common/op_proto.py +11 -4
  265. mindspore/ops_generate/common/template.py +88 -11
  266. mindspore/ops_generate/gen_ops.py +1 -1
  267. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  268. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  269. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  270. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  271. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  272. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  273. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  274. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  275. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  276. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  277. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  278. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  279. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  280. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  281. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  282. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  283. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  284. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  285. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  286. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  287. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  288. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  289. mindspore/parallel/_auto_parallel_context.py +9 -17
  290. mindspore/parallel/_cell_wrapper.py +106 -40
  291. mindspore/parallel/_parallel_serialization.py +4 -3
  292. mindspore/parallel/_ps_context.py +4 -6
  293. mindspore/parallel/_tensor.py +167 -12
  294. mindspore/parallel/_transformer/moe.py +1 -1
  295. mindspore/parallel/_transformer/transformer.py +17 -12
  296. mindspore/parallel/_utils.py +5 -11
  297. mindspore/parallel/auto_parallel.py +33 -12
  298. mindspore/parallel/checkpoint_convert.py +3 -3
  299. mindspore/parallel/checkpoint_transform.py +5 -1
  300. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  301. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  302. mindspore/parallel/cluster/run.py +48 -7
  303. mindspore/parallel/function/__init__.py +8 -1
  304. mindspore/parallel/function/reshard_func.py +7 -6
  305. mindspore/parallel/nn/__init__.py +15 -2
  306. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  307. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  308. mindspore/parallel/shard.py +9 -23
  309. mindspore/parallel/transform_safetensors.py +468 -174
  310. mindspore/pgodb140.dll +0 -0
  311. mindspore/pgort140.dll +0 -0
  312. mindspore/profiler/__init__.py +2 -1
  313. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  314. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  315. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
  316. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  317. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  318. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  319. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  321. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  322. mindspore/profiler/analysis/task_manager.py +1 -1
  323. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  324. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  325. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  326. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  327. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  328. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  329. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  330. mindspore/profiler/common/constant.py +16 -0
  331. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  332. mindspore/profiler/common/path_manager.py +9 -0
  333. mindspore/profiler/common/profiler_context.py +50 -29
  334. mindspore/profiler/common/profiler_info.py +0 -16
  335. mindspore/profiler/common/profiler_meta_data.py +1 -0
  336. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  337. mindspore/profiler/common/profiler_output_path.py +23 -8
  338. mindspore/profiler/common/profiler_parameters.py +128 -35
  339. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  340. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  341. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  342. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  343. mindspore/profiler/dynamic_profiler.py +374 -338
  344. mindspore/profiler/envprofiler.py +42 -12
  345. mindspore/profiler/experimental_config.py +112 -7
  346. mindspore/profiler/mstx.py +33 -12
  347. mindspore/profiler/platform/__init__.py +2 -3
  348. mindspore/profiler/platform/cpu_profiler.py +10 -4
  349. mindspore/profiler/platform/npu_profiler.py +30 -20
  350. mindspore/profiler/profiler.py +218 -154
  351. mindspore/profiler/profiler_action_controller.py +65 -77
  352. mindspore/profiler/profiler_interface.py +2 -2
  353. mindspore/profiler/schedule.py +10 -4
  354. mindspore/rewrite/common/config.py +1 -0
  355. mindspore/rewrite/common/namer.py +1 -0
  356. mindspore/rewrite/common/namespace.py +1 -0
  357. mindspore/rewrite/node/node.py +31 -11
  358. mindspore/rewrite/parsers/assign_parser.py +1 -1
  359. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  360. mindspore/run_check/_check_version.py +7 -10
  361. mindspore/runtime/__init__.py +8 -6
  362. mindspore/runtime/event.py +10 -4
  363. mindspore/runtime/executor.py +87 -45
  364. mindspore/runtime/memory.py +22 -30
  365. mindspore/runtime/thread_bind_core.py +299 -165
  366. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  367. mindspore/swresample-4.dll +0 -0
  368. mindspore/swscale-6.dll +0 -0
  369. mindspore/tbbmalloc.dll +0 -0
  370. mindspore/tinyxml2.dll +0 -0
  371. mindspore/train/_utils.py +9 -5
  372. mindspore/train/amp.py +43 -23
  373. mindspore/train/callback/__init__.py +5 -5
  374. mindspore/train/callback/_callback.py +2 -1
  375. mindspore/train/callback/_checkpoint.py +4 -14
  376. mindspore/train/callback/_flops_collector.py +11 -7
  377. mindspore/train/callback/_landscape.py +0 -1
  378. mindspore/train/callback/_train_fault_tolerance.py +72 -18
  379. mindspore/train/data_sink.py +15 -6
  380. mindspore/train/dataset_helper.py +14 -5
  381. mindspore/train/model.py +49 -47
  382. mindspore/train/serialization.py +168 -126
  383. mindspore/train/summary/summary_record.py +13 -2
  384. mindspore/train/train_thor/model_thor.py +2 -2
  385. mindspore/turbojpeg.dll +0 -0
  386. mindspore/utils/__init__.py +3 -2
  387. mindspore/utils/dryrun.py +0 -6
  388. mindspore/utils/runtime_execution_order_check.py +162 -78
  389. mindspore/utils/sdc_detect.py +68 -0
  390. mindspore/utils/utils.py +14 -17
  391. mindspore/vcmeta.dll +0 -0
  392. mindspore/vcruntime140.dll +0 -0
  393. mindspore/vcruntime140_1.dll +0 -0
  394. mindspore/version.py +1 -1
  395. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  396. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
  397. mindspore/_deprecated/jit.py +0 -198
  398. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  399. mindspore/communication/_hccl_management.py +0 -297
  400. mindspore/experimental/es/embedding_service.py +0 -891
  401. mindspore/experimental/es/embedding_service_layer.py +0 -581
  402. mindspore/profiler/common/validator/__init__.py +0 -14
  403. mindspore/profiler/common/validator/validate_path.py +0 -84
  404. mindspore/profiler/parser/__init__.py +0 -14
  405. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  406. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  407. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  408. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  409. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  410. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  411. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  412. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  413. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  414. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  415. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  416. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  417. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  418. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  419. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  420. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  421. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  422. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  423. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  424. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  425. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  426. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  427. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  428. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  429. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  430. mindspore/profiler/parser/container.py +0 -229
  431. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  432. mindspore/profiler/parser/flops_parser.py +0 -531
  433. mindspore/profiler/parser/framework_enum.py +0 -111
  434. mindspore/profiler/parser/framework_parser.py +0 -464
  435. mindspore/profiler/parser/framework_struct.py +0 -61
  436. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  437. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  438. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  439. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  440. mindspore/profiler/parser/hccl_parser.py +0 -573
  441. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  442. mindspore/profiler/parser/integrator.py +0 -526
  443. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  444. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  445. mindspore/profiler/parser/minddata_parser.py +0 -186
  446. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  447. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  448. mindspore/profiler/parser/optime_parser.py +0 -250
  449. mindspore/profiler/parser/profiler_info.py +0 -213
  450. mindspore/profiler/parser/step_trace_parser.py +0 -666
  451. mindspore/utils/hooks.py +0 -81
  452. mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  453. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  454. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  455. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
@@ -29,10 +29,8 @@ import atexit
29
29
  import glob
30
30
  import json
31
31
  import os
32
- import queue
33
32
  import signal
34
33
  import stat
35
- import subprocess
36
34
  import warnings
37
35
 
38
36
  import time
@@ -41,6 +39,7 @@ import multiprocessing
41
39
  from importlib import import_module
42
40
  import sys
43
41
  import threading
42
+ from types import GeneratorType
44
43
 
45
44
  import copy
46
45
  import weakref
@@ -65,7 +64,6 @@ from mindspore.dataset.engine import samplers
65
64
  from mindspore.dataset.engine.samplers import Shuffle
66
65
  from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
67
66
  ITERATORS_LIST, _unset_iterator_cleanup, _cleanup_the_iterators_if_created
68
- from .queue import _SharedQueue, _Queue
69
67
  from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
70
68
  check_rename, check_device_send, check_take, check_output_shape, check_project, \
71
69
  check_sync_wait, check_zip_dataset, check_add_column, check_concat, check_split, check_bucket_batch_by_length, \
@@ -73,7 +71,8 @@ from .validators import check_batch, check_shuffle, check_map, check_filter, che
73
71
  check_total_batch, check_sync_update
74
72
  from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
75
73
  get_enable_watchdog, get_seed, set_seed, get_debug_mode, get_multiprocessing_timeout_interval, \
76
- _get_debug_hook_list, get_multiprocessing_start_method
74
+ _get_debug_hook_list, get_multiprocessing_start_method, get_video_backend, set_video_backend, \
75
+ get_error_samples_mode, ErrorSamplesMode
77
76
  from ..core.datatypes import mstype_to_detype
78
77
  from ..core.validator_helpers import replace_none
79
78
  from ..core.py_util_helpers import ExceptionHandler
@@ -575,6 +574,12 @@ class Dataset:
575
574
  create shared memory, and represents ``output_columns`` use the second element as the
576
575
  unit to create shared memory.
577
576
 
577
+ .. warning::
578
+ `batch` uses `dill` module implicitly in multiprocessing `spawn` mode to serialize/deserialize
579
+ `per_batch_map`, which is known to be insecure. It is possible to construct malicious pickle data
580
+ which will execute arbitrary code during unpickling. Never load data that could have come from
581
+ untrusted sources, or has been tampered with.
582
+
578
583
  Returns:
579
584
  Dataset, a new dataset with the above operation applied.
580
585
 
@@ -886,6 +891,12 @@ class Dataset:
886
891
 
887
892
  - offload (bool, optional): Flag to indicate whether offload is used. Default: ``None``.
888
893
 
894
+ .. warning::
895
+ `map` uses `dill` module implicitly in multiprocessing `spawn` mode to serialize/deserialize `operations`,
896
+ which is known to be insecure. It is possible to construct malicious pickle data which will
897
+ execute arbitrary code during unpickling. Never load data that could have come from untrusted sources,
898
+ or has been tampered with.
899
+
889
900
  Note:
890
901
  - Input `operations` accepts TensorOperations defined in mindspore.dataset part, plus user-defined
891
902
  Python functions (PyFuncs).
@@ -1557,7 +1568,7 @@ class Dataset:
1557
1568
  del api_tree
1558
1569
 
1559
1570
  @check_tuple_iterator
1560
- def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False, do_copy=True):
1571
+ def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False, do_copy=False):
1561
1572
  """
1562
1573
  Create an iterator over the dataset that yields samples of type list, whose elements are
1563
1574
  the data for each column.
@@ -1571,7 +1582,7 @@ class Dataset:
1571
1582
  convert it to Tensor. Default: ``False`` .
1572
1583
  do_copy (bool, optional): Whether to copy the data when converting output to Tensor,
1573
1584
  or reuse the buffer for better performance, only works when `output_numpy` is ``False`` .
1574
- Default: ``True`` .
1585
+ Default: ``False`` .
1575
1586
 
1576
1587
  Returns:
1577
1588
  Iterator, a dataset iterator that yields samples of type list.
@@ -1598,7 +1609,7 @@ class Dataset:
1598
1609
  return TupleIterator(self, columns, num_epochs, output_numpy, do_copy)
1599
1610
 
1600
1611
  @check_dict_iterator
1601
- def create_dict_iterator(self, num_epochs=-1, output_numpy=False, do_copy=True):
1612
+ def create_dict_iterator(self, num_epochs=-1, output_numpy=False, do_copy=False):
1602
1613
  """
1603
1614
  Create an iterator over the dataset that yields samples of type dict,
1604
1615
  while the key is the column name and the value is the data.
@@ -1610,7 +1621,7 @@ class Dataset:
1610
1621
  convert it to Tensor. Default: ``False`` .
1611
1622
  do_copy (bool, optional): Whether to copy the data when converting output to Tensor,
1612
1623
  or reuse the buffer for better performance, only works when `output_numpy` is ``False`` .
1613
- Default: ``True`` .
1624
+ Default: ``False`` .
1614
1625
 
1615
1626
  Returns:
1616
1627
  Iterator, a dataset iterator that yields samples of type dict.
@@ -2740,8 +2751,6 @@ class BatchDataset(UnionBaseDataset):
2740
2751
 
2741
2752
  self.process_pool = _PythonMultiprocessing(get_multiprocessing_start_method(), self.num_parallel_workers,
2742
2753
  str(self), [self.per_batch_map], self.max_rowsize)
2743
- # Wrap per_batch_map into _PythonCallable
2744
- self.per_batch_map = _PythonCallable(self.per_batch_map, 0, self.process_pool)
2745
2754
  else:
2746
2755
  if self.per_batch_map is not None:
2747
2756
  self.per_batch_map = FuncWrapper(self.per_batch_map)
@@ -3045,95 +3054,6 @@ _OP_NAME = dict()
3045
3054
  _OP_PROCESS = dict()
3046
3055
 
3047
3056
 
3048
- # PythonCallable wrapper for multiprocess pyfunc
3049
- class _PythonCallable:
3050
- """
3051
- Internal Python function wrapper for multiprocessing pyfunc.
3052
- """
3053
-
3054
- def __init__(self, py_callable, idx, pool=None):
3055
- # Original Python callable from user.
3056
- self.py_callable = py_callable
3057
- # Process pool created for current iterator.
3058
- self.pool = pool
3059
- # Python callable index
3060
- self.idx = idx
3061
-
3062
- def __call__(self, *args):
3063
- result = None
3064
- get_data_from_worker_process = False
3065
- while get_data_from_worker_process is False:
3066
- if self.pool.is_running() and check_iterator_cleanup() is False:
3067
- try:
3068
- result = self.pool.execute(self.idx, *args)
3069
- except multiprocessing.TimeoutError:
3070
- continue
3071
- get_data_from_worker_process = True
3072
- else:
3073
- # worker process is stopped
3074
- logger.info("The worker process of map operation is stopped. "
3075
- "So return None to main thread and break the main thread.")
3076
- return None
3077
- # got value from worker process
3078
- if not isinstance(result, tuple) and get_data_from_worker_process is True:
3079
- result = (result,)
3080
- return result
3081
-
3082
- def to_json(self):
3083
- return self.py_callable.to_json()
3084
-
3085
-
3086
- # used when python_multiprocessing=True in map
3087
- class Pipe:
3088
- """
3089
- Class to handle communication between the master process and the worker processes.
3090
- """
3091
-
3092
- def __init__(self, warning_ctl, shared_memory=False, max_rowsize=(-1, -1)):
3093
- self.shared_memory = shared_memory
3094
- self.eof = multiprocessing.Event()
3095
- if self.shared_memory:
3096
- self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[0])
3097
- self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[1])
3098
- else:
3099
- self.in_queue = _Queue(1)
3100
- self.res_queue = _Queue(1)
3101
- self.in_queue.cancel_join_thread() # Ensure that the process does not hung when exiting
3102
-
3103
- def master_send(self, func_index, data):
3104
- self.in_queue.put_nowait((func_index, *data))
3105
-
3106
- def master_receive(self):
3107
- if self.eof is None:
3108
- raise RuntimeError("EOF is none when get data from worker.")
3109
- if self.eof.is_set():
3110
- return None
3111
- return self.res_queue.get(timeout=1)
3112
-
3113
- def master_close(self):
3114
- self.eof.set()
3115
- self.send_finish_signal_to_worker()
3116
- self.send_finish_signal()
3117
-
3118
- def send_finish_signal(self):
3119
- self.worker_send(None)
3120
-
3121
- def send_finish_signal_to_worker(self):
3122
- self.master_send(0, "QUIT")
3123
-
3124
- def worker_send(self, data):
3125
- self.res_queue.put_until(data, timeout=1, exit_signal=self.eof)
3126
-
3127
- def worker_receive(self):
3128
- result = self.in_queue.get_until(timeout=1, exit_signal=self.eof)
3129
- if result is None:
3130
- return result
3131
- if len(result) == 1:
3132
- raise RuntimeError(f"Corrupted data. Worker received {len(result)} elements, it should be more than 1.")
3133
- func_index, *data = result
3134
- return func_index, tuple(data)
3135
-
3136
-
3137
3057
  def _main_process_already_exit():
3138
3058
  """
3139
3059
  Judge whether main process already exit.
@@ -3146,15 +3066,18 @@ def _main_process_already_exit():
3146
3066
  return False
3147
3067
 
3148
3068
 
3149
- def _worker_loop(operations, pipe, worker_id):
3069
+ def _worker_loop(quit_signal, operations, worker_id, op_type, key, video_backend=None):
3150
3070
  """
3151
3071
  Multiprocess worker process loop.
3072
+ The worker process(Python Layer) gets data from / sends data to map / batch thread(C++ layer) by message queue
3073
+ and shared memory. This logic no longer uses the Python multi-process pool, in_queue, and out_queue for
3074
+ data transferring.
3152
3075
  """
3153
3076
  # Initialize C++ side signal handlers
3154
3077
  cde.register_worker_handlers()
3155
3078
 
3156
- # Ensure that the process does not hang when exiting
3157
- pipe.res_queue.cancel_join_thread()
3079
+ if video_backend is not None:
3080
+ set_video_backend(video_backend)
3158
3081
 
3159
3082
  def _ignore_sigint():
3160
3083
  """
@@ -3168,121 +3091,197 @@ def _worker_loop(operations, pipe, worker_id):
3168
3091
  if get_seed() != 5489:
3169
3092
  set_seed(get_seed() + worker_id)
3170
3093
 
3094
+ msg_queue = cde.MessageQueue(key)
3095
+ msg_queue.set_release_flag(False)
3096
+ shm_queue = cde.SharedMemoryQueue(key)
3097
+ shm_queue.set_release_flag(False)
3098
+
3099
+ pid = str(os.getpid())
3100
+ ppid = str(os.getppid())
3101
+
3102
+ # Scenario: when the main process is killed, worker processe needs to release shm & msg.
3103
+ # The shm id and msg id should be released by SIGTERM in worker handler
3104
+ cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
3105
+ msg_queue.msg_queue_id)
3106
+
3107
+ num_receive = 0
3108
+ num_send = 0
3171
3109
  while not _main_process_already_exit():
3172
3110
  _ignore_sigint()
3173
3111
 
3174
- result = pipe.worker_receive()
3175
- if result is None:
3112
+ # quit by close_worker
3113
+ if quit_signal.is_set():
3176
3114
  return
3177
- (idx, input_tensors) = result
3178
- if input_tensors == "QUIT":
3179
- break
3115
+
3116
+ # >> receive procedure >>
3117
+ ## 1. get message queue which contains shared memory info from map C++ thread in main process
3180
3118
  try:
3181
- output_tensors = operations[idx](*input_tensors)
3119
+ cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
3120
+ msg_queue.msg_queue_id)
3121
+ msg_queue.msg_rcv(cde.MASTER_SEND_DATA_MSG)
3122
+ cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
3123
+ msg_queue.msg_queue_id)
3124
+ except RuntimeError as err:
3125
+ cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
3126
+ msg_queue.msg_queue_id)
3127
+ # the msg_queue had been released by main process, ignore it in worker process
3128
+ if "errno: 2" in str(err):
3129
+ # Because the worker process does not release msg and shm, continue
3130
+ continue
3131
+ raise err
3182
3132
 
3183
- pipe.worker_send(output_tensors)
3184
- except Exception:
3185
- pipe.worker_send(ExceptionHandler(where="in map(or batch) worker and execute Python function"))
3186
- # Do not return
3133
+ ## when the message queue had been released, break the loop
3134
+ if msg_queue.message_queue_state() == cde.MessageState.RELEASED:
3135
+ logger.info("The message queue had been released, worker loop end.")
3136
+ break
3187
3137
 
3188
- # release the queue when stop the worker by master
3189
- del pipe.in_queue
3190
- del pipe.res_queue
3138
+ num_receive += 1
3191
3139
 
3140
+ logger.info("Python process {} worker({}) receives {} samples from map thread.".format(op_type, worker_id,
3141
+ num_receive))
3192
3142
 
3193
- def worker_target(operations, worker_id):
3194
- logger.info("Multiprocessing start method: {}".format(multiprocessing.get_start_method()))
3195
- return lambda pipe: _worker_loop(operations, pipe, worker_id)
3143
+ # convert the data from shm to python data
3144
+ if op_type == cde.MAP_OP:
3145
+ ## 2. construct shared memory to TensorRow which contains one / more columns
3146
+ tensor_row = shm_queue.to_tensor_row(msg_queue.shm_id, msg_queue.shm_size)
3196
3147
 
3148
+ ## 3. convert TensorRow to Python tuple which elements are a column
3149
+ tuple_column = cde.convert_tensor_row_to_py_tuple(tensor_row)
3197
3150
 
3198
- class WorkerTarget:
3199
- def __init__(self, operations, pipe, worker_id):
3200
- self.operations = operations
3201
- self.pipe = pipe
3202
- self.worker_id = worker_id
3203
- logger.info("Multiprocessing start method: {}".format(multiprocessing.get_start_method()))
3151
+ py_func_input = tuple_column
3152
+ elif op_type == cde.BATCH_OP:
3153
+ ## 2. construct shard memory to TensorTable which contains one / more TensorRow & CBatchInfo
3154
+ tensor_table, batch_info, _ = shm_queue.to_tensor_table(msg_queue.shm_id, msg_queue.shm_size)
3204
3155
 
3205
- def __call__(self):
3206
- return _worker_loop(self.operations, self.pipe, self.worker_id)
3156
+ ## 3. convert TensorTable to Python tuple tuple
3157
+ # The tuple indicate the multi columns
3158
+ # The list indicate the multi rows
3159
+ tuple_list_column = cde.convert_tensor_table_to_py_tuple_list(tensor_table)
3207
3160
 
3161
+ py_func_input = (*tuple_list_column, batch_info)
3162
+ else:
3163
+ raise RuntimeError("The op_type: {} is invalid.".format(op_type))
3208
3164
 
3209
- class _MPWorker(multiprocessing.Process):
3210
- """
3211
- Worker process for multiprocessing.
3212
- """
3165
+ # execute the pyfunc
3166
+ try:
3167
+ py_func_output = py_func_input
3213
3168
 
3214
- def __init__(self, operations, warning_ctl, max_rowsize=(-1, -1), worker_id=0):
3215
- shared_memory = get_enable_shared_mem()
3216
- self.pipe = Pipe(warning_ctl, shared_memory=shared_memory, max_rowsize=max_rowsize)
3217
- self.check_interval = get_multiprocessing_timeout_interval()
3218
- super().__init__(target=worker_target(operations, worker_id), name="MapWorker" + str(worker_id),
3219
- args=(self.pipe,), daemon=True)
3220
-
3221
- def execute(self, idx, *args):
3222
- """Acquiring data from a worker in an infinite loop"""
3223
- self.pipe.master_send(idx, args)
3224
- time_s = time.time()
3225
- wait_count = 1
3226
- while True:
3227
- cost_time = time.time() - time_s
3228
- if cost_time / self.check_interval >= wait_count:
3229
- wait_count += 1
3230
- logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
3231
- "worker of the map operation is hanging. "
3232
- "Check whether the user defined data transform is too slow or the "
3233
- "output data is too large. You can also set the timeout interval by "
3234
- "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
3235
- "of this log.")
3236
- pid = self.pid
3237
- logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
3238
- install_status, _ = subprocess.getstatusoutput("py-spy --version")
3239
- if install_status == 0:
3240
- stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
3241
- logger.warning("Map worker subprocess stack:\n{}".format(stack))
3169
+ # execute the remaining operations
3170
+ for idx in range(len(operations)):
3171
+ if isinstance(py_func_output, tuple):
3172
+ py_func_output = operations[idx](*py_func_output)
3242
3173
  else:
3243
- logger.warning("Please `pip install py-spy` to get the stacks of the stuck process.")
3174
+ py_func_output = operations[idx](py_func_output)
3175
+
3176
+ # << send procedure <<
3177
+ # the result is None
3178
+ if py_func_output is None:
3179
+ raise RuntimeError("Got None from Python Function which is defined by {}".format(op_type))
3180
+
3181
+ # convert the output to tuple
3182
+ if not isinstance(py_func_output, tuple):
3183
+ py_func_output = (py_func_output,)
3184
+
3185
+ if op_type == cde.MAP_OP:
3186
+ # check if the map return Generator type
3187
+ for item in py_func_output:
3188
+ if isinstance(item, GeneratorType):
3189
+ raise RuntimeError("Cannot pickle <class 'generator'> object, please verify pyfunc "
3190
+ "return with numpy array")
3191
+
3192
+ ## 1. convert Python tuple to TensorRow
3193
+ output_tensor_row = cde.convert_py_tuple_to_tensor_row(py_func_output)
3194
+
3195
+ ## 2. convert TensorRow to shared memory
3196
+ shm_queue.from_tensor_row(output_tensor_row)
3197
+ elif op_type == cde.BATCH_OP:
3198
+ ## 1. convert Python tuple tuple to TensorTable
3199
+ output_tensor_table, concat_batch = cde.convert_py_tuple_list_to_tensor_table(py_func_output)
3200
+
3201
+ ## 2. convert TensorTable to shared memory
3202
+ shm_queue.from_tensor_table(output_tensor_table, batch_info, concat_batch)
3203
+ else:
3204
+ raise RuntimeError("The op_type: {} is invalid.".format(op_type))
3205
+
3206
+ ## 3. send message queue which contains shared memory to map C++ thread in main process
3207
+ cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
3208
+ msg_queue.msg_queue_id)
3209
+ msg_queue.msg_snd(cde.WORKER_SEND_DATA_MSG, shm_queue.get_shm_id(), shm_queue.get_shm_size())
3210
+ cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
3211
+ msg_queue.msg_queue_id)
3212
+
3213
+ num_send += 1
3214
+ logger.info("Python process {} worker({}) sends {} samples to map thread.".format(op_type, worker_id,
3215
+ num_send))
3216
+ except Exception:
3244
3217
  try:
3245
- res = self.pipe.master_receive()
3246
- except queue.Empty:
3247
- continue
3248
- if res is None:
3249
- # receive finish signal
3250
- return None
3251
- if isinstance(res, ExceptionHandler):
3252
- res.reraise()
3253
- return res
3254
-
3255
- def close(self):
3256
- try:
3257
- if self.is_alive():
3258
- # release the eager executor which is used by current process
3259
- transforms.transforms.clean_unused_executors()
3260
-
3261
- logger.info(f"Closing worker with PID: {self.pid}")
3262
- self.pipe.master_close()
3263
-
3264
- process_dir = os.path.join('/proc', str(self.pid))
3265
- while self.is_alive() and os.path.exists(process_dir):
3266
- logger.info("Waiting for worker {} closed ...".format(self.pid))
3267
- time.sleep(0.001)
3268
-
3269
- # del the handle which hold by master
3270
- del self.pipe.in_queue
3271
- del self.pipe.res_queue
3272
- super().terminate()
3273
- super().join()
3274
- super().close()
3275
-
3276
- except ValueError:
3277
- # Process has been closed already
3218
+ if op_type == cde.MAP_OP:
3219
+ pyfunc_err = ExceptionHandler(where="in map worker and execute Python function")
3220
+ elif op_type == cde.BATCH_OP:
3221
+ pyfunc_err = ExceptionHandler(where="in batch(per_batch_map) worker and execute Python function")
3222
+ else:
3223
+ pyfunc_err = "The op_type: {} is invalid.".format(op_type)
3224
+ pyfunc_err.reraise()
3225
+ except Exception as err:
3226
+ _, _, exc_tb = sys.exc_info()
3227
+ fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
3228
+
3229
+ if op_type == cde.MAP_OP:
3230
+ logger.info("Got exception {} from Map Worker({})".format(str(err), worker_id))
3231
+ elif op_type == cde.BATCH_OP:
3232
+ logger.info("Got exception {} from Batch Worker({})".format(str(err), worker_id))
3233
+ else:
3234
+ logger.info("The op_type: {} is invalid.".format(op_type))
3235
+
3236
+ # err_code, lineno, filename, err_desc
3237
+ msg_queue.serialize_status(cde.StatusCode.MD_PY_FUNC_EXCEPTION, exc_tb.tb_lineno, fname, str(err))
3238
+
3239
+ cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
3240
+ msg_queue.msg_queue_id)
3241
+ msg_queue.msg_snd(cde.WORKER_SEND_DATA_MSG, shm_queue.get_shm_id(), shm_queue.get_shm_size())
3242
+ cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
3243
+ msg_queue.msg_queue_id)
3244
+
3245
+ # worker error
3246
+ if get_error_samples_mode() == ErrorSamplesMode.RETURN:
3247
+ break
3248
+ else:
3249
+ # continue the loop, when the get_error_samples_mode() is REPLACE or SKIP
3250
+ continue
3251
+
3252
+ # release the eager executor which is used by current process
3253
+ transforms.transforms.clean_unused_executors()
3254
+
3255
+ while not _main_process_already_exit():
3256
+ # quit by close_worker
3257
+ if quit_signal.is_set():
3278
3258
  return
3279
- return
3280
3259
 
3281
- def is_alive(self):
3282
- try:
3283
- return super().is_alive()
3284
- except ValueError:
3285
- return False
3260
+ logger.info("The worker process is waiting for the main process to exit.")
3261
+ time.sleep(0.1)
3262
+
3263
+ # the main process is not exist yet which maybe killed -9
3264
+ msg_queue.set_release_flag(True)
3265
+ msg_queue.release()
3266
+ shm_queue.set_release_flag(True)
3267
+ shm_queue.release()
3268
+
3269
+
3270
+ class WorkerTarget:
3271
+ """Mulitprocess mode for dataset map or batch"""
3272
+ def __init__(self, quit_signal, operations, worker_id, op_type, ftok_key):
3273
+ self.quit_signal = quit_signal
3274
+ self.operations = operations
3275
+ self.worker_id = worker_id
3276
+ self.op_type = op_type
3277
+ self.ftok_key = ftok_key
3278
+ start_method = multiprocessing.get_start_method()
3279
+ logger.info("Multiprocessing start method: {}".format(start_method))
3280
+ self.video_backend = get_video_backend() if start_method == 'spawn' else None
3281
+
3282
+ def __call__(self):
3283
+ return _worker_loop(self.quit_signal, self.operations, self.worker_id, self.op_type, self.ftok_key,
3284
+ self.video_backend)
3286
3285
 
3287
3286
 
3288
3287
  def worker_is_alive(worker):
@@ -3293,24 +3292,31 @@ def worker_is_alive(worker):
3293
3292
  return False
3294
3293
 
3295
3294
 
3296
- def close_worker(worker, pipe):
3295
+ def close_worker(worker, eof):
3297
3296
  """Close the subprocess worker in spawn mode"""
3298
3297
  try:
3299
3298
  if worker_is_alive(worker):
3300
3299
  # release the eager executor which is used by current process
3301
3300
  transforms.transforms.clean_unused_executors()
3302
3301
 
3303
- logger.info(f"Closing worker with PID: {worker.pid}")
3304
- pipe.master_close()
3302
+ # let the worker exit
3303
+ logger.info("Set eof flag for worker with PID: {}.".format(worker.pid))
3304
+ eof.set()
3305
+
3306
+ # wait timeout
3307
+ wait_timeout = 2
3308
+ start_time = time.time()
3305
3309
 
3306
3310
  process_dir = os.path.join('/proc', str(worker.pid))
3307
3311
  while worker_is_alive(worker) and os.path.exists(process_dir):
3308
3312
  logger.info("Waiting for worker {} closed ...".format(worker.pid))
3309
3313
  time.sleep(0.5)
3310
3314
 
3315
+ # maybe the worker is hung by msg_queue.MsgRcv, so break the loop and terminate it in next step
3316
+ if time.time() - start_time > wait_timeout:
3317
+ break
3318
+
3311
3319
  # del the handle which hold by master
3312
- del pipe.in_queue
3313
- del pipe.res_queue
3314
3320
  worker.terminate()
3315
3321
  worker.join()
3316
3322
  worker.close()
@@ -3367,7 +3373,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3367
3373
  self.warning_ctl = None
3368
3374
  # cache thread (get_ident()) to worker_id mapping in Python layer
3369
3375
  self.python_threads_to_workers = {}
3370
- self.eof = None
3376
+ self.eof_workers = []
3377
+ self.eof_clean_process = None
3371
3378
  self.running = False
3372
3379
 
3373
3380
  def __del__(self):
@@ -3443,19 +3450,39 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3443
3450
  del workers
3444
3451
  os.kill(os.getpid(), signal.SIGTERM)
3445
3452
 
3446
- def launch(self, op_id=-1):
3453
+ def launch(self, op_id, op_type, ftok_keys):
3447
3454
  """
3448
3455
  Launch Python multiprocessing pool.
3449
3456
 
3450
3457
  Args:
3451
- op_id: ID for operation to have Python multiprocessing pool launched
3458
+ op_id (int): ID for operation to have Python multiprocessing pool launched
3459
+ op_type (str): Indicate MapOp / BatchOp
3460
+ ftok_keys (list[int]): the ftok key of list for msg queue and shm queue
3452
3461
 
3453
3462
  Returns:
3454
3463
  Python multiprocessing pool is launched.
3455
3464
  """
3456
3465
  self.python_threads_to_workers = {}
3466
+
3467
+ if not isinstance(op_id, int):
3468
+ raise RuntimeError("The op_id is not int.")
3457
3469
  self.op_id = op_id
3458
- logger.info("Launching new Python multiprocessing pool for Op: " + str(self.op_id))
3470
+
3471
+ valid_op_type = [cde.MAP_OP, cde.BATCH_OP]
3472
+ if op_type not in valid_op_type:
3473
+ raise RuntimeError("The op_type: {} is not in {}.".format(op_type, valid_op_type))
3474
+ self.op_type = op_type
3475
+
3476
+ if not isinstance(ftok_keys, list):
3477
+ raise RuntimeError("The ftok_keys is not a list.")
3478
+ if not all(isinstance(x, int) for x in ftok_keys):
3479
+ raise RuntimeError("The item in ftok_keys is not all int.")
3480
+ if len(ftok_keys) != self.num_parallel_workers:
3481
+ raise RuntimeError("The len of ftok_keys is not equal to num_parallel_workers.")
3482
+ self.ftok_keys = ftok_keys
3483
+
3484
+ logger.info("Launching new Python multiprocessing pool for Op: " + self.op_type + "(" + str(self.op_id) + \
3485
+ "), ftok_keys: " + str(self.ftok_keys))
3459
3486
  if self.is_mp_enabled():
3460
3487
  message = "Launching a new Python multiprocessing pool while a pool already exists!" + \
3461
3488
  " The existing pool will be terminated first."
@@ -3478,30 +3505,21 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3478
3505
  raise Exception("Pool was already created, close it first.")
3479
3506
 
3480
3507
  self.workers = []
3481
- self.pipes = []
3482
- self.check_interval = get_multiprocessing_timeout_interval()
3483
3508
  self.warning_ctl = multiprocessing.Value('i', 0)
3484
- if self.start_method == "fork":
3485
- # Construct python worker processes
3486
- for worker_id in range(self.num_parallel_workers):
3487
- worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, worker_id)
3488
- worker.start()
3489
- self.workers.append(worker)
3490
- else:
3491
- multiprocessing.set_start_method(self.start_method, True)
3492
3509
 
3493
- # Construct python worker processes
3494
- for worker_id in range(self.num_parallel_workers):
3495
- shared_memory = get_enable_shared_mem()
3496
- pipe = Pipe(self.warning_ctl, shared_memory=shared_memory, max_rowsize=self.max_rowsize)
3497
- self.check_interval = get_multiprocessing_timeout_interval()
3498
- worker = multiprocessing.Process(target=WorkerTarget(self.operations, pipe, worker_id),
3499
- name="MapWorker" + str(worker_id), daemon=True)
3500
- self.workers.append(worker)
3501
- self.pipes.append(pipe)
3502
- worker.start()
3510
+ multiprocessing.set_start_method(self.start_method, True)
3511
+
3512
+ # Construct python worker processes
3513
+ for worker_id in range(self.num_parallel_workers):
3514
+ eof = multiprocessing.Event()
3515
+ worker = multiprocessing.Process(target=WorkerTarget(eof, self.operations, worker_id, self.op_type,
3516
+ self.ftok_keys[worker_id]),
3517
+ name="MapWorker" + str(worker_id), daemon=True)
3518
+ self.eof_workers.append(eof)
3519
+ self.workers.append(worker)
3520
+ worker.start()
3503
3521
 
3504
- multiprocessing.set_start_method("fork", True)
3522
+ multiprocessing.set_start_method("fork", True)
3505
3523
 
3506
3524
  logger.info("Launch worker process(es): {}".format(self.get_pids()))
3507
3525
 
@@ -3515,6 +3533,20 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3515
3533
  atexit.register(lambda cleanup: cleanup()() if cleanup() is not None else None,
3516
3534
  weakref.WeakMethod(self.terminate))
3517
3535
 
3536
+ # Ensure that all workers are in the running state
3537
+ start = time.time()
3538
+ wait_time = 120 # 120s
3539
+ while True:
3540
+ if self.is_running():
3541
+ logger.info("All workers has been running state.")
3542
+ break
3543
+ else:
3544
+ time.sleep(0.5)
3545
+ if time.time() - start > wait_time:
3546
+ logger.error("All worker processes have not reached the running state within " + str(wait_time) +
3547
+ " seconds, data processing errors may occur.")
3548
+ break
3549
+
3518
3550
  def terminate(self):
3519
3551
  if self.running:
3520
3552
  # abort the monitor first and then close all the workers
@@ -3543,7 +3575,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3543
3575
  continue
3544
3576
  return self.pids
3545
3577
 
3546
- def add_new_workers(self, num_new_workers):
3578
+ def add_new_workers(self, num_new_workers, op_type, ftok_keys):
3579
+ """Used by AutoTune"""
3547
3580
  logger.info(
3548
3581
  "Increasing num_parallel_workers of Python Multiprocessing pool for Op:" + str(self.op_id) +
3549
3582
  ", old num_workers=" + str(self.num_parallel_workers) + " new num_workers=" + str(
@@ -3551,9 +3584,14 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3551
3584
  num_new_workers) + ".")
3552
3585
  self.terminate()
3553
3586
  self.num_parallel_workers += num_new_workers
3554
- self.launch(self.op_id)
3555
3587
 
3556
- def remove_workers(self, num_removed_workers):
3588
+ if self.num_parallel_workers != len(ftok_keys):
3589
+ raise RuntimeError("Add new workers failed, the num_workers is not equal size of ftok_keys.")
3590
+
3591
+ self.launch(self.op_id, op_type, ftok_keys)
3592
+
3593
+ def remove_workers(self, num_removed_workers, op_type, ftok_keys):
3594
+ """Used by AutoTune"""
3557
3595
  logger.info(
3558
3596
  "Decreasing num_parallel_workers of Python Multiprocessing pool for Op:" + str(self.op_id) +
3559
3597
  ", old num_workers=" + str(self.num_parallel_workers) + " new num_workers=" + str(
@@ -3561,59 +3599,14 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3561
3599
  num_removed_workers) + ".")
3562
3600
  self.terminate()
3563
3601
  self.num_parallel_workers -= num_removed_workers
3564
- self.launch(self.op_id)
3565
3602
 
3566
- def is_mp_enabled(self):
3567
- return self.workers is not None
3603
+ if self.num_parallel_workers != len(ftok_keys):
3604
+ raise RuntimeError("Remove workers failed, the num_workers is not equal size of ftok_keys.")
3568
3605
 
3569
- def execute(self, idx, *args):
3570
- """
3571
- Execute
3572
- """
3573
- t_id = threading.get_ident()
3574
- # get the worker_id from Python layer cache first, get from Cpp layer if not found.
3575
- worker_id = self.python_threads_to_workers.setdefault(t_id, self.get_thread_to_worker())
3576
- if worker_id >= len(self.workers):
3577
- raise RuntimeError("[Internal] worker_id value is greater than number of available workers!")
3578
-
3579
- # todo check_iterator_cleanup
3580
- if self.is_running() and check_iterator_cleanup() is False:
3581
- if self.start_method == "fork":
3582
- return self.workers[worker_id].execute(idx, *args)
3583
- # spawn mode
3584
- self.pipes[worker_id].master_send(idx, args)
3585
- time_s = time.time()
3586
- wait_count = 1
3587
- while True:
3588
- cost_time = time.time() - time_s
3589
- if cost_time / self.check_interval >= wait_count:
3590
- wait_count += 1
3591
- logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
3592
- "worker of the map operation is hanging. "
3593
- "Check whether the user defined data transform is too slow or the "
3594
- "output data is too large. You can also set the timeout interval by "
3595
- "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
3596
- "of this log.")
3597
- pid = self.workers[worker_id].pid
3598
- logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
3599
- install_status, _ = subprocess.getstatusoutput("py-spy --version")
3600
- if install_status == 0:
3601
- stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
3602
- logger.warning("Map worker subprocess stack:\n{}".format(stack))
3603
- else:
3604
- logger.warning("Please `pip install py-spy` to get the stacks of the stuck process.")
3605
- try:
3606
- res = self.pipes[worker_id].master_receive()
3607
- except queue.Empty:
3608
- continue
3609
- if res is None:
3610
- # receive finish signal
3611
- return None
3612
- if isinstance(res, ExceptionHandler):
3613
- res.reraise()
3614
- return res
3606
+ self.launch(self.op_id, op_type, ftok_keys)
3615
3607
 
3616
- return None
3608
+ def is_mp_enabled(self):
3609
+ return self.workers is not None
3617
3610
 
3618
3611
  def _launch_monitor(self):
3619
3612
  """
@@ -3622,26 +3615,28 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3622
3615
  The watch dog will clean up subprocesses and main process when any subprocess exited.
3623
3616
  """
3624
3617
  if platform.system().lower() != 'windows':
3625
- self.eof = multiprocessing.Event()
3618
+ self.eof_clean_process = multiprocessing.Event()
3626
3619
  self.cleaning_process = multiprocessing.Process(target=self._clean_process,
3627
3620
  name="MapCleanProcess",
3628
- args=(self.ppid, self.workers, self.eof),
3621
+ args=(self.ppid, self.workers, self.eof_clean_process),
3629
3622
  daemon=True)
3630
3623
  self.cleaning_process.start()
3631
3624
  logger.info("Launch clean process {} to monitor worker "
3632
3625
  "process(es): {}".format(self.cleaning_process.pid, self.get_pids()))
3633
3626
 
3634
3627
  if get_enable_watchdog():
3635
- worker_ids = [worker.pid for worker in self.workers]
3628
+ worker_ids = [os.getpid()]
3629
+ worker_ids.extend([worker.pid for worker in self.workers])
3636
3630
  worker_ids.append(self.cleaning_process.pid)
3637
- cde.register_worker_pids(id(self), set(worker_ids))
3631
+ cde.register_worker_pids(id(self), worker_ids)
3638
3632
 
3639
3633
  def _abort_monitor(self):
3640
3634
  """Deregister workers monitored by the watch dog and join clean process."""
3641
3635
  if get_enable_watchdog():
3642
3636
  cde.deregister_worker_pids(id(self))
3643
- if hasattr(self, 'eof') and self.eof is not None:
3644
- self.eof.set()
3637
+ if hasattr(self, 'eof') and self.eof_clean_process is not None:
3638
+ logger.info("Set eof flag for cleaning_process.")
3639
+ self.eof_clean_process.set()
3645
3640
  if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
3646
3641
  # let the quit event notify the cleaning process to exit
3647
3642
  self.cleaning_process.join(timeout=5)
@@ -3652,20 +3647,14 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3652
3647
 
3653
3648
  def is_running(self):
3654
3649
  if hasattr(self, 'workers') and self.workers is not None:
3655
- if self.start_method == "fork":
3656
- return all([w.is_alive() for w in self.workers])
3657
3650
  return all([worker_is_alive(w) for w in self.workers])
3658
3651
  return False
3659
3652
 
3660
3653
  def close_all_workers(self):
3661
3654
  """Close all the subprocess workers"""
3662
3655
  if hasattr(self, 'workers') and self.workers is not None:
3663
- if self.start_method == "fork":
3664
- for w in self.workers:
3665
- w.close()
3666
- else:
3667
- for i, w in enumerate(self.workers):
3668
- close_worker(w, self.pipes[i])
3656
+ for index in range(len(self.workers)):
3657
+ close_worker(self.workers[index], self.eof_workers[index])
3669
3658
 
3670
3659
  check_interval = get_multiprocessing_timeout_interval()
3671
3660
  for w in self.workers:
@@ -3682,12 +3671,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3682
3671
  continue
3683
3672
  raise e
3684
3673
  try:
3685
- if self.start_method == "fork":
3686
- if w.is_alive():
3687
- os.close(subprocess_file_descriptor)
3688
- else:
3689
- if worker_is_alive(w):
3690
- os.close(subprocess_file_descriptor)
3674
+ if worker_is_alive(w):
3675
+ os.close(subprocess_file_descriptor)
3691
3676
  except OSError as e:
3692
3677
  # Maybe the file descriptor had been released, so ignore the 'Bad file descriptor'
3693
3678
  if "Bad file descriptor" not in str(e):
@@ -3696,8 +3681,12 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
3696
3681
  # use clear to release the handle which is better than self.workers = None
3697
3682
  self.workers.clear()
3698
3683
  self.workers = None
3699
- self.pipes.clear()
3700
- self.pipes = None
3684
+ self.eof_workers.clear()
3685
+ self.eof_workers = []
3686
+
3687
+ # as it can cause the main process to not exit when PyFunc executes very slowly so release
3688
+ # the shm & msg here
3689
+ cde.release_shm_and_msg_by_worker_pids(self.pids)
3701
3690
  self.pids = None
3702
3691
 
3703
3692
 
@@ -3775,7 +3764,22 @@ class MapDataset(UnionBaseDataset):
3775
3764
 
3776
3765
  count_old_transforms, count_new_transforms, count_non_data_vision_transforms = \
3777
3766
  self.__count_transforms(operations)
3767
+ count_py_ops = self.__count_py_ops(operations)
3778
3768
  count_pyfunc = self.__count_pyfuncs(operations)
3769
+
3770
+ # Whether to execute ops in the thread mode
3771
+ # op_type python_multiprocessing run_in_thread
3772
+ # c_op(s) false yes
3773
+ # c_op(s) true yes
3774
+ # py_op(s) / PyFunc false yes
3775
+ # py_op(s) / PyFunc true no
3776
+ # c_op(s) + py_op(s) / PyFunc false yes
3777
+ # c_op(s) + py_op(s) / PyFunc true no
3778
+ run_in_thread = not self.python_multiprocessing or (count_pyfunc == 0 and count_py_ops == 0) or get_debug_mode()
3779
+
3780
+ if self.python_multiprocessing and platform.system().lower() == 'windows':
3781
+ run_in_thread = True
3782
+
3779
3783
  if count_new_transforms + count_pyfunc == len(operations):
3780
3784
  prev_op = None
3781
3785
  for op in operations:
@@ -3793,18 +3797,43 @@ class MapDataset(UnionBaseDataset):
3793
3797
  op.implementation = Implementation.C
3794
3798
  prev_op = op
3795
3799
  operations = self.__insert_debug_wrapper(operations)
3796
- operations = transforms.transforms.Compose.reduce(operations)
3800
+ if run_in_thread:
3801
+ operations = transforms.transforms.Compose.reduce(operations)
3797
3802
  elif count_old_transforms + count_pyfunc + count_non_data_vision_transforms == len(operations):
3798
3803
  operations = self.__insert_debug_wrapper(operations)
3799
- operations = transforms.py_transforms.Compose.reduce(operations)
3804
+ if run_in_thread:
3805
+ operations = transforms.py_transforms.Compose.reduce(operations)
3800
3806
  else:
3801
3807
  raise RuntimeError("Mixing old legacy c/py_transforms and new unified transforms is not allowed.")
3802
3808
 
3803
- self.operations = self.__process_final_operations(operations)
3809
+ if run_in_thread:
3810
+ self.operations = self.__process_final_operations(operations)
3811
+ else:
3812
+ self.operations = operations
3804
3813
  self.prepare_multiprocessing()
3805
3814
 
3806
3815
  callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
3807
- return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
3816
+
3817
+ ## thread mode
3818
+ if run_in_thread:
3819
+ return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
3820
+ callbacks, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
3821
+
3822
+ # Bind self.operations with self.process_pool
3823
+ class _BindProcessPoolWithOperations:
3824
+ def __init__(self, pool, operations):
3825
+ self.pool = pool
3826
+ self.operations = operations
3827
+
3828
+ def __call__(self):
3829
+ pass
3830
+
3831
+ self.bound = _BindProcessPoolWithOperations(self.process_pool, self.operations)
3832
+
3833
+ ## process mode
3834
+ # in multi process mode, we just transfer the self.bound which is not really used in c layer
3835
+ # because when the pipeline is running, map thread transfer data through c++ shm & msg to Python Worker Process
3836
+ return cde.MapNode(children[0], [self.bound], self.input_columns, self.output_columns,
3808
3837
  callbacks, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
3809
3838
 
3810
3839
  def __deepcopy__(self, memodict):
@@ -3857,10 +3886,22 @@ class MapDataset(UnionBaseDataset):
3857
3886
  @staticmethod
3858
3887
  def __count_pyfuncs(operations):
3859
3888
  """
3860
- Count the number of pyfuncs operations
3889
+ Count the number of pyfuncs operations which is defined by user
3861
3890
  """
3862
3891
  return sum([1 if isinstance(op, FuncWrapper) else 0 for op in operations])
3863
3892
 
3893
+ @staticmethod
3894
+ def __count_py_ops(operations):
3895
+ """
3896
+ Count the number of python operations which is built-in
3897
+ """
3898
+ count = 0
3899
+ for op in operations:
3900
+ if hasattr(op, "implementation") and op.implementation != Implementation.C \
3901
+ and op.implementation is not None:
3902
+ count += 1
3903
+ return count
3904
+
3864
3905
  @staticmethod
3865
3906
  def __count_transforms(operations):
3866
3907
  """
@@ -3924,7 +3965,6 @@ class MapDataset(UnionBaseDataset):
3924
3965
  " Ignoring Python multiprocessing for map operation.")
3925
3966
  return
3926
3967
  if self.python_multiprocessing:
3927
- iter_specific_operations = []
3928
3968
  callable_list = []
3929
3969
 
3930
3970
  # If user didn't specify num_parallel_workers, set it to default
@@ -3941,18 +3981,6 @@ class MapDataset(UnionBaseDataset):
3941
3981
  self.process_pool = _PythonMultiprocessing(get_multiprocessing_start_method(),
3942
3982
  self.num_parallel_workers, str(self),
3943
3983
  callable_list, self.max_rowsize)
3944
- # Pass #2
3945
- idx = 0
3946
- for op in self.operations:
3947
- # our c transforms is now callable and should not be run in Python multithreading
3948
- if MapDataset.__operation_valid_for_multiprocessing(op):
3949
- # Wrap Python callable into _PythonCallable
3950
- iter_specific_operations.append(_PythonCallable(op, idx, self.process_pool))
3951
- idx += 1
3952
- else:
3953
- # CPP ops remain the same
3954
- iter_specific_operations.append(op)
3955
- self.operations = iter_specific_operations
3956
3984
 
3957
3985
  def __insert_debug_wrapper(self, operations):
3958
3986
  """
@@ -4385,7 +4413,7 @@ class TransferDataset(Dataset):
4385
4413
  def create_dict_iterator(self, num_epochs=-1, output_numpy=False):
4386
4414
  raise RuntimeError("TransferDataset is not iterable.")
4387
4415
 
4388
- def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False, do_copy=True):
4416
+ def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False, do_copy=False):
4389
4417
  raise RuntimeError("TransferDataset is not iterable.")
4390
4418
 
4391
4419
  def __iter__(self):