mindspore 2.4.10__cp311-cp311-win_amd64.whl → 2.6.0rc1__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +13 -6
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -0
- mindspore/_checkparam.py +3 -38
- mindspore/_deprecated/__init__.py +17 -0
- mindspore/_deprecated/jit.py +198 -0
- mindspore/_extends/builtin_operations.py +1 -1
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +6 -7
- mindspore/_extends/parse/compile_config.py +83 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
- mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
- mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
- mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
- mindspore/_extends/parse/parser.py +46 -197
- mindspore/_extends/parse/resources.py +1 -5
- mindspore/_extends/parse/standard_method.py +217 -98
- mindspore/_extends/pijit/__init__.py +2 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
- mindspore/_extends/pijit/tensor_func_list.py +27 -0
- mindspore/_extends/utils.py +1 -1
- mindspore/amp.py +11 -5
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/__init__.py +2 -2
- mindspore/boost/base.py +3 -7
- mindspore/boost/boost_cell_wrapper.py +138 -43
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +6 -3
- mindspore/common/_grad_function.py +56 -0
- mindspore/common/_pijit_context.py +14 -5
- mindspore/common/_register_for_tensor.py +1 -2
- mindspore/common/_stub_tensor.py +30 -14
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +4760 -0
- mindspore/common/api.py +435 -371
- mindspore/common/auto_dynamic_shape.py +41 -44
- mindspore/common/dtype.py +39 -36
- mindspore/common/dump.py +9 -6
- mindspore/common/file_system.py +9 -1
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +6 -2
- mindspore/common/initializer.py +13 -10
- mindspore/common/jit_begin_end.py +94 -0
- mindspore/common/jit_config.py +6 -1
- mindspore/common/jit_context.py +76 -0
- mindspore/common/jit_trace.py +378 -0
- mindspore/common/lazy_inline.py +9 -3
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/mutable.py +5 -4
- mindspore/common/parameter.py +135 -52
- mindspore/common/seed.py +2 -2
- mindspore/common/sparse_tensor.py +23 -17
- mindspore/common/tensor.py +951 -1992
- mindspore/communication/__init__.py +7 -5
- mindspore/communication/_comm_helper.py +52 -2
- mindspore/communication/comm_func.py +240 -181
- mindspore/communication/management.py +95 -26
- mindspore/context.py +314 -566
- mindspore/dataset/__init__.py +65 -37
- mindspore/dataset/audio/__init__.py +2 -8
- mindspore/dataset/audio/transforms.py +3 -17
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +87 -6
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +6 -5
- mindspore/dataset/engine/datasets.py +292 -267
- mindspore/dataset/engine/datasets_audio.py +22 -8
- mindspore/dataset/engine/datasets_standard_format.py +46 -27
- mindspore/dataset/engine/datasets_text.py +78 -48
- mindspore/dataset/engine/datasets_user_defined.py +182 -116
- mindspore/dataset/engine/datasets_vision.py +120 -44
- mindspore/dataset/engine/iterators.py +283 -63
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +289 -43
- mindspore/dataset/engine/serializer_deserializer.py +3 -2
- mindspore/dataset/engine/validators.py +53 -11
- mindspore/dataset/text/__init__.py +7 -6
- mindspore/dataset/text/transforms.py +6 -5
- mindspore/dataset/text/utils.py +3 -3
- mindspore/dataset/transforms/__init__.py +0 -9
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +31 -14
- mindspore/dataset/utils/browse_dataset.py +1 -1
- mindspore/dataset/vision/__init__.py +2 -9
- mindspore/dataset/vision/transforms.py +202 -158
- mindspore/dataset/vision/utils.py +7 -5
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +153 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +123 -0
- mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +170 -0
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/es/embedding_service.py +35 -27
- mindspore/experimental/llm_boost/__init__.py +1 -0
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/map_parameter.py +4 -4
- mindspore/experimental/optim/adadelta.py +6 -6
- mindspore/experimental/optim/adagrad.py +4 -4
- mindspore/experimental/optim/adam.py +7 -0
- mindspore/experimental/optim/adamax.py +4 -4
- mindspore/experimental/optim/adamw.py +4 -0
- mindspore/experimental/optim/asgd.py +1 -1
- mindspore/experimental/optim/lr_scheduler.py +73 -46
- mindspore/experimental/optim/radam.py +34 -31
- mindspore/experimental/optim/rprop.py +1 -1
- mindspore/experimental/optim/sgd.py +1 -1
- mindspore/hal/contiguous_tensors_handle.py +6 -10
- mindspore/hal/device.py +55 -53
- mindspore/hal/event.py +52 -52
- mindspore/hal/memory.py +157 -117
- mindspore/hal/stream.py +150 -109
- mindspore/include/api/context.h +0 -1
- mindspore/include/dataset/constants.h +7 -4
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +50 -0
- mindspore/mindrecord/__init__.py +21 -8
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +796 -759
- mindspore/mint/distributed/__init__.py +70 -4
- mindspore/mint/distributed/distributed.py +2679 -44
- mindspore/mint/linalg/__init__.py +8 -0
- mindspore/mint/nn/__init__.py +743 -22
- mindspore/mint/nn/functional.py +716 -23
- mindspore/mint/nn/layer/__init__.py +21 -4
- mindspore/mint/nn/layer/_functions.py +334 -0
- mindspore/mint/nn/layer/activation.py +276 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +921 -0
- mindspore/mint/nn/layer/normalization.py +223 -28
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +235 -0
- mindspore/mint/optim/__init__.py +3 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/optim/sgd.py +171 -0
- mindspore/mint/special/__init__.py +2 -1
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/__init__.py +4 -1
- mindspore/nn/cell.py +1370 -189
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +29 -27
- mindspore/nn/layer/basic.py +51 -35
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/container.py +1 -1
- mindspore/nn/layer/conv.py +22 -17
- mindspore/nn/layer/embedding.py +12 -11
- mindspore/nn/layer/normalization.py +56 -49
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +120 -42
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +59 -36
- mindspore/nn/learning_rate_schedule.py +8 -4
- mindspore/nn/loss/loss.py +58 -55
- mindspore/nn/optim/ada_grad.py +7 -5
- mindspore/nn/optim/adadelta.py +11 -9
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +17 -13
- mindspore/nn/optim/adamax.py +8 -7
- mindspore/nn/optim/adasum.py +5 -5
- mindspore/nn/optim/asgd.py +1 -1
- mindspore/nn/optim/ftrl.py +11 -9
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/lazyadam.py +12 -10
- mindspore/nn/optim/momentum.py +7 -6
- mindspore/nn/optim/optimizer.py +3 -3
- mindspore/nn/optim/proximal_ada_grad.py +12 -10
- mindspore/nn/optim/rmsprop.py +13 -12
- mindspore/nn/optim/rprop.py +11 -9
- mindspore/nn/optim/sgd.py +9 -6
- mindspore/nn/optim/tft_wrapper.py +5 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/probability/bijector/bijector.py +17 -11
- mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
- mindspore/nn/probability/bijector/invert.py +2 -2
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +3 -2
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +1 -1
- mindspore/nn/probability/distribution/cauchy.py +4 -2
- mindspore/nn/probability/distribution/exponential.py +6 -7
- mindspore/nn/probability/distribution/gamma.py +2 -2
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/half_normal.py +5 -3
- mindspore/nn/probability/distribution/logistic.py +5 -3
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/uniform.py +5 -3
- mindspore/nn/reinforcement/_tensors_queue.py +1 -1
- mindspore/nn/reinforcement/tensor_array.py +1 -1
- mindspore/nn/utils/init.py +13 -11
- mindspore/nn/wrap/__init__.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +181 -122
- mindspore/nn/wrap/grad_reducer.py +45 -36
- mindspore/nn/wrap/loss_scale.py +6 -7
- mindspore/numpy/array_creations.py +63 -65
- mindspore/numpy/array_ops.py +149 -144
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +365 -363
- mindspore/numpy/utils.py +17 -18
- mindspore/numpy/utils_const.py +5 -6
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +5 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_register_for_op.py +0 -11
- mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
- mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
- mindspore/ops/_vmap/vmap_array_ops.py +27 -25
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
- mindspore/ops/_vmap/vmap_math_ops.py +15 -16
- mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
- mindspore/ops/auto_generate/__init__.py +4 -3
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
- mindspore/ops/auto_generate/gen_extend_func.py +764 -124
- mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
- mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
- mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
- mindspore/ops/composite/__init__.py +2 -1
- mindspore/ops/composite/base.py +20 -25
- mindspore/ops/composite/math_ops.py +6 -16
- mindspore/ops/composite/multitype_ops/__init__.py +5 -2
- mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
- mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
- mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
- mindspore/ops/function/__init__.py +40 -2
- mindspore/ops/function/_add_attr_func.py +58 -0
- mindspore/ops/function/array_func.py +2089 -2403
- mindspore/ops/function/clip_func.py +80 -23
- mindspore/ops/function/debug_func.py +57 -57
- mindspore/ops/function/grad/__init__.py +1 -0
- mindspore/ops/function/grad/grad_func.py +104 -71
- mindspore/ops/function/image_func.py +2 -2
- mindspore/ops/function/linalg_func.py +47 -78
- mindspore/ops/function/math_func.py +4501 -3802
- mindspore/ops/function/nn_func.py +1726 -620
- mindspore/ops/function/other_func.py +159 -1
- mindspore/ops/function/parameter_func.py +18 -84
- mindspore/ops/function/random_func.py +440 -387
- mindspore/ops/function/reshard_func.py +4 -70
- mindspore/ops/function/sparse_func.py +3 -3
- mindspore/ops/function/sparse_unary_func.py +6 -6
- mindspore/ops/function/spectral_func.py +25 -58
- mindspore/ops/function/vmap_func.py +24 -17
- mindspore/ops/functional.py +22 -7
- mindspore/ops/functional_overload.py +1440 -0
- mindspore/ops/op_info_register.py +32 -244
- mindspore/ops/operations/__init__.py +13 -7
- mindspore/ops/operations/_custom_ops_utils.py +247 -0
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -43
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +43 -84
- mindspore/ops/operations/_ms_kernel.py +4 -10
- mindspore/ops/operations/_rl_inner_ops.py +1 -1
- mindspore/ops/operations/_scalar_ops.py +3 -2
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/array_ops.py +81 -324
- mindspore/ops/operations/comm_ops.py +154 -108
- mindspore/ops/operations/custom_ops.py +232 -78
- mindspore/ops/operations/debug_ops.py +153 -59
- mindspore/ops/operations/inner_ops.py +7 -5
- mindspore/ops/operations/linalg_ops.py +1 -57
- mindspore/ops/operations/manually_defined/_inner.py +1 -1
- mindspore/ops/operations/manually_defined/ops_def.py +928 -180
- mindspore/ops/operations/math_ops.py +32 -234
- mindspore/ops/operations/nn_ops.py +210 -498
- mindspore/ops/operations/other_ops.py +62 -9
- mindspore/ops/operations/random_ops.py +13 -7
- mindspore/ops/operations/reshard_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +2 -2
- mindspore/ops/primitive.py +66 -53
- mindspore/ops/tensor_method.py +1888 -0
- mindspore/ops_generate/__init__.py +0 -5
- mindspore/ops_generate/aclnn/__init__.py +0 -0
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
- mindspore/ops_generate/api/__init__.py +0 -0
- mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
- mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
- mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
- mindspore/ops_generate/api/functions_cc_generator.py +237 -0
- mindspore/ops_generate/api/gen_api.py +103 -0
- mindspore/ops_generate/api/op_api_proto.py +235 -0
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
- mindspore/ops_generate/common/__init__.py +0 -0
- mindspore/ops_generate/common/base_generator.py +11 -0
- mindspore/ops_generate/common/gen_constants.py +91 -0
- mindspore/ops_generate/common/gen_utils.py +348 -0
- mindspore/ops_generate/common/op_proto.py +473 -0
- mindspore/ops_generate/common/template.py +523 -0
- mindspore/ops_generate/gen_ops.py +22 -1069
- mindspore/ops_generate/op_def/__init__.py +0 -0
- mindspore/ops_generate/op_def/gen_op_def.py +90 -0
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
- mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
- mindspore/ops_generate/op_def_py/__init__.py +0 -0
- mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
- mindspore/ops_generate/pyboost/__init__.py +0 -0
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
- mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
- mindspore/ops_generate/resources/__init__.py +0 -0
- mindspore/ops_generate/resources/resource_list.py +30 -0
- mindspore/ops_generate/resources/resource_loader.py +36 -0
- mindspore/ops_generate/resources/resource_manager.py +64 -0
- mindspore/ops_generate/resources/yaml_loader.py +88 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
- mindspore/parallel/__init__.py +7 -3
- mindspore/parallel/_auto_parallel_context.py +152 -34
- mindspore/parallel/_cell_wrapper.py +130 -15
- mindspore/parallel/_parallel_serialization.py +107 -5
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +7 -2
- mindspore/parallel/_tensor.py +142 -18
- mindspore/parallel/_utils.py +199 -23
- mindspore/parallel/algo_parameter_config.py +4 -4
- mindspore/parallel/auto_parallel.py +732 -0
- mindspore/parallel/checkpoint_convert.py +159 -0
- mindspore/parallel/checkpoint_transform.py +698 -35
- mindspore/parallel/cluster/process_entity/_api.py +276 -50
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +21 -4
- mindspore/parallel/function/__init__.py +24 -0
- mindspore/parallel/function/reshard_func.py +259 -0
- mindspore/parallel/nn/__init__.py +25 -0
- mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
- mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
- mindspore/parallel/parameter_broadcast.py +25 -14
- mindspore/parallel/shard.py +137 -58
- mindspore/parallel/transform_safetensors.py +363 -305
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +22 -5
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +186 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +221 -0
- mindspore/profiler/common/path_manager.py +395 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +500 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_meta_data.py +74 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +251 -0
- mindspore/profiler/common/profiler_path_manager.py +179 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +341 -75
- mindspore/profiler/envprofiler.py +163 -0
- mindspore/profiler/experimental_config.py +197 -0
- mindspore/profiler/mstx.py +242 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +335 -0
- mindspore/profiler/profiler.py +1073 -90
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +118 -0
- mindspore/profiler/schedule.py +243 -0
- mindspore/rewrite/api/node.py +15 -13
- mindspore/rewrite/api/symbol_tree.py +2 -3
- mindspore/run_check/_check_version.py +27 -20
- mindspore/run_check/run_check.py +1 -1
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +177 -0
- mindspore/runtime/memory.py +409 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/safeguard/rewrite_obfuscation.py +12 -9
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +8 -8
- mindspore/train/_utils.py +88 -25
- mindspore/train/amp.py +9 -5
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +2 -16
- mindspore/train/callback/_checkpoint.py +53 -55
- mindspore/train/callback/_cluster_monitor.py +14 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +103 -68
- mindspore/train/callback/_history.py +8 -5
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +0 -3
- mindspore/train/callback/_loss_monitor.py +2 -1
- mindspore/train/callback/_on_request_exit.py +6 -5
- mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
- mindspore/train/callback/_summary_collector.py +52 -19
- mindspore/train/callback/_time_monitor.py +2 -1
- mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
- mindspore/train/data_sink.py +25 -2
- mindspore/train/dataset_helper.py +15 -16
- mindspore/train/loss_scale_manager.py +8 -7
- mindspore/train/metrics/accuracy.py +3 -3
- mindspore/train/metrics/confusion_matrix.py +9 -9
- mindspore/train/metrics/error.py +3 -3
- mindspore/train/metrics/hausdorff_distance.py +4 -4
- mindspore/train/metrics/mean_surface_distance.py +3 -3
- mindspore/train/metrics/metric.py +0 -12
- mindspore/train/metrics/occlusion_sensitivity.py +4 -2
- mindspore/train/metrics/precision.py +11 -10
- mindspore/train/metrics/recall.py +9 -9
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +174 -46
- mindspore/train/model.py +184 -113
- mindspore/train/serialization.py +622 -978
- mindspore/train/summary/_summary_adapter.py +2 -2
- mindspore/train/summary/summary_record.py +2 -3
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dryrun.py +140 -0
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/utils/utils.py +138 -4
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +587 -418
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
- mindspore/_install_custom.py +0 -43
- mindspore/common/_register_for_adapter.py +0 -74
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
- mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
- mindspore/ops_generate/gen_aclnn_implement.py +0 -263
- mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
- mindspore/ops_generate/gen_pyboost_func.py +0 -1052
- mindspore/ops_generate/gen_utils.py +0 -209
- mindspore/ops_generate/op_proto.py +0 -145
- mindspore/ops_generate/template.py +0 -261
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
mindspore/dataset/engine/datasets.py

@@ -35,11 +35,9 @@ import stat
 import subprocess
 import warnings
 
-import gc
 import time
 import uuid
 import multiprocessing
-from enum import Enum
 from importlib import import_module
 import sys
 import threading
@@ -53,18 +51,18 @@ import mindspore._c_dataengine as cde
 from mindspore._c_expression import typing
 
 from mindspore import log as logger
-from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _get_ps_context
-
+from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _get_ps_context, \
+    _enable_distributed_mindrt
 from mindspore.dataset.engine.offload import GetOffloadModel
-
+from mindspore.communication.management import get_group_size
 import mindspore.dataset.transforms.c_transforms as c_transforms
 import mindspore.dataset.transforms.py_transforms as py_transforms
 import mindspore.dataset.transforms as transforms
 from mindspore.dataset.text.utils import SentencePieceModel, DE_C_INTER_SENTENCEPIECE_MODE
-from mindspore.parallel._utils import _get_device_num
 from mindspore.dataset.debug import DebugHook
 
 from mindspore.dataset.engine import samplers
+from mindspore.dataset.engine.samplers import Shuffle
 from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
     ITERATORS_LIST, _unset_iterator_cleanup, _cleanup_the_iterators_if_created
 from .queue import _SharedQueue, _Queue
@@ -74,13 +72,14 @@ from .validators import check_batch, check_shuffle, check_map, check_filter, che
     check_save, check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send, check_padded_batch, \
     check_total_batch, check_sync_update
 from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
-    get_enable_watchdog, get_seed, set_seed, get_debug_mode, get_multiprocessing_timeout_interval,
+    get_enable_watchdog, get_seed, set_seed, get_debug_mode, get_multiprocessing_timeout_interval, \
+    _get_debug_hook_list, get_multiprocessing_start_method
 from ..core.datatypes import mstype_to_detype
 from ..core.validator_helpers import replace_none
 from ..core.py_util_helpers import ExceptionHandler
 from ..transforms.py_transforms_util import FuncWrapper, Implementation
 from ..vision.transforms import ToNumpy
-from ...mindrecord.config import _get_enc_key, _get_enc_mode,
+from ...mindrecord.config import _get_enc_key, _get_enc_mode, encrypt
 
 try:
     context = import_module("mindspore.context")
@@ -136,71 +135,6 @@ def _reset_training_dataset(global_step, dataset_size):
         raise RuntimeError("Training dataset is not set.")
 
 
-class Shuffle(str, Enum):
-    """Specify the shuffle mode.
-
-    - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
-    - ``Shuffle.FILES`` : Shuffle files only.
-    - ``Shuffle.INFILE`` : Shuffle data within each file.
-    """
-    GLOBAL: str = "global"
-    FILES: str = "files"
-    INFILE: str = "infile"
-
-
-ShuffleToShuffleMode = {Shuffle.FILES: cde.ShuffleMode.FILES,
-                        Shuffle.GLOBAL: cde.ShuffleMode.GLOBAL,
-                        Shuffle.INFILE: cde.ShuffleMode.INFILE}
-
-
-def shuffle_to_shuffle_mode(shuffle):
-    """
-    Shuffle Enum to Shuffle Mode
-
-    Args:
-        shuffle (Shuffle): shuffle flag to shuffle mode in C layer
-
-    Returns:
-        ShuffleMode, shuffle mode
-    """
-    shuffle_mode = cde.ShuffleMode.GLOBAL  # Global shuffle
-    if not isinstance(shuffle, Shuffle):
-        if shuffle is None or shuffle:
-            shuffle_mode = cde.ShuffleMode.GLOBAL  # Global shuffle
-        else:
-            shuffle_mode = cde.ShuffleMode.FALSE  # No shuffle
-    else:
-        shuffle_mode = ShuffleToShuffleMode[shuffle]
-    return shuffle_mode
-
-
-def shuffle_to_bool(shuffle):
-    """
-    Shuffle Enum to bool
-
-    Args:
-        shuffle (Shuffle): shuffle flag to bool
-
-    Returns:
-        bool, True / False
-    """
-    if shuffle is not None and not isinstance(shuffle, (bool, Shuffle)):
-        raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like 'Shuffle.GLOBAL' or "
-                        "'Shuffle.FILES' or 'Shuffle.INFILE'.")
-
-    shuffle_bool = True
-    if not isinstance(shuffle, Shuffle):
-        if shuffle is None:
-            shuffle_bool = None
-        elif shuffle:
-            shuffle_bool = True
-        else:
-            shuffle_bool = False
-    else:
-        shuffle_bool = True
-    return shuffle_bool
-
-
 @check_zip
 def zip(datasets):
     """
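The `Shuffle` enum and its conversion helpers removed above now live in `mindspore.dataset.engine.samplers` (note the new `from mindspore.dataset.engine.samplers import Shuffle` import in an earlier hunk). A minimal usage sketch, assuming the enum keeps its public re-export as `mindspore.dataset.Shuffle`; the file path is hypothetical:

```python
import mindspore.dataset as ds

# Shuffle is now defined once in mindspore.dataset.engine.samplers instead of
# being redefined in datasets.py; the public alias ds.Shuffle is unchanged.
dataset = ds.TFRecordDataset(
    ["/path/to/data.tfrecord"],   # hypothetical file path
    shuffle=ds.Shuffle.GLOBAL,    # shuffle both the files and the samples
)
```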
@@ -403,6 +337,7 @@ class Dataset:
         parent = self.parent
         self.parent = []
         dataset = copy.deepcopy(self)
+        dataset = self.pre_process(dataset)
         global _OP_NAME
         _OP_NAME = Dataset._get_operator_id(dataset)
         ir_tree = dataset.parse_tree(getter_mode)
@@ -410,6 +345,19 @@ class Dataset:
             _init_device_info()
         return ir_tree, dataset
 
+    def pre_process(self, dataset):
+        """Insert batch operation for GeneratorDataset with batch_sampler."""
+        if hasattr(dataset, "has_batch_sampler") and dataset.has_batch_sampler:
+            original_parent = dataset.parent
+            dataset.parent = []
+            dataset = dataset.batch(batch_size=-1, num_parallel_workers=dataset.num_parallel_workers,
+                                    per_batch_map=dataset.collate_fn)
+            dataset.parent = original_parent
+        else:
+            for index in range(len(dataset.children)):
+                dataset.children[index] = self.pre_process(dataset.children[index])
+        return dataset
+
     def parse_tree(self, getter_mode=False):
         """
         Internal method to parse the API tree into an IR tree.
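`pre_process` rewrites a `GeneratorDataset` built with a `batch_sampler` into an ordinary `batch` node whose `per_batch_map` is the dataset's `collate_fn`. A hedged sketch of the `per_batch_map` contract it relies on (generator and column names are illustrative):

```python
import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(6):
        yield (np.array([i], dtype=np.int32),)

def collate(col, batch_info):
    # col is the list of per-sample ndarrays for column "data";
    # return a tuple with one list per output column
    return ([x + 1 for x in col],)

dataset = ds.GeneratorDataset(gen, ["data"], shuffle=False)
dataset = dataset.batch(batch_size=2, per_batch_map=collate, input_columns=["data"])
```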
@@ -495,8 +443,7 @@ class Dataset:
         .. image:: bucket_batch_by_length_en.png
 
         Note:
-            - When using
-              sink_mode.html#data-sinking>`_ in Graph mode, the input shape of the network should keep consistent.
+            - When using Data Sinking in Graph mode, the input shape of the network should keep consistent.
               You should set `drop_remainder` to "True" to discard the last incomplete batch of data,
               or supplement/remove samples to ensure the dataset size is divisible by `batch_size`.
@@ -561,7 +508,7 @@ class Dataset:
     @check_batch
     def batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, **kwargs):
         """
-        Combine batch_size number of consecutive rows into batch which apply per_batch_map to the samples first.
+        Combine `batch_size` number of consecutive rows into batch which apply `per_batch_map` to the samples first.
 
         For any column, all the elements within that column must have the same shape.
 
@@ -572,8 +519,7 @@ class Dataset:
         Note:
             - The order of using repeat and batch reflects the number of batches and per_batch_map.
               It is recommended that the repeat operation applied after the batch operation finished.
-            - When using
-              sink_mode.html#data-sinking>`_ in Graph mode, the input shape of the network should keep consistent.
+            - When using Data Sinking in Graph mode, the input shape of the network should keep consistent.
               You should set `drop_remainder` to "True" to discard the last incomplete batch of data,
               or supplement/remove samples to ensure the dataset size is divisible by `batch_size`.
 
@@ -615,13 +561,19 @@ class Dataset:
 
             - max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
               allocation to copy data between processes, the total occupied shared memory will increase as
-              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase.
-
-              ``
-
-              If
-
-
+              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase.
+              This is only used if ``python_multiprocessing`` is set to ``True``.
+              Default: ``None`` , allocate shared memory dynamically (deprecated in future version).
+
+              - If set to ``-1`` / ``None``, shared memory will be dynamically allocated with the
+                actual size of data.
+
+              - If it is an int value, it represents ``input_columns`` and ``output_columns`` use this value as the
+                unit to create shared memory.
+
+              - If it is a list, represents the ``input_columns`` use the first element as the unit to
+                create shared memory, and represents ``output_columns`` use the second element as the
+                unit to create shared memory.
 
         Returns:
             Dataset, a new dataset with the above operation applied.
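A sketch of the three `max_rowsize` forms the rewritten doc describes, continuing the batch example earlier (values are illustrative):

```python
dataset = dataset.batch(
    32, per_batch_map=collate, input_columns=["data"],
    python_multiprocessing=True, num_parallel_workers=4,
    max_rowsize=[16, 32],  # list: 16 MB per input row, 32 MB per output row
    # max_rowsize=16       # int: 16 MB shared by input and output columns
    # max_rowsize=-1       # -1 / None: dynamic allocation (the new default)
)
```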
@@ -669,8 +621,7 @@ class Dataset:
         Note:
             - The order of using repeat and padded_batch reflects the number of batches.
               It is recommended that the repeat operation applied after the padded_batch operation finished.
-            - When using
-              sink_mode.html#data-sinking>`_ in Graph mode, the input shape of the network should keep consistent.
+            - When using Data Sinking in Graph mode, the input shape of the network should keep consistent.
               You should set `drop_remainder` to "True" to discard the last incomplete batch of data,
               or supplement/remove samples to ensure the dataset size is divisible by `batch_size`.
 
@@ -724,9 +675,9 @@ class Dataset:
 
         Args:
             condition_name (str): The condition name that is used to toggle sending next row.
-            num_batch (int): the number of batches without blocking at the start of each epoch.
+            num_batch (int, optional): the number of batches without blocking at the start of each epoch.
                 Default: ``1``.
-            callback (function): The callback function that will be invoked when sync_update is called.
+            callback (function, optional): The callback function that will be invoked when sync_update is called.
                 Default: ``None``.
 
         Returns:
@@ -911,15 +862,21 @@ class Dataset:
             - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes.
               This option could be beneficial if the Python operation is computational heavy. Default: ``False``.
 
-            - max_rowsize
-
-              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase.
-
-              ``
-
-              If
-
-
+            - max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
+              allocation to copy data between processes, the total occupied shared memory will increase as
+              ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase.
+              This is only used if ``python_multiprocessing`` is set to ``True``.
+              Default: ``None`` , allocate shared memory dynamically (deprecated in future version).
+
+              - If set to ``-1`` / ``None``, shared memory will be dynamically allocated with the
+                actual size of data.
+
+              - If it is an int value, it represents ``input_columns`` and ``output_columns`` use this value as the
+                unit to create shared memory.
+
+              - If it is a list, the first element represents the ``input_columns`` use this value as the unit to
+                create shared memory, and the second element represents ``output_columns`` use this value as the
+                unit to create shared memory.
 
             - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
               Default: ``None``, which means no cache is used.
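Back-of-envelope arithmetic for the sizing note above, assuming the number of in-flight buffers scales with the prefetch size (an assumption, not documented behavior):

```python
num_parallel_workers = 4
prefetch_size = 16      # see mindspore.dataset.config.set_prefetch_size
in_mb, out_mb = 16, 32  # max_rowsize=[16, 32]

# Upper bound on the shared memory the doc warns about, under these assumptions:
total_mb = num_parallel_workers * (in_mb + out_mb) * prefetch_size
print(total_mb)  # 3072
```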
@@ -932,8 +889,29 @@ class Dataset:
         Note:
             - Input `operations` accepts TensorOperations defined in mindspore.dataset part, plus user-defined
               Python functions (PyFuncs).
-            -
-              `
+            - Setting the start method of multiprocessing to `spawn` mode by
+              ds.config.set_multiprocessing_start_method("spawn") with `python_multiprocessing=True`
+              and `num_parallel_workers>1` supports adding network computing operators from mindspore.nn and
+              mindspore.ops or other network computing operators into this `operations` .
+              Otherwise, adding to `operations` is not supported.
+            - Currently only some scenarios support calling DVPP operators in Python functions passed in with the
+              `operations` parameter:
+
+              +---------------+----------------------------+----------------------------+----------------------------+
+              |               |                            |                     Multiprocessing                     |
+              |               |       Multithreading       +----------------------------+----------------------------+
+              |               |                            |            spawn           |            fork            |
+              +===============+============================+============================+============================+
+              |Independent    |Data Processing: support    |Data Processing: support    |Data Processing: support    |
+              |               |                            |                            |                            |
+              |process mode   |Data Processing + Network   |Data Processing + Network   |Data Processing + Network   |
+              |               |training: not support       |training: support           |training: not support       |
+              +---------------+----------------------------+----------------------------+----------------------------+
+              |Non-independent|Data Processing: support    |Data Processing: support    |Data Processing: support    |
+              |               |                            |                            |                            |
+              |process mode   |Data Processing + Network   |Data Processing + Network   |Data Processing + Network   |
+              |               |training: support           |training: support           |training: not support       |
+              +---------------+----------------------------+----------------------------+----------------------------+
 
         Returns:
             Dataset, a new dataset with the above operation applied.
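A sketch of the spawn-mode path described in the note above; `set_multiprocessing_start_method` is the setter paired with the `get_multiprocessing_start_method` import added earlier. In a script this should run under an `if __name__ == "__main__":` guard:

```python
import numpy as np
import mindspore.dataset as ds

ds.config.set_multiprocessing_start_method("spawn")

def gen():
    for i in range(4):
        yield (np.ones((2, 2), dtype=np.float32) * i,)

def py_op(x):
    # per the note, spawn mode is what permits mindspore.nn / mindspore.ops
    # calls inside this Python function
    return x * 2

dataset = ds.GeneratorDataset(gen, ["data"], shuffle=False)
dataset = dataset.map(operations=py_op, input_columns=["data"],
                      python_multiprocessing=True, num_parallel_workers=2)
```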
@@ -1557,8 +1535,8 @@ class Dataset:
             >>> d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
             >>> d1.save('/path/to/save_file')
         """
-        if
-            raise RuntimeError("When encode mode
+        if _get_enc_key() is not None and num_files > 1:
+            raise RuntimeError("When encode mode is enabled, " +
                                "the automatic sharding function is unavailable.")
 
         ir_tree, api_tree = self.create_ir_tree()
@@ -1571,10 +1549,6 @@ class Dataset:
 
         consumer.Save()
 
-        if _get_hash_mode() is not None:
-            append_hash_to_file(file_name)
-            append_hash_to_file(file_name + ".db")
-
         if _get_enc_key() is not None:
             encrypt(file_name, _get_enc_key(), _get_enc_mode())
             encrypt(file_name + ".db", _get_enc_key(), _get_enc_mode())
@@ -1761,7 +1735,7 @@ class Dataset:
         Get the shapes of output data.
 
         Args:
-            estimate (bool): If `estimate` is ``False`` , will return the shapes of first data row.
+            estimate (bool, optional): If `estimate` is ``False`` , will return the shapes of first data row.
                 Otherwise, will iterate the whole dataset and return the estimated shapes of data row,
                 where dynamic shape is marked as None (used in dynamic data shapes scenario).
                 Default: ``False`` .
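Continuing the `dataset` from the sketches above, the `estimate` distinction in practice:

```python
# estimate=False (default): inspect only the first row.
# estimate=True: scan the dataset; dynamic dimensions come back as None.
shapes = dataset.output_shapes(estimate=True)
```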
@@ -2338,10 +2312,10 @@ class SourceDataset(Dataset):
         self.shard_id = replace_none(shard_id, 0)
 
         if shuffle is not None and not isinstance(shuffle, (bool, Shuffle)):
-            raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like 'Shuffle.
-                            "'Shuffle.FILES' or 'Shuffle.INFILE'.")
+            raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like 'Shuffle.ADAPTIVE' or "
+                            "'Shuffle.GLOBAL' or 'Shuffle.PARTIAL' or 'Shuffle.FILES' or 'Shuffle.INFILE'.")
 
-        self.shuffle_flag =
+        self.shuffle_flag = 5  # Adaptive shuffle
         if not isinstance(shuffle, Shuffle):
             if shuffle is None or shuffle:
                 self.shuffle_flag = 2  # Global shuffle
@@ -2354,6 +2328,10 @@ class SourceDataset(Dataset):
                 self.shuffle_flag = 1  # Files shuffle
             elif shuffle == Shuffle.INFILE:
                 self.shuffle_flag = 3  # Infile shuffle
+            elif shuffle == Shuffle.ADAPTIVE:
+                self.shuffle_flag = 5
+            elif shuffle == Shuffle.PARTIAL:
+                self.shuffle_flag = 4
 
     def parse(self, children=None):
         raise NotImplementedError("Dataset has to implement parse method.")
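The integer `shuffle_flag` encoding visible across the two hunks above, collected in one place (the no-shuffle branch falls outside this excerpt):

```python
SHUFFLE_FLAG = {
    "Shuffle.FILES": 1,
    "Shuffle.GLOBAL": 2,    # also chosen when shuffle is None or True
    "Shuffle.INFILE": 3,
    "Shuffle.PARTIAL": 4,   # new in 2.6.0rc1
    "Shuffle.ADAPTIVE": 5,  # new in 2.6.0rc1; the initial value in __init__
}
```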
@@ -2410,15 +2388,23 @@ class MappableDataset(SourceDataset):
     def __init__(self, num_parallel_workers=None, sampler=None, num_samples=None, shuffle=None, num_shards=None,
                  shard_id=None, cache=None):
         num_shards, shard_id = self._update_data_shard(num_shards, shard_id)
+        if sampler is None:
+            if shuffle is None or shuffle is True:
+                shuffle = Shuffle.GLOBAL
+            elif shuffle is False:
+                shuffle = Shuffle.FALSE
         super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
                          num_shards=num_shards, shard_id=shard_id, cache=cache)
-        self.shuffle_flag = replace_none(shuffle, True)
         self.sampler = samplers.select_sampler(num_samples, sampler, shuffle, num_shards, shard_id)
 
     def add_sampler(self, new_sampler):
         """
         Add a child sampler for the current dataset.
 
+        Note:
+            - If the sampler is added and it has a shuffle option, its value must be ``Shuffle.GLOBAL`` .
+              Additionally, the original sampler's shuffle value cannot be ``Shuffle.PARTIAL`` .
+
         Args:
             new_sampler (Sampler): The child sampler to be added.
@@ -2432,6 +2418,16 @@ class MappableDataset(SourceDataset):
         # Note: By adding a sampler, the sampled IDs will flow to the new_sampler
         # after first passing through the current samplers attached to this dataset.
         self.dataset_size = None
+
+        if self.sampler is not None and self.sampler.get_shuffle_mode() == Shuffle.PARTIAL:
+            raise RuntimeError("When multiple samplers are used, ensure that the shuffle of the current sampler "
+                               "must not be Shuffle.PARTIAL.")
+
+        if new_sampler.get_shuffle_mode() != Shuffle.GLOBAL and new_sampler.get_shuffle_mode() != Shuffle.FALSE:
+            raise RuntimeError("When multiple samplers are used, ensure that the shuffle of the input sampler "
+                               "must be Shuffle.FALSE or Shuffle.GLOBAL, but got: {}."
+                               .format(new_sampler.get_shuffle_mode()))
+
         new_sampler.add_child(self.sampler)
         self.sampler = new_sampler
 
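A sketch of the new chaining rule, assuming `RandomSampler` reports `Shuffle.GLOBAL` from the new `get_shuffle_mode()`; the directory path is hypothetical:

```python
import mindspore.dataset as ds

dataset = ds.ImageFolderDataset("/path/to/images",  # hypothetical path
                                sampler=ds.SequentialSampler(num_samples=8))

# Allowed: the added sampler shuffles with GLOBAL (or FALSE).
dataset.add_sampler(ds.RandomSampler(num_samples=4))

# A PARTIAL-shuffle sampler anywhere in the chain raises the RuntimeError above.
```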
@@ -2594,7 +2590,7 @@ def _check_shm_usage(num_worker, queue_size, in_rowsize, out_rowsize):
     threshold_ratio = 0.8
     # Verify available size only when using static shared memory on Linux
     if platform.system().lower() not in {"windows", "darwin"} and in_rowsize != -1 and out_rowsize != -1:
-        device_num =
+        device_num = get_group_size()
         # In the cluster, _get_device_num indicates the number of the entire cluster. The maximum number of cards
         # on the ascend server is 8.
         if device_num > 1:
@@ -2680,11 +2676,6 @@ class BatchDataset(UnionBaseDataset):
         else:
             self.max_rowsize = [max_rowsize[0] * self.batch_size, max_rowsize[1] * self.batch_size]
 
-    def __del__(self):
-        if hasattr(self, "process_pool") and self.process_pool is not None:
-            self.process_pool.terminate()
-            del self.process_pool
-
     def parse(self, children=None):
         return cde.BatchNode(children[0], self.batch_size, self.drop_remainder, False, self.input_columns,
                              self.output_columns, self.batch_size_func, self.per_batch_map, {},
@@ -2747,8 +2738,8 @@ class BatchDataset(UnionBaseDataset):
             if self.num_parallel_workers is None:
                 self.num_parallel_workers = get_num_parallel_workers()
 
-            self.process_pool = _PythonMultiprocessing(
-                self.max_rowsize)
+            self.process_pool = _PythonMultiprocessing(get_multiprocessing_start_method(), self.num_parallel_workers,
+                                                       str(self), [self.per_batch_map], self.max_rowsize)
             # Wrap per_batch_map into _PythonCallable
             self.per_batch_map = _PythonCallable(self.per_batch_map, 0, self.process_pool)
         else:
@@ -3023,7 +3014,7 @@ class SyncWaitDataset(UnionBaseDataset):
 
 class ShuffleDataset(UnionBaseDataset):
     """
-    The result of applying
+    The result of applying shuffle operation to the input Dataset.
 
     Args:
        input_dataset (Dataset): Input Dataset to be shuffled.
@@ -3200,9 +3191,21 @@ def _worker_loop(operations, pipe, worker_id):
 
 
 def worker_target(operations, worker_id):
+    logger.info("Multiprocessing start method: {}".format(multiprocessing.get_start_method()))
     return lambda pipe: _worker_loop(operations, pipe, worker_id)
 
 
+class WorkerTarget:
+    def __init__(self, operations, pipe, worker_id):
+        self.operations = operations
+        self.pipe = pipe
+        self.worker_id = worker_id
+        logger.info("Multiprocessing start method: {}".format(multiprocessing.get_start_method()))
+
+    def __call__(self):
+        return _worker_loop(self.operations, self.pipe, self.worker_id)
+
+
 class _MPWorker(multiprocessing.Process):
     """
     Worker process for multiprocessing.
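Presumably `WorkerTarget` exists because, under the spawn start method, a process target must be picklable, and the closure returned by `worker_target` is not. A self-contained illustration:

```python
import pickle

class EchoTarget:                    # same shape as WorkerTarget above
    def __init__(self, worker_id):
        self.worker_id = worker_id

    def __call__(self):
        return self.worker_id

pickle.dumps(EchoTarget(0))          # fine: module-level class, plain attributes

try:
    pickle.dumps(lambda pipe: pipe)  # what worker_target returns
except (pickle.PicklingError, AttributeError) as err:
    print("not picklable:", err)
```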
@@ -3257,6 +3260,12 @@ class _MPWorker(multiprocessing.Process):
 
             logger.info(f"Closing worker with PID: {self.pid}")
             self.pipe.master_close()
+
+            process_dir = os.path.join('/proc', str(self.pid))
+            while self.is_alive() and os.path.exists(process_dir):
+                logger.info("Waiting for worker {} closed ...".format(self.pid))
+                time.sleep(0.001)
+
             # del the handle which hold by master
             del self.pipe.in_queue
             del self.pipe.res_queue
@@ -3276,6 +3285,41 @@ class _MPWorker(multiprocessing.Process):
         return False
 
 
+def worker_is_alive(worker):
+    """Check the subprocess worker status in spawn mode"""
+    try:
+        return worker.is_alive()
+    except ValueError:
+        return False
+
+
+def close_worker(worker, pipe):
+    """Close the subprocess worker in spawn mode"""
+    try:
+        if worker_is_alive(worker):
+            # release the eager executor which is used by current process
+            transforms.transforms.clean_unused_executors()
+
+            logger.info(f"Closing worker with PID: {worker.pid}")
+            pipe.master_close()
+
+            process_dir = os.path.join('/proc', str(worker.pid))
+            while worker_is_alive(worker) and os.path.exists(process_dir):
+                logger.info("Waiting for worker {} closed ...".format(worker.pid))
+                time.sleep(0.5)
+
+            # del the handle which hold by master
+            del pipe.in_queue
+            del pipe.res_queue
+            worker.terminate()
+            worker.join()
+            worker.close()
+    except ValueError:
+        # Process has been closed already
+        return
+    return
+
+
 class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
     """
     A wrapper to multiprocessing.pool that performs cleanup and ensure proper termination of forked processes.
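Note: both _MPWorker.close and the new close_worker busy-wait on /proc/<pid>, which exists only where procfs is mounted (Linux); elsewhere the is_alive() check is what ends the loop. A condensed sketch of the pattern:

    import os
    import time

    def wait_until_exited(proc, poll_interval=0.5):
        # On Linux, /proc/<pid> disappears once the process is gone; on
        # platforms without procfs the path never exists, so the loop exits
        # immediately and only is_alive() matters.
        proc_dir = os.path.join('/proc', str(proc.pid))
        while proc.is_alive() and os.path.exists(proc_dir):
            time.sleep(poll_interval)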
@@ -3302,10 +3346,11 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             self.origin_hook(ex_type, value, tb)
             self.mp_pool_exit_preprocess()
 
-    def __init__(self,
+    def __init__(self, start_method, num_parallel_workers, op_name, operations, max_rowsize=(-1, -1)):
         super(_PythonMultiprocessing, self).__init__()
-        self.
+        self.start_method = start_method  # python multiprocessing start method: fork / spawn
         self.num_parallel_workers = num_parallel_workers
+        self.op_name = op_name
         self.operations = operations
         self.max_rowsize = max_rowsize
 
@@ -3316,14 +3361,14 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         self.queues_map = {}
         self.next_queue = 0
 
-        self.
-        self.watch_dog = None
+        self.cleaning_process = None
         self.ppid = None
         self.hook = None
         self.warning_ctl = None
         # cache thread (get_ident()) to worker_id mapping in Python layer
         self.python_threads_to_workers = {}
         self.eof = None
+        self.running = False
 
     def __del__(self):
         try:
@@ -3331,60 +3376,6 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         except TypeError:
             pass
 
-    # This wait function is for cleaning zombie subprocesses
-    @staticmethod
-    def wait_pid():
-        """
-        This function is used by the main process to release subprocess resources.
-        """
-        try:
-            while True:
-                child_pid, _ = os.waitpid(-1, os.WNOHANG)
-                if child_pid == 0:
-                    break
-        except OSError:
-            # waitpid may fail for some reason, so we ignore this error
-            pass
-
-    # Dataset need watch_dog thread to monitoring fork multiprocessing,
-    # and thread can't be a member function otherwise python won't collect and release resources.
-    @staticmethod
-    def _watch_dog(eot, workers):
-        """
-        This thread is for monitoring subprocesses forked by GeneratorDataset/map/batch
-        """
-        if not isinstance(workers, list):
-            raise TypeError("[Internal Error] The 2nd parameter of watch dog thread should be list of process, "
-                            "but got {}.".format(type(workers)))
-
-        while not eot.is_set():
-            # Monitoring and count how many subprocesses already exit
-            clear_subprocess_timeout = _PythonMultiprocessing._monitor_subprocess_exit(workers)
-            # If find subprocess exit, we will wait for 30s and do some waitpid operations
-            if clear_subprocess_timeout > 0:
-                start = time.time()
-                while time.time() - start < clear_subprocess_timeout:
-                    # We need to distinguishing get_dataset_size or train finished normally and hang scenario.
-                    # If get_dataset_size or train finished normally, _stop_subprocess can be execute and
-                    # self.need_abort can be set to True. If main process is hang in get(), self.need_abort
-                    # will never set to True, then we wait for 30s and kill main process
-                    if eot.is_set():
-                        return
-                    # Sometimes subprocess may be zombie, so in 30s we can wait and do some useful tasks(waitpid).
-                    _PythonMultiprocessing.wait_pid()
-                # multiprocessing.queue may hang in .get() forever when put() process was killed.
-                # We have to exit main process otherwise main process will hang.
-                _PythonMultiprocessing._terminate_processes(workers)
-                logger.critical("The subprocess of dataset may exit unexpected or be killed, "
-                                "main process will exit. If this is not an artificial operation, you can use "
-                                "ds.config.set_enable_watchdog(False) to block this error.")
-                os.kill(os.getpid(), signal.SIGTERM)
-            # sleep to release GIL
-            time.sleep(1)
-
-        # release the workers
-        del workers
-
     @staticmethod
     def _terminate_processes(processes):
         """Terminate subprocesses"""
@@ -3401,45 +3392,12 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                 # We don't use w.join because join can only used in main process or join will raise an error.
                 p._popen.wait()  # pylint: disable=W0212
 
-    # Monitor the exit number of subprocesses
-    @staticmethod
-    def _monitor_subprocess_exit(workers):
-        """
-        To monitor whether process is exit.
-
-        Args:
-            workers (list of multiprocessing.Process): multiprocessing.Process.
-
-        Returns:
-            int, the timeout(in seconds) when process exit.
-        """
-        for w in workers:
-            try:
-                exit_code = w.exitcode
-                if exit_code is not None:
-                    # For kill -9, we can exit quickly
-                    if exit_code == -9:
-                        return 1
-                    # For kill -15, we still exit after 30s
-                    if exit_code == -15:
-                        return 30
-                # In some cases the subprocess has been killed but the exitcode is still None.
-                # So we use os.kill(pid, 0) to check if it is alive.
-                subprocess_alive = _PythonMultiprocessing.is_process_alive(w.pid)
-                if not subprocess_alive:
-                    # Like kill -15, we wait 30s before exit
-                    return 30
-            except ValueError:
-                # process has been closed already
-                return 0
-        return 0
-
     @staticmethod
     def is_process_alive(pid):
         """
         Check if the process is alive or not.
         Note: We hit a deadlock when we use psutil or w.exitcode to check whether a process is alive.
-        Instead we use os.kill(ppid, 0).
+        Instead, we use os.kill(ppid, 0).
 
         Args:
             pid: pid of the process to be checked
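Note: is_process_alive itself is kept (only its docstring wording changed); the signal-0 probe it relies on is standard POSIX behavior. A minimal standalone version of the same check:

    import os

    def is_alive(pid):
        """Probe a PID without delivering a signal (POSIX)."""
        try:
            os.kill(pid, 0)      # signal 0 performs existence/permission checks only
        except ProcessLookupError:
            return False         # no such process
        except PermissionError:
            return True          # exists, but owned by another user
        return True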
@@ -3466,6 +3424,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             quit_signal: The flag of quit.
         """
         signal.signal(signal.SIGINT, signal.SIG_IGN)
+        # Initialize C++ side signal handlers
+        cde.register_worker_handlers()
         while _PythonMultiprocessing.is_process_alive(ppid):
             if quit_signal.is_set():
                 return
@@ -3477,6 +3437,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
 
             time.sleep(0.1)
 
+        logger.info("Clean process detects that the main process {} has exited, begin to terminate the "
+                    "worker process(es): {}".format(ppid, [worker.pid for worker in workers]))
         _PythonMultiprocessing._terminate_processes(workers)
         del workers
         os.kill(os.getpid(), signal.SIGTERM)
@@ -3493,10 +3455,10 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         """
         self.python_threads_to_workers = {}
         self.op_id = op_id
-        logger.info("Launching new Python
+        logger.info("Launching new Python multiprocessing pool for Op: " + str(self.op_id))
         if self.is_mp_enabled():
             message = "Launching a new Python multiprocessing pool while a pool already exists!" + \
-
+                      " The existing pool will be terminated first."
             logger.warning(message)
             self.terminate()
             self.reset()
@@ -3515,32 +3477,52 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         if self.workers is not None:
             raise Exception("Pool was already created, close it first.")
 
-        # Let gc collect unreferenced memory to avoid child processes in the pool to do it
-        gc.collect()
-
-        # Construct python worker processes
         self.workers = []
+        self.pipes = []
+        self.check_interval = get_multiprocessing_timeout_interval()
         self.warning_ctl = multiprocessing.Value('i', 0)
-
-
-
-
+        if self.start_method == "fork":
+            # Construct python worker processes
+            for worker_id in range(self.num_parallel_workers):
+                worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, worker_id)
+                worker.start()
+                self.workers.append(worker)
+        else:
+            multiprocessing.set_start_method(self.start_method, True)
+
+            # Construct python worker processes
+            for worker_id in range(self.num_parallel_workers):
+                shared_memory = get_enable_shared_mem()
+                pipe = Pipe(self.warning_ctl, shared_memory=shared_memory, max_rowsize=self.max_rowsize)
+                self.check_interval = get_multiprocessing_timeout_interval()
+                worker = multiprocessing.Process(target=WorkerTarget(self.operations, pipe, worker_id),
+                                                 name="MapWorker" + str(worker_id), daemon=True)
+                self.workers.append(worker)
+                self.pipes.append(pipe)
+                worker.start()
+
+            multiprocessing.set_start_method("fork", True)
 
-        logger.info("
+        logger.info("Launch worker process(es): {}".format(self.get_pids()))
 
         self.hook = _PythonMultiprocessing._ExceptHookHandler()
 
-        #
-        self.
+        # Launch a clean process and register worker processes to be monitored by the watch dog.
+        self._launch_monitor()
+        self.running = True
 
-
+        # Register a termination function using weakref so registration does not keep the object
+        # from being properly destructed.
+        atexit.register(lambda cleanup: cleanup()() if cleanup() is not None else None,
+                        weakref.WeakMethod(self.terminate))
 
     def terminate(self):
-
-
-
-
-
+        if self.running:
+            # abort the monitor first and then close all the workers
+            self._abort_monitor()
+            self.close_all_workers()
+            if hasattr(self, "warning_ctl"):
+                del self.warning_ctl
+            self.running = False
 
     def get_pids(self):
         """
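Note: the atexit registration above goes through weakref.WeakMethod so the exit hook does not itself keep the pool alive. A self-contained sketch of the same pattern (illustrative class name, standard library only):

    import atexit
    import weakref

    class Pool:
        def terminate(self):
            print("terminated")

    pool = Pool()
    # Registering pool.terminate directly would pin `pool` in atexit's registry
    # forever; a WeakMethod lets the pool be garbage collected earlier, and the
    # wrapper only calls terminate() if the pool still exists at interpreter exit.
    atexit.register(lambda ref: ref()() if ref() is not None else None,
                    weakref.WeakMethod(pool.terminate))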
@@ -3596,15 +3578,48 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
 
         # todo check_iterator_cleanup
         if self.is_running() and check_iterator_cleanup() is False:
-
+            if self.start_method == "fork":
+                return self.workers[worker_id].execute(idx, *args)
+            # spawn mode
+            self.pipes[worker_id].master_send(idx, args)
+            time_s = time.time()
+            wait_count = 1
+            while True:
+                cost_time = time.time() - time_s
+                if cost_time / self.check_interval >= wait_count:
+                    wait_count += 1
+                    logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
+                                   "worker of the map operation is hanging. "
+                                   "Check whether the user defined data transform is too slow or the "
+                                   "output data is too large. You can also set the timeout interval by "
+                                   "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
+                                   "of this log.")
+                    pid = self.workers[worker_id].pid
+                    logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
+                    install_status, _ = subprocess.getstatusoutput("py-spy --version")
+                    if install_status == 0:
+                        stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
+                        logger.warning("Map worker subprocess stack:\n{}".format(stack))
+                    else:
+                        logger.warning("Please `pip install py-spy` to get the stacks of the stuck process.")
+                try:
+                    res = self.pipes[worker_id].master_receive()
+                except queue.Empty:
+                    continue
+                if res is None:
+                    # receive finish signal
+                    return None
+                if isinstance(res, ExceptionHandler):
+                    res.reraise()
+                return res
 
         return None
 
-    def
+    def _launch_monitor(self):
         """
-
-        The
-        The
+        Launch a clean process and register subprocess to be monitored by the watch dog.
+        The clean process will clean up subprocesses when main process exited.
+        The watch dog will clean up subprocesses and main process when any subprocess exited.
         """
         if platform.system().lower() != 'windows':
             self.eof = multiprocessing.Event()
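Note: the hang diagnostics above shell out to py-spy, an external sampling profiler that is not bundled with MindSpore and must be installed separately. The same probe in isolation:

    import subprocess

    def dump_worker_stack(pid):
        # `py-spy dump -p <pid> -l` prints the target's Python stack, with
        # `-l` adding local variables to each frame.
        status, _ = subprocess.getstatusoutput("py-spy --version")
        if status != 0:
            return "py-spy not found; run `pip install py-spy` first"
        return subprocess.getoutput("py-spy dump -p {} -l".format(pid))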
@@ -3613,38 +3628,45 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                 args=(self.ppid, self.workers, self.eof),
                 daemon=True)
             self.cleaning_process.start()
+            logger.info("Launch clean process {} to monitor worker "
+                        "process(es): {}".format(self.cleaning_process.pid, self.get_pids()))
 
             if get_enable_watchdog():
-
-
-
-
-
-
-
-
-                if
-                self.
-
-    def abort_watchdog(self):
-        if hasattr(self, 'watch_dog') and self.watch_dog is not None and hasattr(self, 'eot') and self.eot is not None:
-            self._abort_watchdog()
+                worker_ids = [worker.pid for worker in self.workers]
+                worker_ids.append(self.cleaning_process.pid)
+                cde.register_worker_pids(id(self), set(worker_ids))
+
+    def _abort_monitor(self):
+        """Deregister workers monitored by the watch dog and join clean process."""
+        if get_enable_watchdog():
+            cde.deregister_worker_pids(id(self))
+        if hasattr(self, 'eof') and self.eof is not None:
+            self.eof.set()
         if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
-
-
-
+            # let the quit event notify the cleaning process to exit
+            self.cleaning_process.join(timeout=5)
+            if self.cleaning_process.is_alive():
+                # if the cleaning process did not exit, it may hang, try to terminate it
+                _PythonMultiprocessing._terminate_processes([self.cleaning_process])
             del self.cleaning_process
 
     def is_running(self):
         if hasattr(self, 'workers') and self.workers is not None:
-
+            if self.start_method == "fork":
+                return all([w.is_alive() for w in self.workers])
+            return all([worker_is_alive(w) for w in self.workers])
         return False
 
     def close_all_workers(self):
         """Close all the subprocess workers"""
         if hasattr(self, 'workers') and self.workers is not None:
-
-            w.
+            if self.start_method == "fork":
+                for w in self.workers:
+                    w.close()
+            else:
+                for i, w in enumerate(self.workers):
+                    close_worker(w, self.pipes[i])
+
             check_interval = get_multiprocessing_timeout_interval()
             for w in self.workers:
                 try:
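Note: worker_is_alive and close_worker wrap their calls in `except ValueError` because multiprocessing.Process raises ValueError from most methods once the process object has been closed. Demonstrated standalone:

    import multiprocessing as mp

    def noop():
        pass

    if __name__ == "__main__":
        p = mp.Process(target=noop)
        p.start()
        p.join()
        p.close()           # releases the Process object's resources
        try:
            p.is_alive()
        except ValueError as err:
            print(err)      # "process object is closed"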
@@ -3660,8 +3682,12 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                         continue
                     raise e
                 try:
-                    if
-
+                    if self.start_method == "fork":
+                        if w.is_alive():
+                            os.close(subprocess_file_descriptor)
+                    else:
+                        if worker_is_alive(w):
+                            os.close(subprocess_file_descriptor)
                 except OSError as e:
                     # Maybe the file descriptor had been released, so ignore the 'Bad file descriptor'
                     if "Bad file descriptor" not in str(e):
@@ -3670,6 +3696,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             # use clear to release the handle which is better than self.workers = None
             self.workers.clear()
             self.workers = None
+            self.pipes.clear()
+            self.pipes = None
             self.pids = None
 
 
@@ -3782,11 +3810,6 @@ class MapDataset(UnionBaseDataset):
     def __deepcopy__(self, memodict):
         return self.__safe_deepcopy__(memodict, exclude=("operations", "callbacks", "__transfer_dataset__"))
 
-    def __del__(self):
-        if hasattr(self, "process_pool") and self.process_pool is not None:
-            self.process_pool.terminate()
-            del self.process_pool
-
     @staticmethod
     def __parse_op_name(op):
         """
@@ -3915,8 +3938,9 @@ class MapDataset(UnionBaseDataset):
             callable_list.append(op)
 
         if callable_list:
-            self.process_pool = _PythonMultiprocessing(
-                self.
+            self.process_pool = _PythonMultiprocessing(get_multiprocessing_start_method(),
+                                                       self.num_parallel_workers, str(self),
+                                                       callable_list, self.max_rowsize)
             # Pass #2
             idx = 0
             for op in self.operations:
@@ -4142,6 +4166,7 @@ class ConcatDataset(UnionBaseDataset):
             if isinstance(c, ConcatDataset):
                 c.use_sampler(sampler)
                 set_child(c)
+
         set_child(self)
 
         return
@@ -4242,7 +4267,7 @@ class _ToDevice:
         if get_debug_mode():
             logger.error("MindData debugger cannot be used in dataset sink mode. Please manually turn off "
                          "sink mode and try debugger again.")
-        ir_tree,
+        ir_tree, _ = dataset.create_ir_tree()
 
         self._runtime_context = cde.PythonRuntimeContext()
         self._runtime_context.Init()
@@ -4442,7 +4467,7 @@ class Schema:
     Class to represent a schema of a dataset.
 
     Args:
-        schema_file (str): Path of the schema file. Default: ``None``.
+        schema_file (str, optional): Path of the schema file. Default: ``None``.
 
     Raises:
         RuntimeError: If schema file failed to load.