mindspore 2.4.10__cp39-cp39-win_amd64.whl → 2.6.0rc1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +13 -6
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -0
- mindspore/_checkparam.py +3 -38
- mindspore/_deprecated/__init__.py +17 -0
- mindspore/_deprecated/jit.py +198 -0
- mindspore/_extends/builtin_operations.py +1 -1
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +6 -7
- mindspore/_extends/parse/compile_config.py +83 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
- mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
- mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
- mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
- mindspore/_extends/parse/parser.py +46 -197
- mindspore/_extends/parse/resources.py +1 -5
- mindspore/_extends/parse/standard_method.py +217 -98
- mindspore/_extends/pijit/__init__.py +2 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
- mindspore/_extends/pijit/tensor_func_list.py +27 -0
- mindspore/_extends/utils.py +1 -1
- mindspore/amp.py +11 -5
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/__init__.py +2 -2
- mindspore/boost/base.py +3 -7
- mindspore/boost/boost_cell_wrapper.py +138 -43
- mindspore/common/__init__.py +6 -3
- mindspore/common/_grad_function.py +56 -0
- mindspore/common/_pijit_context.py +14 -5
- mindspore/common/_register_for_tensor.py +1 -2
- mindspore/common/_stub_tensor.py +30 -14
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +4760 -0
- mindspore/common/api.py +435 -371
- mindspore/common/auto_dynamic_shape.py +41 -44
- mindspore/common/dtype.py +39 -36
- mindspore/common/dump.py +9 -6
- mindspore/common/file_system.py +9 -1
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +6 -2
- mindspore/common/initializer.py +13 -10
- mindspore/common/jit_begin_end.py +94 -0
- mindspore/common/jit_config.py +6 -1
- mindspore/common/jit_context.py +76 -0
- mindspore/common/jit_trace.py +378 -0
- mindspore/common/lazy_inline.py +9 -3
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/mutable.py +5 -4
- mindspore/common/parameter.py +135 -52
- mindspore/common/seed.py +2 -2
- mindspore/common/sparse_tensor.py +23 -17
- mindspore/common/tensor.py +951 -1992
- mindspore/communication/__init__.py +7 -5
- mindspore/communication/_comm_helper.py +52 -2
- mindspore/communication/comm_func.py +240 -181
- mindspore/communication/management.py +95 -26
- mindspore/context.py +314 -566
- mindspore/dataset/__init__.py +65 -37
- mindspore/dataset/audio/__init__.py +2 -8
- mindspore/dataset/audio/transforms.py +3 -17
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +87 -6
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +6 -5
- mindspore/dataset/engine/datasets.py +292 -267
- mindspore/dataset/engine/datasets_audio.py +22 -8
- mindspore/dataset/engine/datasets_standard_format.py +46 -27
- mindspore/dataset/engine/datasets_text.py +78 -48
- mindspore/dataset/engine/datasets_user_defined.py +182 -116
- mindspore/dataset/engine/datasets_vision.py +120 -44
- mindspore/dataset/engine/iterators.py +283 -63
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +289 -43
- mindspore/dataset/engine/serializer_deserializer.py +3 -2
- mindspore/dataset/engine/validators.py +53 -11
- mindspore/dataset/text/__init__.py +7 -6
- mindspore/dataset/text/transforms.py +6 -5
- mindspore/dataset/text/utils.py +3 -3
- mindspore/dataset/transforms/__init__.py +0 -9
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +31 -14
- mindspore/dataset/utils/browse_dataset.py +1 -1
- mindspore/dataset/vision/__init__.py +2 -9
- mindspore/dataset/vision/transforms.py +202 -158
- mindspore/dataset/vision/utils.py +7 -5
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +153 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +123 -0
- mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +170 -0
- mindspore/experimental/es/embedding_service.py +35 -27
- mindspore/experimental/llm_boost/__init__.py +1 -0
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/map_parameter.py +4 -4
- mindspore/experimental/optim/adadelta.py +6 -6
- mindspore/experimental/optim/adagrad.py +4 -4
- mindspore/experimental/optim/adam.py +7 -0
- mindspore/experimental/optim/adamax.py +4 -4
- mindspore/experimental/optim/adamw.py +4 -0
- mindspore/experimental/optim/asgd.py +1 -1
- mindspore/experimental/optim/lr_scheduler.py +73 -46
- mindspore/experimental/optim/radam.py +34 -31
- mindspore/experimental/optim/rprop.py +1 -1
- mindspore/experimental/optim/sgd.py +1 -1
- mindspore/hal/contiguous_tensors_handle.py +6 -10
- mindspore/hal/device.py +55 -53
- mindspore/hal/event.py +52 -52
- mindspore/hal/memory.py +157 -117
- mindspore/hal/stream.py +150 -109
- mindspore/include/api/context.h +0 -1
- mindspore/include/dataset/constants.h +7 -4
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +50 -0
- mindspore/mindrecord/__init__.py +21 -8
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +796 -759
- mindspore/mint/distributed/__init__.py +70 -4
- mindspore/mint/distributed/distributed.py +2679 -44
- mindspore/mint/linalg/__init__.py +8 -0
- mindspore/mint/nn/__init__.py +743 -22
- mindspore/mint/nn/functional.py +716 -23
- mindspore/mint/nn/layer/__init__.py +21 -4
- mindspore/mint/nn/layer/_functions.py +334 -0
- mindspore/mint/nn/layer/activation.py +276 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +921 -0
- mindspore/mint/nn/layer/normalization.py +223 -28
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +235 -0
- mindspore/mint/optim/__init__.py +3 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/optim/sgd.py +171 -0
- mindspore/mint/special/__init__.py +2 -1
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/__init__.py +4 -1
- mindspore/nn/cell.py +1370 -189
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +29 -27
- mindspore/nn/layer/basic.py +51 -35
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/container.py +1 -1
- mindspore/nn/layer/conv.py +22 -17
- mindspore/nn/layer/embedding.py +12 -11
- mindspore/nn/layer/normalization.py +56 -49
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +120 -42
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +59 -36
- mindspore/nn/learning_rate_schedule.py +8 -4
- mindspore/nn/loss/loss.py +58 -55
- mindspore/nn/optim/ada_grad.py +7 -5
- mindspore/nn/optim/adadelta.py +11 -9
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +17 -13
- mindspore/nn/optim/adamax.py +8 -7
- mindspore/nn/optim/adasum.py +5 -5
- mindspore/nn/optim/asgd.py +1 -1
- mindspore/nn/optim/ftrl.py +11 -9
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/lazyadam.py +12 -10
- mindspore/nn/optim/momentum.py +7 -6
- mindspore/nn/optim/optimizer.py +3 -3
- mindspore/nn/optim/proximal_ada_grad.py +12 -10
- mindspore/nn/optim/rmsprop.py +13 -12
- mindspore/nn/optim/rprop.py +11 -9
- mindspore/nn/optim/sgd.py +9 -6
- mindspore/nn/optim/tft_wrapper.py +5 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/probability/bijector/bijector.py +17 -11
- mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
- mindspore/nn/probability/bijector/invert.py +2 -2
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +3 -2
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +1 -1
- mindspore/nn/probability/distribution/cauchy.py +4 -2
- mindspore/nn/probability/distribution/exponential.py +6 -7
- mindspore/nn/probability/distribution/gamma.py +2 -2
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/half_normal.py +5 -3
- mindspore/nn/probability/distribution/logistic.py +5 -3
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/uniform.py +5 -3
- mindspore/nn/reinforcement/_tensors_queue.py +1 -1
- mindspore/nn/reinforcement/tensor_array.py +1 -1
- mindspore/nn/utils/init.py +13 -11
- mindspore/nn/wrap/__init__.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +181 -122
- mindspore/nn/wrap/grad_reducer.py +45 -36
- mindspore/nn/wrap/loss_scale.py +6 -7
- mindspore/numpy/array_creations.py +63 -65
- mindspore/numpy/array_ops.py +149 -144
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +365 -363
- mindspore/numpy/utils.py +17 -18
- mindspore/numpy/utils_const.py +5 -6
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +5 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_register_for_op.py +0 -11
- mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
- mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
- mindspore/ops/_vmap/vmap_array_ops.py +27 -25
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
- mindspore/ops/_vmap/vmap_math_ops.py +15 -16
- mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
- mindspore/ops/auto_generate/__init__.py +4 -3
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
- mindspore/ops/auto_generate/gen_extend_func.py +764 -124
- mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
- mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
- mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
- mindspore/ops/composite/__init__.py +2 -1
- mindspore/ops/composite/base.py +20 -25
- mindspore/ops/composite/math_ops.py +6 -16
- mindspore/ops/composite/multitype_ops/__init__.py +5 -2
- mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
- mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
- mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
- mindspore/ops/function/__init__.py +40 -2
- mindspore/ops/function/_add_attr_func.py +58 -0
- mindspore/ops/function/array_func.py +2089 -2403
- mindspore/ops/function/clip_func.py +80 -23
- mindspore/ops/function/debug_func.py +57 -57
- mindspore/ops/function/grad/__init__.py +1 -0
- mindspore/ops/function/grad/grad_func.py +104 -71
- mindspore/ops/function/image_func.py +2 -2
- mindspore/ops/function/linalg_func.py +47 -78
- mindspore/ops/function/math_func.py +4501 -3802
- mindspore/ops/function/nn_func.py +1726 -620
- mindspore/ops/function/other_func.py +159 -1
- mindspore/ops/function/parameter_func.py +18 -84
- mindspore/ops/function/random_func.py +440 -387
- mindspore/ops/function/reshard_func.py +4 -70
- mindspore/ops/function/sparse_func.py +3 -3
- mindspore/ops/function/sparse_unary_func.py +6 -6
- mindspore/ops/function/spectral_func.py +25 -58
- mindspore/ops/function/vmap_func.py +24 -17
- mindspore/ops/functional.py +22 -7
- mindspore/ops/functional_overload.py +1440 -0
- mindspore/ops/op_info_register.py +32 -244
- mindspore/ops/operations/__init__.py +13 -7
- mindspore/ops/operations/_custom_ops_utils.py +247 -0
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -43
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +43 -84
- mindspore/ops/operations/_ms_kernel.py +4 -10
- mindspore/ops/operations/_rl_inner_ops.py +1 -1
- mindspore/ops/operations/_scalar_ops.py +3 -2
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/array_ops.py +81 -324
- mindspore/ops/operations/comm_ops.py +154 -108
- mindspore/ops/operations/custom_ops.py +232 -78
- mindspore/ops/operations/debug_ops.py +153 -59
- mindspore/ops/operations/inner_ops.py +7 -5
- mindspore/ops/operations/linalg_ops.py +1 -57
- mindspore/ops/operations/manually_defined/_inner.py +1 -1
- mindspore/ops/operations/manually_defined/ops_def.py +928 -180
- mindspore/ops/operations/math_ops.py +32 -234
- mindspore/ops/operations/nn_ops.py +210 -498
- mindspore/ops/operations/other_ops.py +62 -9
- mindspore/ops/operations/random_ops.py +13 -7
- mindspore/ops/operations/reshard_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +2 -2
- mindspore/ops/primitive.py +66 -53
- mindspore/ops/tensor_method.py +1888 -0
- mindspore/ops_generate/__init__.py +0 -5
- mindspore/ops_generate/aclnn/__init__.py +0 -0
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
- mindspore/ops_generate/api/__init__.py +0 -0
- mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
- mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
- mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
- mindspore/ops_generate/api/functions_cc_generator.py +237 -0
- mindspore/ops_generate/api/gen_api.py +103 -0
- mindspore/ops_generate/api/op_api_proto.py +235 -0
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
- mindspore/ops_generate/common/__init__.py +0 -0
- mindspore/ops_generate/common/base_generator.py +11 -0
- mindspore/ops_generate/common/gen_constants.py +91 -0
- mindspore/ops_generate/common/gen_utils.py +348 -0
- mindspore/ops_generate/common/op_proto.py +473 -0
- mindspore/ops_generate/common/template.py +523 -0
- mindspore/ops_generate/gen_ops.py +22 -1069
- mindspore/ops_generate/op_def/__init__.py +0 -0
- mindspore/ops_generate/op_def/gen_op_def.py +90 -0
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
- mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
- mindspore/ops_generate/op_def_py/__init__.py +0 -0
- mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
- mindspore/ops_generate/pyboost/__init__.py +0 -0
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
- mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
- mindspore/ops_generate/resources/__init__.py +0 -0
- mindspore/ops_generate/resources/resource_list.py +30 -0
- mindspore/ops_generate/resources/resource_loader.py +36 -0
- mindspore/ops_generate/resources/resource_manager.py +64 -0
- mindspore/ops_generate/resources/yaml_loader.py +88 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
- mindspore/parallel/__init__.py +7 -3
- mindspore/parallel/_auto_parallel_context.py +152 -34
- mindspore/parallel/_cell_wrapper.py +130 -15
- mindspore/parallel/_parallel_serialization.py +107 -5
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +7 -2
- mindspore/parallel/_tensor.py +142 -18
- mindspore/parallel/_utils.py +199 -23
- mindspore/parallel/algo_parameter_config.py +4 -4
- mindspore/parallel/auto_parallel.py +732 -0
- mindspore/parallel/checkpoint_convert.py +159 -0
- mindspore/parallel/checkpoint_transform.py +698 -35
- mindspore/parallel/cluster/process_entity/_api.py +276 -50
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +21 -4
- mindspore/parallel/function/__init__.py +24 -0
- mindspore/parallel/function/reshard_func.py +259 -0
- mindspore/parallel/nn/__init__.py +25 -0
- mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
- mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
- mindspore/parallel/parameter_broadcast.py +25 -14
- mindspore/parallel/shard.py +137 -58
- mindspore/parallel/transform_safetensors.py +363 -305
- mindspore/profiler/__init__.py +22 -5
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +186 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +221 -0
- mindspore/profiler/common/path_manager.py +395 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +500 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_meta_data.py +74 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +251 -0
- mindspore/profiler/common/profiler_path_manager.py +179 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +341 -75
- mindspore/profiler/envprofiler.py +163 -0
- mindspore/profiler/experimental_config.py +197 -0
- mindspore/profiler/mstx.py +242 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +335 -0
- mindspore/profiler/profiler.py +1073 -90
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +118 -0
- mindspore/profiler/schedule.py +243 -0
- mindspore/rewrite/api/node.py +15 -13
- mindspore/rewrite/api/symbol_tree.py +2 -3
- mindspore/run_check/_check_version.py +27 -20
- mindspore/run_check/run_check.py +1 -1
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +177 -0
- mindspore/runtime/memory.py +409 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/safeguard/rewrite_obfuscation.py +12 -9
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +8 -8
- mindspore/train/_utils.py +88 -25
- mindspore/train/amp.py +9 -5
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +2 -16
- mindspore/train/callback/_checkpoint.py +53 -55
- mindspore/train/callback/_cluster_monitor.py +14 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +103 -68
- mindspore/train/callback/_history.py +8 -5
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +0 -3
- mindspore/train/callback/_loss_monitor.py +2 -1
- mindspore/train/callback/_on_request_exit.py +6 -5
- mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
- mindspore/train/callback/_summary_collector.py +52 -19
- mindspore/train/callback/_time_monitor.py +2 -1
- mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
- mindspore/train/data_sink.py +25 -2
- mindspore/train/dataset_helper.py +15 -16
- mindspore/train/loss_scale_manager.py +8 -7
- mindspore/train/metrics/accuracy.py +3 -3
- mindspore/train/metrics/confusion_matrix.py +9 -9
- mindspore/train/metrics/error.py +3 -3
- mindspore/train/metrics/hausdorff_distance.py +4 -4
- mindspore/train/metrics/mean_surface_distance.py +3 -3
- mindspore/train/metrics/metric.py +0 -12
- mindspore/train/metrics/occlusion_sensitivity.py +4 -2
- mindspore/train/metrics/precision.py +11 -10
- mindspore/train/metrics/recall.py +9 -9
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +174 -46
- mindspore/train/model.py +184 -113
- mindspore/train/serialization.py +622 -978
- mindspore/train/summary/_summary_adapter.py +2 -2
- mindspore/train/summary/summary_record.py +2 -3
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dryrun.py +140 -0
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/utils/utils.py +138 -4
- mindspore/version.py +1 -1
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +562 -393
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
- mindspore/_install_custom.py +0 -43
- mindspore/common/_register_for_adapter.py +0 -74
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
- mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
- mindspore/ops_generate/gen_aclnn_implement.py +0 -263
- mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
- mindspore/ops_generate/gen_pyboost_func.py +0 -1052
- mindspore/ops_generate/gen_utils.py +0 -209
- mindspore/ops_generate/op_proto.py +0 -145
- mindspore/ops_generate/template.py +0 -261
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -16,23 +16,32 @@
|
|
|
16
16
|
import os
|
|
17
17
|
import re
|
|
18
18
|
import sys
|
|
19
|
+
import signal
|
|
19
20
|
import subprocess
|
|
21
|
+
import socket
|
|
22
|
+
import psutil
|
|
20
23
|
import mindspore.log as logger
|
|
21
|
-
from ._utils import _generate_cmd_args_list, _generate_cmd_args_list_with_core, _generate_url
|
|
22
|
-
|
|
24
|
+
from ._utils import _generate_cmd_args_list, _generate_cmd_args_list_with_core, _generate_url, \
|
|
25
|
+
_is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip
|
|
26
|
+
|
|
23
27
|
|
|
24
28
|
class _Node:
|
|
25
29
|
"""
|
|
26
30
|
Base class for dynamic networking nodes.
|
|
27
31
|
|
|
28
32
|
"""
|
|
29
|
-
|
|
33
|
+
|
|
34
|
+
def __init__(self, worker_num, sched_host, sched_port, timeout, args_list, output_file, tail_worker_log,
|
|
35
|
+
join, is_simulation):
|
|
30
36
|
self.worker_num = worker_num
|
|
31
37
|
self.sched_host = sched_host
|
|
32
38
|
self.sched_port = sched_port
|
|
33
39
|
self.args_list = args_list
|
|
34
40
|
self.output_file = output_file
|
|
35
41
|
self.timeout = timeout
|
|
42
|
+
self.tail_worker_log = tail_worker_log
|
|
43
|
+
self.join = join
|
|
44
|
+
self.is_simulation = is_simulation
|
|
36
45
|
|
|
37
46
|
def run(self):
|
|
38
47
|
"""
|
|
@@ -40,15 +49,20 @@ class _Node:
|
|
|
40
49
|
|
|
41
50
|
"""
|
|
42
51
|
os.environ["MS_WORKER_NUM"] = str(self.worker_num)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
52
|
+
# If simulation level is set, environment variables for dynamic networking will not be set,
|
|
53
|
+
# and scheduler will not be started.
|
|
54
|
+
if not self.is_simulation:
|
|
55
|
+
os.environ["MS_SCHED_HOST"] = self.sched_host
|
|
56
|
+
os.environ["MS_SCHED_PORT"] = str(self.sched_port)
|
|
57
|
+
os.environ["MS_TOPO_TIMEOUT"] = str(self.timeout)
|
|
58
|
+
|
|
46
59
|
|
|
47
60
|
class _MetaServerNode(_Node):
|
|
48
61
|
"""
|
|
49
62
|
Scheduler node for dynamic networking. Inherits from the Node class.
|
|
50
63
|
|
|
51
64
|
"""
|
|
65
|
+
|
|
52
66
|
def run(self):
|
|
53
67
|
"""
|
|
54
68
|
Runs the MetaServerNode by setting environment variables, setting the MS_ROLE variable to
|
|
@@ -59,14 +73,17 @@ class _MetaServerNode(_Node):
|
|
|
59
73
|
with open(self.output_file, "w") as file_handle:
|
|
60
74
|
return subprocess.Popen(self.args_list, stdout=file_handle, stderr=subprocess.STDOUT)
|
|
61
75
|
|
|
76
|
+
|
|
62
77
|
class _ComputeGraphNode(_Node):
|
|
63
78
|
"""
|
|
64
79
|
Worker node for dynamic networking. Inherits from the Node class.
|
|
65
80
|
"""
|
|
66
|
-
def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, args_list, output_file):
|
|
67
|
-
super().__init__(worker_num, sched_host, sched_port, timeout, args_list, output_file)
|
|
68
|
-
self.node_id = node_id
|
|
69
81
|
|
|
82
|
+
def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, args_list, output_file,
|
|
83
|
+
tail_worker_log, join, is_simulation):
|
|
84
|
+
super().__init__(worker_num, sched_host, sched_port, timeout, args_list, output_file,
|
|
85
|
+
tail_worker_log, join, is_simulation)
|
|
86
|
+
self.node_id = node_id
|
|
70
87
|
|
|
71
88
|
def run(self):
|
|
72
89
|
"""
|
|
@@ -78,9 +95,36 @@ class _ComputeGraphNode(_Node):
|
|
|
78
95
|
super().run()
|
|
79
96
|
if self.node_id is not None:
|
|
80
97
|
os.environ["MS_NODE_ID"] = str(self.node_id)
|
|
81
|
-
|
|
98
|
+
# If simulation level is set, environment variable 'MS_ROLE' will not be set.
|
|
99
|
+
if not self.is_simulation:
|
|
100
|
+
os.environ["MS_ROLE"] = "MS_WORKER"
|
|
101
|
+
tail_worker_process = None
|
|
102
|
+
is_tail_worker_log = self.enable_tail_worker_log()
|
|
103
|
+
if self.join and not is_tail_worker_log:
|
|
104
|
+
logger.warning(f"The '--tail_worker_log' is:{self.tail_worker_log}, "
|
|
105
|
+
f"which doesn't contain this worker {self.node_id}."
|
|
106
|
+
f" So this worker {self.node_id}'s log will not be output to console. Reset "
|
|
107
|
+
"'--tail_worker_log', if you want to output this worker's log to console.")
|
|
82
108
|
with open(self.output_file, "w") as file_handle:
|
|
83
|
-
|
|
109
|
+
worker_process = subprocess.Popen(self.args_list, preexec_fn=os.setsid, stdout=file_handle,
|
|
110
|
+
stderr=subprocess.STDOUT)
|
|
111
|
+
if self.join and is_tail_worker_log:
|
|
112
|
+
tail_worker_process = self.output_to_console()
|
|
113
|
+
return worker_process, tail_worker_process
|
|
114
|
+
|
|
115
|
+
def output_to_console(self):
|
|
116
|
+
"""
|
|
117
|
+
Output worker log file to console.
|
|
118
|
+
"""
|
|
119
|
+
return subprocess.Popen(['/usr/bin/tail', '-f', self.output_file])
|
|
120
|
+
|
|
121
|
+
def enable_tail_worker_log(self):
|
|
122
|
+
tail_worker_log_list = []
|
|
123
|
+
if self.tail_worker_log != "-1":
|
|
124
|
+
tail_worker_log_list.extend([int(num) for num in self.tail_worker_log.split(',')])
|
|
125
|
+
if self.tail_worker_log != "-1" and self.node_id not in tail_worker_log_list:
|
|
126
|
+
return False
|
|
127
|
+
return True
|
|
84
128
|
|
|
85
129
|
|
|
86
130
|
class _ProcessManager:
|
|
@@ -89,6 +133,7 @@ class _ProcessManager:
|
|
|
89
133
|
training
|
|
90
134
|
|
|
91
135
|
"""
|
|
136
|
+
|
|
92
137
|
def __init__(self, args):
|
|
93
138
|
"""
|
|
94
139
|
Initializes a ProcessManager object.
|
|
@@ -99,13 +144,14 @@ class _ProcessManager:
|
|
|
99
144
|
"""
|
|
100
145
|
self.msn_process = None
|
|
101
146
|
self.cgn_processes = []
|
|
147
|
+
self.tail_cgn_processes = []
|
|
102
148
|
|
|
103
|
-
|
|
104
|
-
self.is_master = _is_local_ip(args.master_addr)
|
|
105
|
-
|
|
106
|
-
self.master_addr = args.master_addr
|
|
149
|
+
self.master_addr = _convert_addr_to_ip(args.master_addr)
|
|
107
150
|
self.master_port = args.master_port
|
|
108
151
|
|
|
152
|
+
"""`is_master` flags whether the current node is the master node."""
|
|
153
|
+
self.is_master = _is_local_ip(self.master_addr)
|
|
154
|
+
|
|
109
155
|
self.worker_num = args.worker_num
|
|
110
156
|
if self.worker_num <= 0:
|
|
111
157
|
raise ValueError(f"worker_num must be greater than 0, but got {self.worker_num}.")
|
|
@@ -115,6 +161,8 @@ class _ProcessManager:
|
|
|
115
161
|
|
|
116
162
|
self.log_dir = args.log_dir
|
|
117
163
|
self.join = args.join
|
|
164
|
+
self.worker_log_name = args.worker_log_name
|
|
165
|
+
self.tail_worker_log = args.tail_worker_log
|
|
118
166
|
self.cluster_time_out = args.cluster_time_out
|
|
119
167
|
self.bind_core = args.bind_core
|
|
120
168
|
self.rank_table_file = args.rank_table_file
|
|
@@ -123,19 +171,21 @@ class _ProcessManager:
|
|
|
123
171
|
self.sim_rank_id = args.sim_rank_id
|
|
124
172
|
self.is_simulation = (self.sim_level != -1)
|
|
125
173
|
if self.is_simulation:
|
|
126
|
-
# If simulation level is set, reset the worker_num and local_worker_num to 1
|
|
127
|
-
# so that host cluster could be initialized.
|
|
128
|
-
self.worker_num = 1
|
|
129
|
-
self.local_worker_num = 1
|
|
130
174
|
os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
|
|
131
175
|
elif os.getenv("MS_SIMULATION_LEVEL"):
|
|
132
|
-
# If simulation level env is set, load RANK_ID and RANK_SIZE envs.
|
|
133
|
-
self.worker_num = 1
|
|
134
|
-
self.local_worker_num = 1
|
|
135
176
|
self.is_simulation = True
|
|
136
|
-
self.sim_rank_id = os.getenv("RANK_ID", "
|
|
177
|
+
self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
|
|
137
178
|
if os.getenv("RANK_SIZE"):
|
|
138
179
|
self.exported_rank_size = os.getenv("RANK_SIZE")
|
|
180
|
+
# If sim_rank_id is set, single worker can be started.
|
|
181
|
+
if self.is_simulation and (self.sim_rank_id != -1):
|
|
182
|
+
logger.info(f"Simulation rank id is set to {self.sim_rank_id}, will dryrun a single process.")
|
|
183
|
+
self.local_worker_num = 1
|
|
184
|
+
if self.is_simulation and self.local_worker_num > 128:
|
|
185
|
+
self.local_worker_num = 1
|
|
186
|
+
self.sim_rank_id = 0
|
|
187
|
+
logger.warning(f"In dryrun case, local worker num is set to larger than 128. "
|
|
188
|
+
"To avoid a system clash, local worker num is set to 1.")
|
|
139
189
|
|
|
140
190
|
self.cmd = args.task_script
|
|
141
191
|
self.cmd_args = args.task_script_args
|
|
@@ -155,6 +205,21 @@ class _ProcessManager:
|
|
|
155
205
|
finally:
|
|
156
206
|
os.umask(origin_mask)
|
|
157
207
|
|
|
208
|
+
self.proc_rank_map = {}
|
|
209
|
+
self.enable_mindx = False
|
|
210
|
+
tft_env = os.getenv("MS_ENABLE_TFT", "")
|
|
211
|
+
if ("TTP:1" in tft_env) or ("UCE:1" in tft_env) or ("ARF:1" in tft_env):
|
|
212
|
+
try:
|
|
213
|
+
from taskd.python.framework.agent.ms_mgr.msrun_plugin import MSRunPlugin
|
|
214
|
+
self.msmgr = MSRunPlugin()
|
|
215
|
+
self.msmgr.register_callbacks("KILL_WORKER", self.kill_workers)
|
|
216
|
+
self.msmgr.register_callbacks("START_ALL_WORKER", self.start_all_workers)
|
|
217
|
+
self.msmgr.register_callbacks("MONITOR", self.monitor_rank_status)
|
|
218
|
+
self.enable_mindx = True
|
|
219
|
+
os.environ["MS_ENABLE_RECOVERY"] = str(1)
|
|
220
|
+
except Exception as e: # pylint: disable=broad-except
|
|
221
|
+
logger.warning(f"mindx is not installed, using original mindspore recovery strategy.: {str(e)}")
|
|
222
|
+
|
|
158
223
|
def run(self):
|
|
159
224
|
"""
|
|
160
225
|
Runs the process manager.
|
|
@@ -173,13 +238,15 @@ class _ProcessManager:
|
|
|
173
238
|
else:
|
|
174
239
|
sys.exit()
|
|
175
240
|
else:
|
|
176
|
-
if self.is_master:
|
|
241
|
+
if self.is_master and not self.is_simulation:
|
|
177
242
|
self.start_scheduler()
|
|
178
|
-
self.
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
self.
|
|
243
|
+
if self.enable_mindx:
|
|
244
|
+
self.msmgr.start()
|
|
245
|
+
else:
|
|
246
|
+
self.start_workers()
|
|
247
|
+
if self.join:
|
|
248
|
+
logger.warning("Distributed job is spawned. Waiting all processes to exit...")
|
|
249
|
+
self.join_processes()
|
|
183
250
|
|
|
184
251
|
def start_scheduler(self):
|
|
185
252
|
"""
|
|
@@ -190,7 +257,8 @@ class _ProcessManager:
|
|
|
190
257
|
os.environ['RANK_ID'] = str(0)
|
|
191
258
|
msn = _MetaServerNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
|
|
192
259
|
_generate_cmd_args_list(self.cmd, self.cmd_args),
|
|
193
|
-
os.path.join(self.log_dir, "scheduler.log")
|
|
260
|
+
os.path.join(self.log_dir, "scheduler.log"), self.tail_worker_log, self.join,
|
|
261
|
+
self.is_simulation)
|
|
194
262
|
self.msn_process = msn.run()
|
|
195
263
|
|
|
196
264
|
def start_workers(self):
|
|
@@ -208,9 +276,6 @@ class _ProcessManager:
|
|
|
208
276
|
"You can access 'RANK_ID' environment variable after calling "
|
|
209
277
|
"'mindspore.communication.init()'")
|
|
210
278
|
|
|
211
|
-
if self.is_simulation and self.worker_num != 1:
|
|
212
|
-
raise ValueError(f"Simulation level is set, worker_num must be 1, but got {self.worker_num}.")
|
|
213
|
-
|
|
214
279
|
for i in range(self.local_worker_num):
|
|
215
280
|
os.environ["DEVICE_ID"] = str(i)
|
|
216
281
|
node_id, log_name = self._get_node_id_and_log_path(i)
|
|
@@ -221,16 +286,17 @@ class _ProcessManager:
|
|
|
221
286
|
# If node_id is generated in '_get_node_id_and_log_path' method, export 'RANK_ID' environment variable.
|
|
222
287
|
# This is for rank_table method's compatibility consideration.
|
|
223
288
|
os.environ["RANK_ID"] = str(node_id)
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
if self.is_simulation:
|
|
227
|
-
# Reset RANK_ID env to sim_rank_id.
|
|
289
|
+
print(f"Start worker process with rank id:{node_id}, log file:{log_name}. "
|
|
290
|
+
f"Environment variable [RANK_ID={node_id}] is exported.", flush=True)
|
|
291
|
+
if self.is_simulation and (self.sim_rank_id != -1):
|
|
292
|
+
# Reset RANK_ID env to sim_rank_id if sim_rank_id is set.
|
|
228
293
|
os.environ["RANK_ID"] = str(self.sim_rank_id)
|
|
294
|
+
logger.warning(f"In dryrun case, RANK_ID is assigned to {self.sim_rank_id}.")
|
|
229
295
|
|
|
230
|
-
cpu_num = subprocess.getoutput("cat /proc/cpuinfo|grep processor|wc -l")
|
|
231
|
-
if not cpu_num.isdigit():
|
|
232
|
-
raise RuntimeError("Fail to get cpu number from /proc/cpuinfo.")
|
|
233
296
|
if self.bind_core:
|
|
297
|
+
cpu_num = subprocess.getoutput("cat /proc/cpuinfo|grep processor|wc -l")
|
|
298
|
+
if not cpu_num.isdigit():
|
|
299
|
+
raise RuntimeError(f"Got cpu number from '/proc/cpuinfo' is {cpu_num}, failed to bind core.")
|
|
234
300
|
avg = int(cpu_num) // self.local_worker_num
|
|
235
301
|
cpu_start = avg * i
|
|
236
302
|
cpu_end = cpu_start + avg - 1
|
|
@@ -238,9 +304,11 @@ class _ProcessManager:
|
|
|
238
304
|
else:
|
|
239
305
|
cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
|
|
240
306
|
cgn = _ComputeGraphNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
|
|
241
|
-
node_id, cmd, log_name)
|
|
242
|
-
process = cgn.run()
|
|
307
|
+
node_id, cmd, log_name, self.tail_worker_log, self.join, self.is_simulation)
|
|
308
|
+
process, tail_process = cgn.run()
|
|
243
309
|
self.cgn_processes.append(process)
|
|
310
|
+
self.tail_cgn_processes.append(tail_process)
|
|
311
|
+
self.proc_rank_map[i] = process
|
|
244
312
|
|
|
245
313
|
def join_processes(self):
|
|
246
314
|
"""
|
|
@@ -248,8 +316,15 @@ class _ProcessManager:
|
|
|
248
316
|
If there's any process does not exit normally, logs will be analyzed
|
|
249
317
|
so that understandable root cause of exception could be returned.
|
|
250
318
|
"""
|
|
319
|
+
|
|
320
|
+
def signal_handler(sig, frame):
|
|
321
|
+
logger.warning("msrun process received SIGNIN (Ctrl+C), terminating all workers.")
|
|
322
|
+
self.kill_all_processes()
|
|
323
|
+
sys.exit(0)
|
|
324
|
+
|
|
251
325
|
has_exception = False
|
|
252
326
|
success_cgn_processes = set()
|
|
327
|
+
signal.signal(signal.SIGINT, signal_handler)
|
|
253
328
|
while True:
|
|
254
329
|
# Traversal all workers and kill immediately if any exception happens.
|
|
255
330
|
for p in self.cgn_processes:
|
|
@@ -266,15 +341,14 @@ class _ProcessManager:
|
|
|
266
341
|
|
|
267
342
|
if has_exception:
|
|
268
343
|
logger.warning("There's worker exits with exception, kill all other workers.")
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
p.kill()
|
|
344
|
+
self.kill_worker_processes()
|
|
345
|
+
self.kill_tail_log_processes()
|
|
272
346
|
break
|
|
273
347
|
elif len(success_cgn_processes) == len(self.cgn_processes):
|
|
274
348
|
logger.info("All workers successfully exit!")
|
|
349
|
+
self.kill_tail_log_processes()
|
|
275
350
|
break
|
|
276
351
|
|
|
277
|
-
|
|
278
352
|
if self.msn_process:
|
|
279
353
|
self.msn_process.wait()
|
|
280
354
|
if self.msn_process.returncode != 0:
|
|
@@ -282,11 +356,40 @@ class _ProcessManager:
|
|
|
282
356
|
logger.error(f"Scheduler process {self.msn_process.pid} exit with exception.")
|
|
283
357
|
|
|
284
358
|
if has_exception:
|
|
285
|
-
logger.
|
|
359
|
+
logger.info("Analyzing exception log...")
|
|
286
360
|
self._analyze_log()
|
|
287
361
|
raise RuntimeError("Distributed job exited with exception. Please check logs in "
|
|
288
362
|
f"directory: {self.log_dir}.")
|
|
289
363
|
|
|
364
|
+
def kill_tail_log_processes(self):
|
|
365
|
+
"""
|
|
366
|
+
Kills all tail worker log processes.
|
|
367
|
+
|
|
368
|
+
"""
|
|
369
|
+
for p_tail in self.tail_cgn_processes:
|
|
370
|
+
if p_tail is not None:
|
|
371
|
+
logger.debug("Tail worker log process:{p_tail.pid} has been killed!")
|
|
372
|
+
p_tail.kill()
|
|
373
|
+
|
|
374
|
+
def kill_worker_processes(self):
|
|
375
|
+
"""
|
|
376
|
+
Kills all worker processes.
|
|
377
|
+
|
|
378
|
+
"""
|
|
379
|
+
for p in self.cgn_processes:
|
|
380
|
+
if p.poll() is None:
|
|
381
|
+
os.killpg(os.getpgid(p.pid), signal.SIGKILL)
|
|
382
|
+
|
|
383
|
+
def kill_all_processes(self):
|
|
384
|
+
"""
|
|
385
|
+
Kills all running processes, including scheduler, worker and tail log.
|
|
386
|
+
|
|
387
|
+
"""
|
|
388
|
+
self.kill_worker_processes()
|
|
389
|
+
self.kill_tail_log_processes()
|
|
390
|
+
if self.msn_process.poll() is None:
|
|
391
|
+
self.msn_process.kill()
|
|
392
|
+
|
|
290
393
|
def stop_processes(self):
|
|
291
394
|
"""
|
|
292
395
|
Stops all running processes.
|
|
@@ -310,24 +413,135 @@ class _ProcessManager:
|
|
|
310
413
|
self.start_scheduler()
|
|
311
414
|
self.start_workers()
|
|
312
415
|
|
|
416
|
+
def kill_all_workers(self):
|
|
417
|
+
"""
|
|
418
|
+
Kill all running worker processes.
|
|
419
|
+
|
|
420
|
+
Args:
|
|
421
|
+
NA.
|
|
422
|
+
"""
|
|
423
|
+
for p in self.cgn_processes:
|
|
424
|
+
if p.poll() is None:
|
|
425
|
+
p.kill()
|
|
426
|
+
self.cgn_processes.clear()
|
|
427
|
+
|
|
428
|
+
for p in self.tail_cgn_processes:
|
|
429
|
+
if p is not None:
|
|
430
|
+
p.kill()
|
|
431
|
+
self.tail_cgn_processes.clear()
|
|
432
|
+
|
|
433
|
+
def kill_single_worker(self, pid):
|
|
434
|
+
"""
|
|
435
|
+
Kill one worker process with specified pid.
|
|
436
|
+
|
|
437
|
+
Args:
|
|
438
|
+
pid: Worker process' pid.
|
|
439
|
+
"""
|
|
440
|
+
kill_status = False
|
|
441
|
+
for i in range(len(self.cgn_processes)):
|
|
442
|
+
p = self.cgn_processes[i]
|
|
443
|
+
if p.pid == pid and p.poll() is None:
|
|
444
|
+
p.kill()
|
|
445
|
+
del self.cgn_processes[i]
|
|
446
|
+
tail_p = self.tail_cgn_processes[i]
|
|
447
|
+
if tail_p is not None:
|
|
448
|
+
tail_p.kill()
|
|
449
|
+
del self.tail_cgn_processes[i]
|
|
450
|
+
kill_status = True
|
|
451
|
+
break
|
|
452
|
+
if not kill_status:
|
|
453
|
+
logger.warning(f"There's no active worker with pid: {pid}")
|
|
454
|
+
|
|
455
|
+
def kill_workers(self, pids):
|
|
456
|
+
"""
|
|
457
|
+
Kill worker process according to pids. Worker process with pid within pids list will be killed.
|
|
458
|
+
|
|
459
|
+
Args:
|
|
460
|
+
pids(list): a list of worker process pid. When local_ranks pids -1, kill all worker process.
|
|
461
|
+
"""
|
|
462
|
+
if -1 in pids:
|
|
463
|
+
self.kill_all_workers()
|
|
464
|
+
else:
|
|
465
|
+
for pid in pids:
|
|
466
|
+
self.kill_single_worker(pid)
|
|
467
|
+
return 0
|
|
468
|
+
|
|
469
|
+
def monitor_rank_status(self, local_ranks):
|
|
470
|
+
"""
|
|
471
|
+
Monitor the status of workers whose rank is within local_ranks list.
|
|
472
|
+
|
|
473
|
+
Args:
|
|
474
|
+
local_ranks(list): a list of local worker ranks. When local_ranks contains -1,
|
|
475
|
+
monitor all workers' status.
|
|
476
|
+
"""
|
|
477
|
+
rank_status = {}
|
|
478
|
+
if -1 in local_ranks:
|
|
479
|
+
local_ranks = list(range(self.local_worker_num))
|
|
480
|
+
for i in local_ranks:
|
|
481
|
+
single_status = self.monitor_single_rank(i)
|
|
482
|
+
if single_status:
|
|
483
|
+
rank_status[i] = single_status
|
|
484
|
+
return rank_status
|
|
485
|
+
|
|
486
|
+
def monitor_single_rank(self, rank_id):
|
|
487
|
+
"""
|
|
488
|
+
Monitor the status of a single worker with rank_id
|
|
489
|
+
|
|
490
|
+
Args:
|
|
491
|
+
rank_id: worker process's local rank, which is also device_id.
|
|
492
|
+
"""
|
|
493
|
+
if 0 <= rank_id < self.local_worker_num:
|
|
494
|
+
global_rank_id = rank_id
|
|
495
|
+
if self.node_rank >= 0:
|
|
496
|
+
global_rank_id = self.node_rank * self.local_worker_num + rank_id
|
|
497
|
+
try:
|
|
498
|
+
p = self.proc_rank_map[rank_id]
|
|
499
|
+
p_status = p.poll()
|
|
500
|
+
if (not psutil.pid_exists(p.pid)) and (p_status != 0):
|
|
501
|
+
p_status = 300
|
|
502
|
+
return {"pid": p.pid, "status": p_status, "global_rank": global_rank_id}
|
|
503
|
+
except KeyError:
|
|
504
|
+
logger.info(f"Process rank {rank_id} has not been initialized.")
|
|
505
|
+
return {"pid": None, "status": 200, "global_rank": global_rank_id}
|
|
506
|
+
else:
|
|
507
|
+
logger.warning(f"Invalid rank id!")
|
|
508
|
+
return {}
|
|
509
|
+
|
|
510
|
+
def start_all_workers(self):
|
|
511
|
+
"""
|
|
512
|
+
Start all worker processes after killing all workers.
|
|
513
|
+
|
|
514
|
+
Args:
|
|
515
|
+
NA.
|
|
516
|
+
"""
|
|
517
|
+
if self.cgn_processes:
|
|
518
|
+
self.kill_all_workers()
|
|
519
|
+
self.start_workers()
|
|
520
|
+
worker_status = self.monitor_rank_status([-1])
|
|
521
|
+
for i in range(self.local_worker_num):
|
|
522
|
+
if worker_status[i]["status"] != None: # pylint: disable=singleton-comparison
|
|
523
|
+
return 1
|
|
524
|
+
return 0
|
|
525
|
+
|
|
313
526
|
def _get_node_id_and_log_path(self, index):
|
|
314
527
|
"""
|
|
315
528
|
Generate node id and log path for corresponding process.
|
|
316
529
|
"""
|
|
530
|
+
formatted_log_name = self.format_worker_log_name()
|
|
317
531
|
if self.local_worker_num > self.worker_num:
|
|
318
532
|
raise ValueError(f"Total worker number is {self.worker_num}, "
|
|
319
533
|
f"but got exceeded local worker number: {self.local_worker_num}.")
|
|
320
534
|
if self.local_worker_num == self.worker_num:
|
|
321
|
-
return index, os.path.join(self.log_dir, "
|
|
535
|
+
return index, os.path.join(self.log_dir, formatted_log_name + "_" + str(index) + ".log")
|
|
322
536
|
|
|
323
537
|
if self.node_rank >= 0:
|
|
324
538
|
# We assume that each node has same process number.
|
|
325
539
|
node_id = self.node_rank * self.local_worker_num + index
|
|
326
|
-
log_name = os.path.join(self.log_dir, "
|
|
540
|
+
log_name = os.path.join(self.log_dir, formatted_log_name + "_" + str(node_id) + ".log")
|
|
327
541
|
else:
|
|
328
542
|
# If node_rank is default value -1, let MindSpore assign rank id.
|
|
329
543
|
node_id = None
|
|
330
|
-
log_name = os.path.join(self.log_dir, "
|
|
544
|
+
log_name = os.path.join(self.log_dir, formatted_log_name + "_" + str(index) + ".log")
|
|
331
545
|
return node_id, log_name
|
|
332
546
|
|
|
333
547
|
def _analyze_log(self):
|
|
@@ -350,3 +564,15 @@ class _ProcessManager:
|
|
|
350
564
|
logger.error(f"Time out nodes are {time_out_node_ids}")
|
|
351
565
|
|
|
352
566
|
os.system(f"grep -rn -E 'ERROR|CRITICAL|Traceback|Error' -C 5 {self.log_dir}")
|
|
567
|
+
|
|
568
|
+
def format_worker_log_name(self):
|
|
569
|
+
"""
|
|
570
|
+
Format worker log files' name.
|
|
571
|
+
"""
|
|
572
|
+
if not self.worker_log_name:
|
|
573
|
+
formatted_worker_log_name = "worker"
|
|
574
|
+
else:
|
|
575
|
+
current_ip = _get_local_ip(self.master_addr)
|
|
576
|
+
formatted_worker_log_name = re.sub(r'\{ip\}', current_ip, self.worker_log_name)
|
|
577
|
+
formatted_worker_log_name = re.sub(r'\{hostname\}', socket.gethostname(), formatted_worker_log_name)
|
|
578
|
+
return formatted_worker_log_name
|
|
@@ -16,8 +16,11 @@
|
|
|
16
16
|
import os
|
|
17
17
|
import json
|
|
18
18
|
import socket
|
|
19
|
+
import ipaddress
|
|
19
20
|
import mindspore.log as logger
|
|
20
21
|
|
|
22
|
+
CURRENT_IP = None
|
|
23
|
+
|
|
21
24
|
def _generate_cmd(cmd, cmd_args, output_name):
|
|
22
25
|
"""
|
|
23
26
|
Generates a command string to execute a Python script in the background, r
|
|
@@ -67,6 +70,24 @@ def _generate_url(addr, port):
|
|
|
67
70
|
return url
|
|
68
71
|
|
|
69
72
|
|
|
73
|
+
def _get_local_ip(ip_address):
|
|
74
|
+
"""
|
|
75
|
+
Get current IP address.
|
|
76
|
+
|
|
77
|
+
"""
|
|
78
|
+
global CURRENT_IP
|
|
79
|
+
if CURRENT_IP is None:
|
|
80
|
+
try:
|
|
81
|
+
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
|
82
|
+
s.connect((ip_address, 0))
|
|
83
|
+
CURRENT_IP = s.getsockname()[0]
|
|
84
|
+
s.close()
|
|
85
|
+
except Exception as e:
|
|
86
|
+
raise RuntimeError(f"Get local ip failed: {e}. Please check whether an accessible address "
|
|
87
|
+
"is input by '--master_address'.")
|
|
88
|
+
return CURRENT_IP
|
|
89
|
+
|
|
90
|
+
|
|
70
91
|
def _is_local_ip(ip_address):
|
|
71
92
|
"""
|
|
72
93
|
Check if the current input IP address is a local IP address.
|
|
@@ -75,13 +96,8 @@ def _is_local_ip(ip_address):
|
|
|
75
96
|
p = os.popen("ip -j addr")
|
|
76
97
|
addr_info_str = p.read()
|
|
77
98
|
p.close()
|
|
99
|
+
current_ip = _get_local_ip(ip_address)
|
|
78
100
|
if not addr_info_str:
|
|
79
|
-
# This means this host has no "ip -j addr" command.
|
|
80
|
-
# We use socket module to get local ip address.
|
|
81
|
-
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
|
82
|
-
s.connect((ip_address, 0))
|
|
83
|
-
current_ip = s.getsockname()[0]
|
|
84
|
-
s.close()
|
|
85
101
|
return current_ip == ip_address
|
|
86
102
|
|
|
87
103
|
addr_infos = json.loads(addr_info_str)
|
|
@@ -93,6 +109,25 @@ def _is_local_ip(ip_address):
|
|
|
93
109
|
return False
|
|
94
110
|
|
|
95
111
|
|
|
112
|
+
def _convert_addr_to_ip(master_addr):
|
|
113
|
+
"""
|
|
114
|
+
Check whether the input parameter 'master_addr' is IPv4. If a hostname is inserted, it will be converted
|
|
115
|
+
to IP and then set as master host's IP.
|
|
116
|
+
|
|
117
|
+
"""
|
|
118
|
+
try:
|
|
119
|
+
ipaddress.IPv4Address(master_addr)
|
|
120
|
+
return master_addr
|
|
121
|
+
except ipaddress.AddressValueError:
|
|
122
|
+
try:
|
|
123
|
+
ip_address = socket.gethostbyname(master_addr)
|
|
124
|
+
logger.info(f"Convert input host name:{master_addr} to ip address:{ip_address}.")
|
|
125
|
+
return ip_address
|
|
126
|
+
except socket.gaierror as e:
|
|
127
|
+
raise RuntimeError(f"DNS resolution failed: {e}. Please check whether a correct host name "
|
|
128
|
+
"is input by '--master_address'.")
|
|
129
|
+
|
|
130
|
+
|
|
96
131
|
def _send_scale_num(url, scale_num):
|
|
97
132
|
"""
|
|
98
133
|
Send an HTTP request to a specified URL, informing scale_num.
|
|
@@ -37,8 +37,8 @@ def get_args():
|
|
|
37
37
|
parser.add_argument(
|
|
38
38
|
"--master_addr",
|
|
39
39
|
default="127.0.0.1", type=str,
|
|
40
|
-
help="specifies the IP address of the scheduler and its data type is string."
|
|
41
|
-
" Allowed values: valid IP addresses."
|
|
40
|
+
help="specifies the IP address or the host name of the scheduler and its data type is string."
|
|
41
|
+
" Allowed values: valid IP addresses or valid host name."
|
|
42
42
|
)
|
|
43
43
|
parser.add_argument(
|
|
44
44
|
"--master_port", default=8118, type=int,
|
|
@@ -85,13 +85,13 @@ def get_args():
|
|
|
85
85
|
"--sim_level",
|
|
86
86
|
default=-1,
|
|
87
87
|
type=int,
|
|
88
|
-
choices=[0, 1],
|
|
88
|
+
choices=[0, 1, 2, 3],
|
|
89
89
|
help="specifies simulation level. When this argument is set, msrun only spawns one process "
|
|
90
90
|
"but export RANK_SIZE with value worker_num and RANK_ID with value sim_rank_id."
|
|
91
91
|
)
|
|
92
92
|
parser.add_argument(
|
|
93
93
|
"--sim_rank_id",
|
|
94
|
-
default
|
|
94
|
+
default=-1,
|
|
95
95
|
type=int,
|
|
96
96
|
help="specifies simulation process's rank id. Only one process is spawned in simulation scenario."
|
|
97
97
|
)
|
|
@@ -102,6 +102,23 @@ def get_args():
|
|
|
102
102
|
help="specifies rank table file path. This path is not used to initialize distributed job in "
|
|
103
103
|
"'rank table file manner' but to help support other features."
|
|
104
104
|
)
|
|
105
|
+
parser.add_argument(
|
|
106
|
+
"--worker_log_name",
|
|
107
|
+
default="",
|
|
108
|
+
type=str,
|
|
109
|
+
help="Specifies the worker log file name as a string for current node; the default is worker_[rankid]. "
|
|
110
|
+
"Support configuring the current IP address and host name by using {ip} and {hostname} respectively. "
|
|
111
|
+
"e.g. --worker_log_name=worker_{ip}_{hostname}_test, worker [rankid] log name for current node "
|
|
112
|
+
"will be worker_[real IP address]_[real host name]_test_[rankid]."
|
|
113
|
+
)
|
|
114
|
+
parser.add_argument(
|
|
115
|
+
"--tail_worker_log",
|
|
116
|
+
default="-1",
|
|
117
|
+
type=str,
|
|
118
|
+
help="Only tail worker log to console when '--join=True' and the configured value should be within "
|
|
119
|
+
"[0, local_worker_num], otherwise worker log will not be tail. All worker logs will be tail by "
|
|
120
|
+
"default. Support tail the specified worker log (e.g. --tail_log=0 tail the worker 0 log to console)."
|
|
121
|
+
)
|
|
105
122
|
parser.add_argument(
|
|
106
123
|
"task_script",
|
|
107
124
|
type=str,
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Copyright 2025 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
Parallel function operator
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from mindspore.parallel.function.reshard_func import reshard
|
|
21
|
+
|
|
22
|
+
__all__ = []
|
|
23
|
+
__all__.extend(reshard_func.__all__)
|
|
24
|
+
__all__.sort()
|