mindspore 2.1.0-cp38-cp38-manylinux1_x86_64.whl → 2.2.11-cp38-cp38-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +139 -22
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/composite_op_helper.py +16 -12
- mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
- mindspore/_akg/akg/utils/kernel_exec.py +98 -274
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +219 -0
- mindspore/_akg/akg/utils/util.py +56 -1
- mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +23 -29
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +4 -11
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +13 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +67 -72
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +86 -106
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +25 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/amp.py +47 -11
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +29 -0
- mindspore/common/api.py +174 -259
- mindspore/common/auto_dynamic_shape.py +494 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +243 -165
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +14 -3
- mindspore/config/super_bar_config.json +4 -2
- mindspore/context.py +152 -61
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +33 -7
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +16 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +17 -14
- mindspore/include/api/status.h +8 -3
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/type_id.h +1 -0
- mindspore/include/mindapi/base/types.h +1 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8998 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +313 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +22 -30
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +323 -204
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +61 -95
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +16 -25
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +27 -22
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +87 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/numpy/utils_const.py +5 -5
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +6 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +174 -193
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -9
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +568 -260
- mindspore/ops/function/random_func.py +88 -57
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +244 -25
- mindspore/ops/operations/__init__.py +31 -19
- mindspore/ops/operations/_grad_ops.py +71 -7
- mindspore/ops/operations/_inner_ops.py +350 -17
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +68 -282
- mindspore/ops/operations/comm_ops.py +107 -59
- mindspore/ops/operations/custom_ops.py +94 -70
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +192 -144
- mindspore/ops/operations/nn_ops.py +857 -489
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +12 -5
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +18 -13
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +19 -12
- mindspore/parallel/shard.py +21 -14
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +4 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
- mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
- mindspore/profiler/parser/ascend_op_generator.py +6 -6
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
- mindspore/profiler/parser/base_timeline_generator.py +10 -8
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +38 -22
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +21 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +179 -89
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +42 -21
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +523 -578
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +6 -4
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +541 -0
- mindspore/scipy/linalg.py +1 -1
- mindspore/scipy/ops.py +55 -5
- mindspore/scipy/optimize/__init__.py +3 -2
- mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
- mindspore/scipy/optimize/minimize.py +7 -3
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +15 -8
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +84 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +187 -47
- mindspore/train/serialization.py +487 -161
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +37 -17
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +8 -8
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +488 -539
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/parallel/checkpoint_transform.py
CHANGED
@@ -21,6 +21,7 @@ import copy
 from collections import defaultdict
 import numpy as np
 import mindspore as ms
+from mindspore.common import dtype as mstype
 from mindspore.parallel._parallel_serialization import _rank_list_for_transform_parallel_checkpoint, \
     _transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir, \
     _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
@@ -35,8 +36,7 @@ def merge_pipeline_strategys(src_strategy_dirs, dst_strategy_file):
     """
     Merge parallel strategy between all pipeline stages in pipeline parallel mode.
     For more details about converting distributed Checkpoint, please refer to
-    `
-    Inference <https://www.mindspore.cn/tutorials/experts/en/r2.1/parallel/resilience_train_and_predict.html>`_.
+    `Model Transformation <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/model_transformation.html>`_.

     Note:
         Strategy file of each pipeline stage should be included in src_strategy_dirs.
@@ -76,17 +76,16 @@ def rank_list_for_transform(rank_id, src_strategy_file=None, dst_strategy_file=N
     """
     List of original distributed checkpoint rank index for obtaining the target checkpoint of a rank_id
     during the distributed checkpoint conversion. For more details about converting distributed Checkpoint,
-    please refer to `
-    Inference <https://www.mindspore.cn/tutorials/experts/en/r2.1/parallel/resilience_train_and_predict.html>`_.
+    please refer to `Model Transformation <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/model_transformation.html>`_.

     Args:
         rank_id (int): The rank of which distributed checkpoint needs to be obtained after conversion.
         src_strategy_file (str): Name of source sharding strategy file which saved by
-
+            `mindspore.set_auto_parallel_context(strategy_ckpt_save_file)`.
             when the 'src_strategy_file' is None, it means that the source sharding strategy is
             without any sharing for each parameter. Default:None.
         dst_strategy_file (str): Name of destination sharding strategy file which saved by
-
+            `mindspore.set_auto_parallel_context(strategy_ckpt_save_file)`.
             when the 'dst_strategy_file' is None, it means that the destination sharding strategy
             is without any sharing for each parameter. Default:None.

@@ -139,8 +138,7 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_
     """
     Transform distributed checkpoint from source sharding strategy to destination sharding strategy by rank
     for a network. For more details about converting distributed Checkpoint, please refer to
-    `
-    Inference <https://www.mindspore.cn/tutorials/experts/en/r2.1/parallel/resilience_train_and_predict.html>`_.
+    `Model Transformation <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/model_transformation.html>`_.

     Args:
         rank_id (int): The rank of which distributed checkpoint needs to be obtained after conversion.
@@ -195,6 +193,7 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_
         raise ValueError("Checkpoint file {} in rank {} not exits: ".format(local_file, rank))
     param_total_dict = defaultdict(dict)
     param_attr_dict = defaultdict(dict)
+    param_type_dict = defaultdict(dict)
     src_strategy_list, dst_strategy_list = _extract_src_dst_layout_map(rank_id, src_strategy_file, dst_strategy_file)
     # src rank => local rank inside pipeline stage
     src_stage_device_num = np.prod(src_strategy_list.get(list(src_strategy_list.keys())[0])[0]) if src_strategy_list \
@@ -211,11 +210,15 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_
                 and _parameter_not_in_local_stage(param_name, origin_dst_strategy_list, dst_strategy_list):
             continue
         src_rank = rank % src_stage_device_num
+        param_type_dict[param_name][src_rank] = str(param.data.dtype)
+        if param.data.dtype == mstype.bfloat16:
+            param.set_dtype(mstype.float32)
         param_total_dict[param_name][src_rank] = param.data.asnumpy()
         param_attr_dict[param_name][src_rank] = (param.requires_grad, param.layerwise_parallel)
     local_rank_id = rank_id % dst_stage_device_num
     transform_param_list = _transform_parallel_checkpoint(local_rank_id, param_total_dict,
-                                                          param_attr_dict, src_strategy_list, dst_strategy_list)
+                                                          param_attr_dict, src_strategy_list, dst_strategy_list,
+                                                          param_type_dict)
     ms.save_checkpoint(transform_param_list, save_checkpoint_file_name)

@@ -224,8 +227,7 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
     """
     Transform distributed checkpoint from source sharding strategy to destination sharding strategy for a rank.
     For more details about converting distributed Checkpoint, please refer to
-    `
-    Inference <https://www.mindspore.cn/tutorials/experts/en/r2.1/parallel/resilience_train_and_predict.html>`_.
+    `Model Transformation <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/model_transformation.html>`_.

     Note:
         The `src_checkpoints_dir` directory structure should be organized like "src_checkpoints_dir/rank_0/a.ckpt", the
@@ -301,11 +303,15 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
     for needed_rank_list_key, transform_rank_list in needed_rank_list_map.items():
         param_total_dict = defaultdict(dict)
         param_attr_dict = defaultdict(dict)
+        param_type_dict = defaultdict(dict)
         needed_rank_list = needed_rank_list_key.split("-")
         for needed_rank in needed_rank_list:
             ckpt_dict = ms.load_checkpoint(all_checkpoint_files_map.get(int(needed_rank)))
             for param_name, param in ckpt_dict.items():
                 src_rank = int(needed_rank) % src_stage_device_num
+                param_type_dict[param_name][src_rank] = str(param.data.dtype)
+                if param.data.dtype == mstype.bfloat16:
+                    param.set_dtype(mstype.float32)
                 param_total_dict[param_name][src_rank] = param.data.asnumpy()
                 param_attr_dict[param_name][src_rank] = (param.requires_grad, param.layerwise_parallel)
         for transform_rank in transform_rank_list:
@@ -320,7 +326,8 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,

             local_rank_id = transform_rank % dst_stage_device_num
             transform_param_list = _transform_parallel_checkpoint(local_rank_id, param_total_dict_copy,
-                                                                  param_attr_dict, src_strategy_list, dst_strategy_list)
+                                                                  param_attr_dict, src_strategy_list, dst_strategy_list,
+                                                                  param_type_dict)
             save_checkpoint_file = "{}{}.ckpt".format(ckpt_prefix, transform_rank)
             save_checkpoint_file_dir = os.path.join(dst_checkpoints_dir, "rank_{}".format(transform_rank))
             if not os.path.exists(save_checkpoint_file_dir):
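The hunks above thread a new `param_type_dict` alongside `param_total_dict` so that each parameter's original dtype is recorded, and bfloat16 parameters are upcast to float32 before `asnumpy()` (NumPy has no bfloat16 type). A minimal sketch of that collection pattern, assuming a checkpoint dict loaded with `ms.load_checkpoint`; the helper name `collect_params` is hypothetical:

from collections import defaultdict
from mindspore.common import dtype as mstype

param_total_dict = defaultdict(dict)
param_type_dict = defaultdict(dict)

def collect_params(ckpt_dict, src_rank):
    """Sketch: record dtypes, upcasting bfloat16 before NumPy conversion."""
    for param_name, param in ckpt_dict.items():
        # Remember the original dtype so the transform can restore it later.
        param_type_dict[param_name][src_rank] = str(param.data.dtype)
        # NumPy has no bfloat16, so upcast to float32 before asnumpy().
        if param.data.dtype == mstype.bfloat16:
            param.set_dtype(mstype.float32)
        param_total_dict[param_name][src_rank] = param.data.asnumpy()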
mindspore/parallel/shard.py
CHANGED
@@ -24,7 +24,7 @@ class Shard(Shard_):

     def __init__(self):
         """Initialize Shard."""
-
+        super().__init__('Shard')
         self.shard_fn = None
         self.fn = None
         self.in_strategy = None
@@ -36,13 +36,17 @@ class Shard(Shard_):
     def __call__(self, fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascend", level=0):
         if ms.context.get_context("mode") != ms.context.PYNATIVE_MODE or \
                 ms.context.get_auto_parallel_context("parallel_mode") not in ["auto_parallel"]:
-            raise AssertionError(f"Cell shard only supports auto parallel under PyNative mode.")
+            raise AssertionError(
+                f"Cell shard only supports auto parallel under PyNative mode.")
         if ms.context.get_context("device_target") not in ["Ascend", "GPU"]:
-            raise AssertionError(f"'Shard' now only supports 'Ascend' and 'GPU'")
+            raise AssertionError(
+                f"'Shard' now only supports 'Ascend' and 'GPU'")
         if ms.context.get_auto_parallel_context("search_mode") != "sharding_propagation":
-            raise AssertionError(f"'search_mode' must be 'sharding_propagation' for 'Shard'")
+            raise AssertionError(
+                f"'search_mode' must be 'sharding_propagation' for 'Shard'")
         if not isinstance(in_strategy, tuple):
-            raise TypeError(f"For 'Shard', the 'in_strategy' should be a tuple, but got {type(in_strategy).__name__}")
+            raise TypeError(
+                f"For 'Shard', the 'in_strategy' should be a tuple, but got {type(in_strategy).__name__}")
         if not isinstance(out_strategy, (type(None), tuple)):
             raise TypeError(f"For 'Shard', the 'out_strategy' should be None or tuple, "
                             f"but got {type(out_strategy).__name__}")
@@ -117,7 +121,8 @@ class Shard(Shard_):
             return
         if isinstance(parameter_plan, dict):
             if not isinstance(fn, ms.nn.Cell):
-                raise TypeError(f"If parameter_plan is set, type of fn must be mindspore.nn.Cell, but got {type(fn)}")
+                raise TypeError(
+                    f"If parameter_plan is set, type of fn must be mindspore.nn.Cell, but got {type(fn)}")
             for k in parameter_plan.keys():
                 v = parameter_plan[k]
                 if not isinstance(k, str) or not isinstance(v, tuple):
@@ -131,10 +136,12 @@ class Shard(Shard_):
             param_strategy = parameter_plan[param_name]
             param = self._search_parameter_by_name(param_name, fn)
             if param is None:
-                logger.warning(f"{param_name} is not exist, ignored its setting.")
+                logger.warning(
+                    f"{param_name} is not exist, ignored its setting.")
                 continue

-            self._check_layout_is_valid(param_name, param.shape, param_strategy)
+            self._check_layout_is_valid(
+                param_name, param.shape, param_strategy)
             if param.param_info.param_strategy:
                 logger.warning(f"The layout of parameter '{param_name}' "
                                f"has been set to {param.param_info.param_strategy}, "
@@ -143,7 +150,7 @@ class Shard(Shard_):

     def _is_attrs_has_been_set(self, fn, in_strategy, out_strategy, device, level):
         return self.shard_fn is not None and self.fn == fn and self.in_strategy == in_strategy and \
-
+               self.out_strategy == out_strategy and self.device == device and self.level == level


 def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascend", level=0):
@@ -159,8 +166,8 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen

     Note:
         You need to set the execution mode to PyNative mode,
-        set the parallel mode in `set_auto_parallel_context` to "auto_parallel"
-        and the search mode to "sharding_propagation".
+        set the parallel mode in `set_auto_parallel_context` (parallel_mode) to "auto_parallel"
+        and the search mode (search_mode) to "sharding_propagation".
         If the input contain Parameter, its strategy should be set in `in_strategy`.

     Args:
@@ -216,15 +223,15 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
        ...     device_num=2)
        >>> def test_shard(x, y):
        ...     return x + y
-       >>> x = Tensor(np.ones(shape=(32, 10)))
-       >>> y = Tensor(np.ones(shape=(32, 10)))
+       >>> x = Tensor(np.ones(shape=(32, 10)), dtype=ms.float32)
+       >>> y = Tensor(np.ones(shape=(32, 10)), dtype=ms.float32)
        >>> output = ms.shard(test_shard, in_strategy=((2, 1), (2, 1)))(x, y)
        >>> print(output.shape)
        (32, 10)

    Tutorial Examples:
        - `Functional Operator Sharding
-         <https://www.mindspore.cn/
+         <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/pynative_shard_function_parallel.html>`_
    """
    if not isinstance(fn, (ms.nn.Cell)):
        logger.warning("'fn' is not a mindspore.nn.Cell, and its definition cannot involve Parameter; "
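The `Shard.__call__` checks reconstructed above only admit PyNative mode with parallel_mode "auto_parallel" and search_mode "sharding_propagation". A sketch of a context setup that satisfies them, mirroring the updated docstring example; it assumes a 2-device Ascend or GPU environment with communication already initialized:

import numpy as np
import mindspore as ms
from mindspore import Tensor

# Satisfy the preconditions checked in Shard.__call__.
ms.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend")
ms.set_auto_parallel_context(parallel_mode="auto_parallel",
                             search_mode="sharding_propagation",
                             device_num=2)

def test_shard(x, y):
    return x + y

x = Tensor(np.ones(shape=(32, 10)), dtype=ms.float32)
y = Tensor(np.ones(shape=(32, 10)), dtype=ms.float32)
# Split the first axis of each input across the 2 devices.
output = ms.shard(test_shard, in_strategy=((2, 1), (2, 1)))(x, y)
print(output.shape)  # (32, 10)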
mindspore/profiler/common/struct_type.py
CHANGED
@@ -72,7 +72,7 @@ class StructType(Enum):
         """
         Parse the binary data to get the unpacked data.

-        Args
+        Args:
             data_struct (dict): Key is the data name, value is StructType.
             binary_data (str): This value should be a binary string.
             special_func (Callable): This is a callable function,
@@ -105,14 +105,14 @@ class StructType(Enum):
         for name, data_type in data_struct.items():
             data_size = StructType.sizeof(data_type)
             if special_process_func:
-                unpack_data, success = special_process_func(binary_data[cursor:cursor+data_size], name,
+                unpack_data, success = special_process_func(binary_data[cursor:cursor + data_size], name,
                                                             data_type, unpacked_data)
                 if success:
                     cursor += data_size
                     unpacked_data[name] = unpack_data
                     continue

-            unpack_data = struct.unpack(data_type.value, binary_data[cursor: cursor+data_size])[0]
+            unpack_data = struct.unpack(data_type.value, binary_data[cursor: cursor + data_size])[0]
             cursor += data_size
             unpacked_data[name] = unpack_data
         return unpacked_data
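The two `unpack_binary_data` edits above are whitespace-only; the underlying pattern is a cursor walking a byte buffer, unpacking one field per `data_struct` entry. A self-contained sketch of the same pattern using plain `struct` format strings (the record layout here is hypothetical, not the profiler's):

import struct

# Hypothetical fixed-layout record: a uint16 id, a uint32 size, a double ts.
data_struct = {"id": "H", "size": "I", "ts": "d"}

def unpack_record(binary_data):
    unpacked, cursor = {}, 0
    for name, fmt in data_struct.items():
        data_size = struct.calcsize(fmt)
        # Slice out exactly one field and advance the cursor past it.
        unpacked[name] = struct.unpack(fmt, binary_data[cursor:cursor + data_size])[0]
        cursor += data_size
    return unpacked

record = struct.pack("H", 7) + struct.pack("I", 64) + struct.pack("d", 1.5)
print(unpack_record(record))  # {'id': 7, 'size': 64, 'ts': 1.5}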
mindspore/profiler/common/util.py
CHANGED
@@ -23,6 +23,8 @@ import os
 import re
 import stat

+from mindspore import log as logger
+

 def to_int(param, param_name):
     """
@@ -80,9 +82,9 @@ def fwrite_format(output_data_path, data_source=None, is_print=False, is_start=F
     if isinstance(raw_data, (list, tuple)):
         raw_data = map(str, raw_data)
         raw_data = " ".join(raw_data)
-
+        logger.info(raw_data)
     else:
-
+        logger.info(data_source)


 def get_log_slice_id(file_name):
mindspore/profiler/envprofiling.py
CHANGED
@@ -68,7 +68,7 @@ def construct_profiling_options():
         logger.error(
             "The format of MS_PROFILER_OPTIONS is incorrect. "
             "The MS_PROFILER_OPTIONS parameter configuration may refer to "
-            "'https://www.mindspore.cn/mindinsight/docs/zh-CN/
+            "'https://www.mindspore.cn/mindinsight/docs/zh-CN/r2.2/performance_profiling_ascend.html'."
         )
         return error_config
     conbine_options = combine_profile_options(profiling_options)
mindspore/profiler/parser/aicpu_data_parser.py
CHANGED
@@ -72,9 +72,11 @@ class DataPreProcessParser:
     _source_file_target_old = 'DATA_PREPROCESS.dev.AICPU.'
     _source_file_target = 'DATA_PREPROCESS.AICPU.'
     _dst_file_title = 'title:DATA_PREPROCESS AICPU'
-    _dst_file_column_title = [
-
-
+    _dst_file_column_title = [
+        'serial_number', 'node_type_name', 'total_time(ms)',
+        'dispatch_time(ms)', 'execution_time(ms)', 'run_start',
+        'run_end'
+    ]
     _ms_unit = 1000
     _us_unit = 100  # Convert 10ns to 1us.
     _task_id_threshold = 65536
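The reconstructed `_dst_file_column_title` above is the header row of the AICPU intermediate file. A sketch of emitting such a header with the standard csv module; the output path is hypothetical, not the parser's actual write logic:

import csv

# Column titles as reconstructed in the hunk above.
_dst_file_column_title = [
    'serial_number', 'node_type_name', 'total_time(ms)',
    'dispatch_time(ms)', 'execution_time(ms)', 'run_start',
    'run_end'
]

# Hypothetical path; the real parser derives it from the profiling directory.
with open('aicpu_intermediate.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(_dst_file_column_title)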
mindspore/profiler/parser/ascend_flops_generator.py
CHANGED
@@ -78,7 +78,7 @@ class AscendFlopsGenerator:
                 writer.writerows(self.flops.tolist())
         except (IOError, OSError) as err:
             logging.critical('Errot occurred when write flops file: %s', err)
-            raise ProfilerIOException()
+            raise ProfilerIOException() from err
         if os.path.exists(flops_path):
             os.chmod(flops_path, stat.S_IREAD | stat.S_IWRITE)

@@ -89,6 +89,6 @@ class AscendFlopsGenerator:
                 json.dump(self.flops_summary, json_file)
         except (IOError, OSError) as err:
             logging.critical('Errot occurred when write step trace point info file: %s', err)
-            raise ProfilerIOException()
+            raise ProfilerIOException() from err
         if os.path.exists(flops_summary_path):
             os.chmod(flops_summary_path, stat.S_IREAD | stat.S_IWRITE)
mindspore/profiler/parser/ascend_fpbp_generator.py
CHANGED
@@ -71,6 +71,6 @@ class AscendFPBPGenerator:
                 json.dump(self.points, json_file)
         except (IOError, OSError) as err:
             logging.critical('Errot occurred when write step trace point info file: %s', err)
-            raise ProfilerIOException()
+            raise ProfilerIOException() from err
         if os.path.exists(step_trace_point_info_path):
             os.chmod(step_trace_point_info_path, stat.S_IREAD | stat.S_IWRITE)
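The three profiler hunks above switch to `raise ProfilerIOException() from err`, which chains the low-level IOError/OSError as `__cause__` so the traceback shows both errors. A minimal illustration with a hypothetical stand-in exception class:

class ProfilerIOException(Exception):
    """Hypothetical stand-in for the profiler's IO exception."""

try:
    try:
        open("/nonexistent/flops.csv")
    except (IOError, OSError) as err:
        # "from err" records the original error as __cause__, so the
        # traceback reports both the high- and low-level failures.
        raise ProfilerIOException() from err
except ProfilerIOException as exc:
    print(type(exc.__cause__).__name__)  # FileNotFoundError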
@@ -82,6 +82,238 @@ def count_average(data):
|
|
|
82
82
|
class AscendHCCLGenerator:
|
|
83
83
|
"""Generate ascend hccl data from files."""
|
|
84
84
|
|
|
85
|
+
def __init__(self, source_path, steptrace):
|
|
86
|
+
self.root_path = source_path
|
|
87
|
+
self.steptrace = steptrace
|
|
88
|
+
self.hccl_raw = []
|
|
89
|
+
self.hccl_data_df = np.dtype(
|
|
90
|
+
[('model_id', int), ('iteration_id', int), ('name', object), ('pid', int), ('tid', int), ('ts', float),
|
|
91
|
+
('te', float), ('dur', float), ('ph', object),
|
|
92
|
+
('task_type', object), ('link_info', object), ('transport_type', object), ('size', int), ('tag', object)])
|
|
93
|
+
|
|
94
|
+
@staticmethod
|
|
95
|
+
def _cost_analyse(iteration):
|
|
96
|
+
"""analyse communication cost and wait cost"""
|
|
97
|
+
communication_cost = np.sum(iteration[iteration['name'] != 'Notify_Wait']['dur'])
|
|
98
|
+
wait_cost = np.sum(iteration[iteration['name'] == 'Notify_Wait']['dur'])
|
|
99
|
+
return communication_cost, wait_cost
|
|
100
|
+
|
|
101
|
+
@staticmethod
|
|
102
|
+
def _rdma_analyse(groupby_transport):
|
|
103
|
+
"""rdma analyse"""
|
|
104
|
+
thread_groups = np.unique(groupby_transport['tid'])
|
|
105
|
+
thread_information = []
|
|
106
|
+
for thread_index in thread_groups:
|
|
107
|
+
groupby_thread = groupby_transport[groupby_transport['tid'] == thread_index]
|
|
108
|
+
rdma_communication_time = 0
|
|
109
|
+
rdma_communication_size = 0
|
|
110
|
+
rdma_communication_wait_time = 0
|
|
111
|
+
start_index = 0
|
|
112
|
+
end_index = groupby_thread.size - 2
|
|
113
|
+
while start_index < end_index:
|
|
114
|
+
first_task_type = groupby_thread[start_index]['task_type']
|
|
115
|
+
if first_task_type == 'RDMASend':
|
|
116
|
+
second_index = start_index + 1
|
|
117
|
+
third_index = start_index + 2
|
|
118
|
+
second_task_type = groupby_thread[second_index]['task_type']
|
|
119
|
+
third_task_type = groupby_thread[third_index]['task_type']
|
|
120
|
+
if second_task_type == 'RDMASend' and third_task_type == 'Notify Wait':
|
|
121
|
+
rdma_send_cost = groupby_thread[start_index]['dur']
|
|
122
|
+
notify_record_cost = groupby_thread[second_index]['dur']
|
|
123
|
+
notify_wait_cost = groupby_thread[third_index]['dur']
|
|
124
|
+
rdma_communication_time += rdma_send_cost + notify_record_cost + notify_wait_cost
|
|
125
|
+
rdma_communication_wait_time += notify_wait_cost
|
|
126
|
+
rdma_communication_size += groupby_thread[start_index]['size'] + groupby_thread[second_index][
|
|
127
|
+
'size']
|
|
128
|
+
start_index += 2
|
|
129
|
+
start_index += 1
|
|
130
|
+
rdma_communication_wait_time = rdma_communication_wait_time / 1e3
|
|
131
|
+
rdma_communication_size = rdma_communication_size / 1e3
|
|
132
|
+
rdma_communication_time = rdma_communication_time / 1e3
|
|
133
|
+
rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3) \
|
|
134
|
+
if rdma_communication_size else 0
|
|
135
|
+
thread_information.append(
|
|
136
|
+
[rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time])
|
|
137
|
+
if len(thread_information) > 1:
|
|
138
|
+
thread_information = np.sum(thread_information, axis=0).tolist()
|
|
139
|
+
|
|
140
|
+
return thread_information
|
|
141
|
+
|
|
142
|
+
def parse(self):
|
|
143
|
+
"""Analyse the original hccl data generator hccl data."""
|
|
144
|
+
hccl_data = []
|
|
145
|
+
for hccl_file in find_files(self.root_path, "hccl_*.json"):
|
|
146
|
+
with open(hccl_file) as fr:
|
|
147
|
+
hccl_data.append(self._original_data_analyse(json.load(fr)))
|
|
148
|
+
hccl_data = np.concatenate(hccl_data)
|
|
149
|
+
|
|
150
|
+
for model_id in np.unique(hccl_data['model_id']):
|
|
151
|
+
hccl_data_model = hccl_data[hccl_data['model_id'] == model_id]
|
|
152
|
+
for iteration_id in np.unique(hccl_data_model['iteration_id']):
|
|
153
|
+
hccl_data_model_iteration = hccl_data_model[hccl_data_model['iteration_id'] == iteration_id]
|
|
154
|
+
|
|
155
|
+
hccl_abstract_data = hccl_data_model_iteration[hccl_data_model_iteration['task_type'] == '']
|
|
156
|
+
hccl_detail_data = hccl_data_model_iteration[hccl_data_model_iteration['task_type'] != '']
|
|
157
|
+
hccl_abstract_data = np.sort(hccl_abstract_data, order='ts')
|
|
158
|
+
hccl_detail_data = np.sort(hccl_detail_data, order='ts')
|
|
159
|
+
|
|
160
|
+
tag = np.searchsorted(hccl_abstract_data['ts'], hccl_detail_data['ts'], side='right') - 1
|
|
161
|
+
|
|
162
|
+
hccl_detail_data['tag'] = [x[-1] for x in
|
|
163
|
+
np.char.split(hccl_abstract_data[tag]['name'].astype(str), sep='/')]
|
|
164
|
+
|
|
165
|
+
self.hccl_raw.append(self._iteration_analyse(hccl_detail_data, iteration_id))
|
|
166
|
+
|
|
167
|
+
self.hccl_raw = sorted(self.hccl_raw, key=lambda x: x[0])
|
|
168
|
+
self.hccl_raw.append(copy.deepcopy(self.hccl_raw[-1]))
|
|
169
|
+
self.hccl_raw[-1][0] = '-'
|
|
170
|
+
for _, value in self.hccl_raw[-1][4].items():
|
|
171
|
+
value[0] = '-'
|
|
172
|
+
|
|
173
|
+
def write(self, hccl_raw_path):
|
|
174
|
+
"""
|
|
175
|
+
Write the flops.csv and flops_summary.json
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
hccl_raw_path(str): hccl_raw.csv path.
|
|
179
|
+
"""
|
|
180
|
+
try:
|
|
181
|
+
with os.fdopen(os.open(hccl_raw_path,
|
|
182
|
+
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR), 'w',
|
|
183
|
+
newline='') as hccl_row:
|
|
184
|
+
writer = csv.writer(hccl_row)
|
|
185
|
+
writer.writerow(
|
|
186
|
+
['step_num', 'communication_cost', 'wait_cost', 'link_info', 'communication_operator_cost'])
|
|
187
|
+
for row in self.hccl_raw:
|
|
188
|
+
row[3] = json.dumps(row[3])
|
|
189
|
+
row[4] = json.dumps(row[4])
|
|
190
|
+
writer.writerows(self.hccl_raw)
|
|
191
|
+
except (IOError, OSError) as err:
|
|
192
|
+
logging.critical('Errot occurred when write aicore detail file: %s', err)
|
|
193
|
+
raise ProfilerIOException() from err
|
|
194
|
+
if os.path.exists(hccl_raw_path):
|
|
195
|
+
os.chmod(hccl_raw_path, stat.S_IREAD | stat.S_IWRITE)
|
|
196
|
+
+    def _original_data_analyse(self, original_data):
+        """analyse original data"""
+
+        groups_steptrace = {model_id: np.sort(self.steptrace[self.steptrace['Model ID'] == model_id],
+                                              order='Iteration ID')
+                            for model_id in np.unique(self.steptrace['Model ID'])}
+
+        target_data = []
+        for row in original_data:
+            model_id = row.get('args', {}).get('model id')
+            if row.get('ph') == 'X' and model_id is not None:
+                name = row.get('name')
+                pid = row.get('pid')
+                tid = row.get('tid')
+                ts = row.get('ts')
+                dur = row.get('dur')
+                te = ts + dur
+                ph = row.get('ph')
+                task_type = row.get('args', {}).get('task type', '')
+                src_rank = row.get('args', {}).get('src rank', 0)
+                dst_rank = row.get('args', {}).get('dst rank', 0)
+                if src_rank == int('0xffffffff', 16):
+                    src_rank = dst_rank
+                if dst_rank == int('0xffffffff', 16):
+                    dst_rank = src_rank
+                transport_type = row.get('args', {}).get('transport type', '')
+                if transport_type == 'LOCAL':
+                    src_rank, dst_rank = dst_rank, src_rank
+                link_info = str(src_rank) + '-' + str(dst_rank)
+                size = row.get('args', {}).get('size(Byte)', 0)
+                size = size if isinstance(size, int) else int(size, 16)
+                steptrace = groups_steptrace.get(model_id, None)
+                if steptrace is None:
+                    logging.warning('Could not find model: %s in hccl json, skip.', model_id)
+                    continue
+                tag = np.searchsorted(steptrace['Iteration End'], te * 1e-3, side='left')
+                iteration_id = steptrace[tag]['Iteration ID']
+                target_data.append(
+                    tuple([model_id, iteration_id, name, pid, tid,
+                           ts, te, dur, ph, task_type,
+                           link_info, transport_type, size, -1]))
+
+        hccl_data = np.array(target_data, dtype=self.hccl_data_df)
+
+        return hccl_data
+
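Within _original_data_analyse, HCCL's sentinel rank 0xffffffff marks an invalid endpoint: the invalid side is replaced by the other rank, and LOCAL transfers swap the pair before the 'src-dst' key is built. A sketch of just that normalization; link_key is a hypothetical helper, not a function in this file:

    INVALID_RANK = int('0xffffffff', 16)

    def link_key(src_rank, dst_rank, transport_type):
        # Replace an invalid endpoint with the opposite rank.
        if src_rank == INVALID_RANK:
            src_rank = dst_rank
        if dst_rank == INVALID_RANK:
            dst_rank = src_rank
        # LOCAL transfers swap the pair, mirroring the code above.
        if transport_type == 'LOCAL':
            src_rank, dst_rank = dst_rank, src_rank
        return str(src_rank) + '-' + str(dst_rank)

    print(link_key(INVALID_RANK, 3, 'SDMA'))  # '3-3'
    print(link_key(0, 1, 'LOCAL'))            # '1-0'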
+    def _iteration_analyse(self, hccl_detail_data, iteration):
+        """analyse data by iteration"""
+        communication_cost, wait_cost = self._cost_analyse(hccl_detail_data)
+        link_info = self._link_info_analyse(hccl_detail_data)
+        communication_operator_cost = self._communication_operator_cost_analyse(hccl_detail_data, iteration)
+        return [iteration, communication_cost, wait_cost, link_info, communication_operator_cost]
+
+    def _link_info_analyse(self, hccl_detail_data):
+        """analyse link info data"""
+        groupby_iteration = hccl_detail_data[hccl_detail_data['task_type'] != 'Notify Record']
+        link_info_groups = np.unique(groupby_iteration['link_info'])
+        link_info_information = dict()
+        for link_info_index in link_info_groups:
+            groupby_link_info = groupby_iteration[groupby_iteration['link_info'] == link_info_index]
+            transport_groups = np.unique(groupby_iteration['transport_type'])
+            transport_information = dict()
+            for transport_index in transport_groups:
+                groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
+                if transport_index == 'SDMA' and groupby_transport.size > 0:
+                    groupby_sdma = \
+                        groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])][
+                            ['dur', 'size']]
+                    sdma_communication_time = np.sum(groupby_sdma['dur']) * 1e-3
+                    sdma_communication_size = np.sum(groupby_sdma['size']) * 1e-3
+                    sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
+                        if sdma_communication_time != 0 else 0
+                    transport_information['SDMA'] = [sdma_communication_time, sdma_communication_size, sdma_bandwidth]
+                elif transport_index == 'RDMA' and groupby_transport.size > 0:
+                    transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
+            link_info_information[link_info_index] = transport_information
+        return link_info_information
+
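_link_info_analyse groups a structured array the NumPy way: np.unique supplies the group keys and a boolean mask selects each group's rows. The scale factors then convert units; assuming 'dur' is in microseconds and 'size' in bytes (the diff itself does not state the units), the two * 1e-3 factors yield milliseconds and KB, and the final * 1e-3 puts the bandwidth roughly in GB/s. A toy sketch of the group-and-aggregate step:

    import numpy as np

    tasks = np.array([(1200.0, 4096, 'Memcpy'), (800.0, 2048, 'Memcpy')],
                     dtype=[('dur', 'f8'), ('size', 'i8'), ('task_type', 'U16')])
    for task_type in np.unique(tasks['task_type']):
        group = tasks[tasks['task_type'] == task_type]   # boolean-mask "group by"
        time_ms = np.sum(group['dur']) * 1e-3            # us -> ms (assumed unit)
        size_kb = np.sum(group['size']) * 1e-3           # bytes -> KB
        bandwidth = size_kb / time_ms * 1e-3 if time_ms != 0 else 0
        print(task_type, time_ms, size_kb, bandwidth)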
+    def _communication_operator_cost_analyse(self, hccl_detail_data, iteration_index):
+        """analyse communication operator cost"""
+        groupby_iteration = hccl_detail_data[hccl_detail_data['task_type'] != 'Notify Record']
+        tag_groups = np.unique(groupby_iteration['tag'])
+        tag_information = dict()
+        for tag_index in tag_groups:
+            groupby_tag = groupby_iteration[groupby_iteration['tag'] == tag_index]
+            link_groups = np.unique(groupby_iteration['link_info'])
+            link_info_information = dict()
+            for link_info_index in link_groups:
+                groupby_link_info = groupby_tag[groupby_tag['link_info'] == link_info_index]
+                transport_groups = np.unique(groupby_link_info['transport_type'])
+                transport_information = dict()
+                for transport_index in transport_groups:
+                    groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
+                    if transport_index == 'SDMA':
+                        groupby_sdma = \
+                            groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])][
+                                ['dur', 'size']]
+                        sdma_communication_time = np.sum(groupby_sdma['dur']) * 1e-3
+                        sdma_communication_size = np.sum(groupby_sdma['size']) * 1e-3
+                        sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
+                            if sdma_communication_time != 0 else 0
+                        transport_information['SDMA'] = [
+                            sdma_communication_time, sdma_communication_size,
+                            sdma_bandwidth
+                        ]
+                    elif transport_index == 'RDMA':
+                        transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
+                link_info_information[link_info_index] = transport_information
+            communication_cost = np.sum(groupby_tag[groupby_tag['name'] != 'Notify_Wait']['dur'])
+            wait_cost = np.sum(groupby_tag[groupby_tag['name'] == 'Notify_Wait']['dur'])
+            tag_information[tag_index] = [
+                str(iteration_index), communication_cost, wait_cost,
+                link_info_information
+            ]
+        return tag_information
+
+
+class AscendHCCLGeneratorOld:
+    """Generate ascend hccl data from files."""
+
     def __init__(self, source_path):
         self.root_path = source_path
         self.hccl_raw = []
@@ -99,7 +331,7 @@ class AscendHCCLGenerator:
     @staticmethod
     def _rdma_analyse(groupby_transport):
         """rdma analyse"""
-        thread_groups
+        thread_groups = np.unique(groupby_transport['tid'])
         thread_information = []
         for thread_index in thread_groups:
             groupby_thread = groupby_transport[groupby_transport['tid'] == thread_index]
@@ -107,7 +339,7 @@ class AscendHCCLGenerator:
             rdma_communication_size = 0
             rdma_communication_wait_time = 0
             start_index = 0
-            end_index = groupby_thread.size -
+            end_index = groupby_thread.size - 2
             while start_index < end_index:
                 first_task_type = groupby_thread[start_index]['task_type']
                 if first_task_type == 'RDMASend':
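Only the head of the RDMA scan is visible in this hunk: per-thread tasks are walked with a two-task look-ahead, which is why the loop bound becomes groupby_thread.size - 2. A hedged sketch of such a windowed scan; the RDMASend/RDMASend/Notify Wait triple it matches is an assumption about the elided loop body, not code shown in the diff:

    def scan_rdma(tasks):
        """tasks: time-ordered (task_type, dur) pairs for one thread."""
        communication_time = 0.0
        start, end = 0, len(tasks) - 2   # "- 2" keeps the look-ahead in bounds
        while start < end:
            if (tasks[start][0] == 'RDMASend'
                    and tasks[start + 1][0] == 'RDMASend'
                    and tasks[start + 2][0] == 'Notify Wait'):
                communication_time += sum(t[1] for t in tasks[start:start + 3])
                start += 3
            else:
                start += 1
        return communication_time

    print(scan_rdma([('RDMASend', 1.0), ('RDMASend', 2.0), ('Notify Wait', 3.0)]))  # 6.0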
@@ -142,7 +374,8 @@ class AscendHCCLGenerator:
         file_list = find_files(self.root_path, "hccl_*.json")

         for hccl_file in file_list:
-
+            _, relative_path = os.path.split(hccl_file)
+            iteration_id = int(relative_path.split('_')[3])
             with open(hccl_file) as f:
                 _, hccl_detail_data = self._original_data_analyse(json.load(f))
                 raw = self._iteration_analyse(hccl_detail_data, iteration_id)
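The two added lines derive the iteration id from the file name: os.path.split takes the basename, and the fourth '_'-separated token is parsed as an integer. A sketch with an invented file name (the real naming scheme is not shown in this diff):

    import os

    hccl_file = '/tmp/profiler/hccl_0_1_15_20240101.json'  # invented sample name
    _, relative_path = os.path.split(hccl_file)
    iteration_id = int(relative_path.split('_')[3])
    print(iteration_id)  # 15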
@@ -173,7 +406,7 @@ class AscendHCCLGenerator:
                 writer.writerows(self.hccl_raw)
         except (IOError, OSError) as err:
             logging.critical('Error occurred when writing hccl raw file: %s', err)
-            raise ProfilerIOException()
+            raise ProfilerIOException() from err
         if os.path.exists(hccl_raw_path):
             os.chmod(hccl_raw_path, stat.S_IREAD | stat.S_IWRITE)

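The change above switches to explicit exception chaining: raise ... from err stores the original OSError on __cause__, so a traceback shows both the low-level failure and the profiler's own exception. A small sketch; ProfilerIOError stands in for ProfilerIOException:

    class ProfilerIOError(Exception):  # stand-in for ProfilerIOException
        pass

    try:
        try:
            open('/nonexistent/hccl_raw.csv')
        except OSError as err:
            raise ProfilerIOError('failed to write hccl raw file') from err
    except ProfilerIOError as exc:
        print(type(exc.__cause__).__name__)  # FileNotFoundError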
@@ -206,8 +439,8 @@ class AscendHCCLGenerator:
                     tuple([name, pid, tid, ts, te, dur, ph, task_type, link_info, transport_type, size, -1]))
         hccl_data = np.array(target_data, dtype=self.hccl_data_df)

-        hccl_abstract_data = hccl_data[hccl_data['
-        hccl_detail_data = hccl_data[hccl_data['
+        hccl_abstract_data = hccl_data[hccl_data['task_type'] == '']
+        hccl_detail_data = hccl_data[hccl_data['task_type'] != '']

         hccl_abstract_data = hccl_abstract_data[np.argsort(hccl_abstract_data['ts'])]
         hccl_detail_data = hccl_detail_data[np.argsort(hccl_detail_data['ts'])]
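After the restored task_type filters, the old class splits rows into abstract (empty task_type) and detail sets and orders each by timestamp with the argsort fancy-indexing idiom; the new class reaches the same ordering with np.sort(..., order='ts'). A sketch showing the two are interchangeable here:

    import numpy as np

    a = np.array([(3.0, 'b'), (1.0, 'a')], dtype=[('ts', 'f8'), ('name', 'U8')])
    by_argsort = a[np.argsort(a['ts'])]   # fancy-indexing idiom (old class)
    by_order = np.sort(a, order='ts')     # field-order sort (new class)
    print(np.array_equal(by_argsort['ts'], by_order['ts']))  # True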
@@ -234,7 +467,7 @@ class AscendHCCLGenerator:
             transport_information = dict()
             for transport_index in transport_groups:
                 groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
-                if transport_index == 'SDMA':
+                if transport_index == 'SDMA' and groupby_transport.size > 0:
                     groupby_sdma = \
                         groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])][
                             ['dur', 'size']]
@@ -243,7 +476,7 @@ class AscendHCCLGenerator:
                     sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
                         if sdma_communication_time != 0 else 0
                     transport_information['SDMA'] = [sdma_communication_time, sdma_communication_size, sdma_bandwidth]
-                elif transport_index == 'RDMA':
+                elif transport_index == 'RDMA' and groupby_transport.size > 0:
                     transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
             link_info_information[link_info_index] = transport_information
         return link_info_information
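The added groupby_transport.size > 0 guards mirror the new class: a boolean mask can select zero rows, and summing an empty selection silently yields 0.0, which would record a meaningless all-zero SDMA entry (or hand _rdma_analyse an empty array). A two-line illustration:

    import numpy as np

    tasks = np.array([], dtype=[('dur', 'f8'), ('size', 'i8')])
    print(tasks.size, np.sum(tasks['dur']))  # 0 0.0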
@@ -271,13 +504,17 @@ class AscendHCCLGenerator:
                         sdma_communication_size = np.sum(groupby_sdma['size']) * 1e-3
                         sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
                             if sdma_communication_time != 0 else 0
-                        transport_information['SDMA'] = [
-
+                        transport_information['SDMA'] = [
+                            sdma_communication_time, sdma_communication_size,
+                            sdma_bandwidth
+                        ]
                     elif transport_index == 'RDMA':
                         transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
                 link_info_information[link_info_index] = transport_information
                 communication_cost = np.sum(groupby_tag[groupby_tag['name'] != 'Notify_Wait']['dur'])
                 wait_cost = np.sum(groupby_tag[groupby_tag['name'] == 'Notify_Wait']['dur'])
-            tag_information[tag_index] = [
-
+            tag_information[tag_index] = [
+                str(iteration_index), communication_cost, wait_cost,
+                link_info_information
+            ]
         return tag_information