mindspore 2.3.0rc1__cp39-none-any.whl → 2.3.0rc2__cp39-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +13 -3
- mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +20 -0
- mindspore/_extends/parse/parser.py +1 -1
- mindspore/_extends/parse/standard_method.py +6 -5
- mindspore/_mindspore_offline_debug.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +5 -5
- mindspore/boost/boost_cell_wrapper.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +4 -2
- mindspore/common/_register_for_recompute.py +48 -0
- mindspore/common/_stub_tensor.py +1 -0
- mindspore/common/api.py +56 -4
- mindspore/common/dtype.py +5 -3
- mindspore/common/dump.py +2 -2
- mindspore/common/hook_handle.py +51 -4
- mindspore/common/initializer.py +1 -1
- mindspore/common/jit_config.py +17 -6
- mindspore/common/parameter.py +7 -2
- mindspore/common/recompute.py +247 -0
- mindspore/common/sparse_tensor.py +2 -2
- mindspore/common/symbol.py +1 -1
- mindspore/common/tensor.py +74 -36
- mindspore/communication/__init__.py +3 -3
- mindspore/communication/management.py +30 -30
- mindspore/context.py +28 -15
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +51 -51
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +3 -3
- mindspore/dataset/engine/datasets_audio.py +14 -14
- mindspore/dataset/engine/datasets_standard_format.py +3 -3
- mindspore/dataset/engine/datasets_text.py +38 -38
- mindspore/dataset/engine/datasets_user_defined.py +3 -3
- mindspore/dataset/engine/datasets_vision.py +68 -68
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +26 -26
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/transforms.py +92 -92
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/experimental/optim/adadelta.py +2 -2
- mindspore/experimental/optim/adagrad.py +2 -2
- mindspore/experimental/optim/adam.py +2 -2
- mindspore/experimental/optim/adamax.py +2 -2
- mindspore/experimental/optim/adamw.py +2 -2
- mindspore/experimental/optim/asgd.py +2 -2
- mindspore/experimental/optim/lr_scheduler.py +24 -20
- mindspore/experimental/optim/nadam.py +2 -2
- mindspore/experimental/optim/optimizer.py +1 -1
- mindspore/experimental/optim/radam.py +2 -2
- mindspore/experimental/optim/rmsprop.py +2 -2
- mindspore/experimental/optim/rprop.py +2 -2
- mindspore/experimental/optim/sgd.py +2 -2
- mindspore/hal/stream.py +2 -0
- mindspore/include/mindapi/base/types.h +5 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +6 -6
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +101787 -98559
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/base/op_register.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/mix.h +8 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/norm.h +5 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/params/reduce.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/backend/backend.h +3 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/backend/rtbackend.h +3 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/module/module.h +3 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/svector/svector.h +3 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/tiling/add_tiling.h +9 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +2 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_base.h +460 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_bf16.h +217 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp16.h +116 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_tiling.h +16 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_value.h +27 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -4
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention/FlashAttentionScore_impl.h → flash_attention_score/flash_attention_score_impl.h} +2 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention/bs_attention_tiling.h → flash_attention_score/flash_attention_score_tiling.h} +15 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/gelu/tiling/gelu_tiling.h +7 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/lccl/lccl_wrapper.h +58 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +19 -8
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{matmul → matmul_common}/pp_matmul_common_tiling.h +18 -8
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{matmul → matmul_common}/pp_matmul_info.h +7 -4
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{matmul → matmul_common}/tiling_data.h +44 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_utils.h +65 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +10 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +4 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +41 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention/PagedAttention_impl.h → paged_attention/paged_attention_impl.h} +1 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +63 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/add_param.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention_param.h → param/attention_param.h} +11 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +37 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +45 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/reshape_and_cache/reshape_and_cache_tiling.h +1 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/rms_norm.h +23 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/rms_norm_base.h +175 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/rms_norm_normal.h +276 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/rms_norm_split_d.h +280 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/kernel/tiling_data.h +35 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +45 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/kernel/sub_kernel.h +20 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +47 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_tiling.h +25 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +323 -23
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/types.h +15 -4
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +8 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lcal.h +22 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lcal_comm.h +70 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lcal_types.h +103 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lccl.h +47 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lccl_wrapper.h +58 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/include/lcoc.h +154 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +2 -2
- mindspore/mint/__init__.py +457 -0
- mindspore/mint/nn/__init__.py +430 -0
- mindspore/mint/nn/functional.py +424 -0
- mindspore/mint/optim/__init__.py +24 -0
- mindspore/mint/optim/adamw.py +186 -0
- mindspore/multiprocessing/__init__.py +4 -0
- mindspore/nn/__init__.py +3 -0
- mindspore/nn/cell.py +51 -47
- mindspore/nn/extend/__init__.py +29 -0
- mindspore/nn/extend/basic.py +140 -0
- mindspore/nn/extend/embedding.py +143 -0
- mindspore/nn/extend/layer/__init__.py +27 -0
- mindspore/nn/extend/layer/normalization.py +107 -0
- mindspore/nn/extend/pooling.py +117 -0
- mindspore/nn/generator.py +297 -0
- mindspore/nn/layer/basic.py +109 -1
- mindspore/nn/layer/container.py +2 -2
- mindspore/nn/layer/conv.py +6 -6
- mindspore/nn/layer/embedding.py +1 -1
- mindspore/nn/layer/normalization.py +21 -43
- mindspore/nn/layer/padding.py +4 -0
- mindspore/nn/optim/ada_grad.py +2 -2
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +7 -7
- mindspore/nn/optim/adamax.py +2 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -2
- mindspore/nn/optim/ftrl.py +1 -1
- mindspore/nn/optim/lamb.py +3 -3
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +2 -2
- mindspore/nn/optim/momentum.py +2 -2
- mindspore/nn/optim/optimizer.py +2 -2
- mindspore/nn/optim/proximal_ada_grad.py +2 -2
- mindspore/nn/optim/rmsprop.py +2 -2
- mindspore/nn/optim/rprop.py +2 -2
- mindspore/nn/optim/sgd.py +2 -2
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +9 -9
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/ops/_grad_experimental/grad_comm_ops.py +4 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -2
- mindspore/ops/_vmap/vmap_math_ops.py +27 -8
- mindspore/ops/_vmap/vmap_nn_ops.py +66 -8
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +73 -1
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +12 -3
- mindspore/ops/auto_generate/gen_arg_handler.py +24 -0
- mindspore/ops/auto_generate/gen_extend_func.py +274 -0
- mindspore/ops/auto_generate/gen_ops_def.py +889 -22
- mindspore/ops/auto_generate/gen_ops_prim.py +3541 -253
- mindspore/ops/auto_generate/pyboost_inner_prim.py +282 -0
- mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +9 -0
- mindspore/ops/extend/__init__.py +9 -1
- mindspore/ops/extend/array_func.py +134 -27
- mindspore/ops/extend/math_func.py +3 -3
- mindspore/ops/extend/nn_func.py +363 -2
- mindspore/ops/function/__init__.py +19 -2
- mindspore/ops/function/array_func.py +463 -439
- mindspore/ops/function/clip_func.py +7 -18
- mindspore/ops/function/grad/grad_func.py +5 -5
- mindspore/ops/function/linalg_func.py +4 -4
- mindspore/ops/function/math_func.py +260 -243
- mindspore/ops/function/nn_func.py +825 -62
- mindspore/ops/function/random_func.py +73 -4
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +1 -1
- mindspore/ops/functional.py +2 -2
- mindspore/ops/op_info_register.py +1 -31
- mindspore/ops/operations/__init__.py +2 -3
- mindspore/ops/operations/_grad_ops.py +2 -107
- mindspore/ops/operations/_inner_ops.py +5 -5
- mindspore/ops/operations/_sequence_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +11 -233
- mindspore/ops/operations/comm_ops.py +32 -32
- mindspore/ops/operations/custom_ops.py +7 -89
- mindspore/ops/operations/manually_defined/ops_def.py +329 -4
- mindspore/ops/operations/math_ops.py +13 -163
- mindspore/ops/operations/nn_ops.py +9 -316
- mindspore/ops/operations/random_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +3 -3
- mindspore/ops/primitive.py +2 -2
- mindspore/ops_generate/arg_dtype_cast.py +12 -3
- mindspore/ops_generate/arg_handler.py +24 -0
- mindspore/ops_generate/gen_ops_inner_prim.py +2 -0
- mindspore/ops_generate/gen_pyboost_func.py +13 -6
- mindspore/ops_generate/pyboost_utils.py +2 -17
- mindspore/parallel/__init__.py +3 -2
- mindspore/parallel/_auto_parallel_context.py +106 -1
- mindspore/parallel/_parallel_serialization.py +34 -2
- mindspore/parallel/_utils.py +16 -0
- mindspore/parallel/algo_parameter_config.py +4 -4
- mindspore/parallel/checkpoint_transform.py +249 -77
- mindspore/parallel/cluster/process_entity/_api.py +1 -1
- mindspore/parallel/parameter_broadcast.py +1 -1
- mindspore/parallel/shard.py +1 -1
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +1 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +17 -5
- mindspore/profiler/parser/ascend_msprof_exporter.py +3 -3
- mindspore/profiler/parser/ascend_msprof_generator.py +10 -3
- mindspore/profiler/parser/ascend_op_generator.py +26 -9
- mindspore/profiler/parser/ascend_timeline_generator.py +7 -4
- mindspore/profiler/parser/profiler_info.py +11 -1
- mindspore/profiler/profiling.py +13 -5
- mindspore/rewrite/api/node.py +12 -12
- mindspore/rewrite/api/symbol_tree.py +11 -11
- mindspore/run_check/_check_version.py +1 -1
- mindspore/safeguard/rewrite_obfuscation.py +2 -2
- mindspore/train/amp.py +4 -4
- mindspore/train/anf_ir_pb2.py +8 -2
- mindspore/train/callback/_backup_and_restore.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +2 -2
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +2 -2
- mindspore/train/callback/_summary_collector.py +2 -2
- mindspore/train/callback/_time_monitor.py +2 -2
- mindspore/train/dataset_helper.py +8 -3
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/mind_ir_pb2.py +22 -17
- mindspore/train/model.py +15 -15
- mindspore/train/serialization.py +18 -18
- mindspore/train/summary/summary_record.py +7 -7
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/version.py +1 -1
- {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +1 -1
- {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +307 -260
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/tiling_data.h +0 -59
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/FlashAttentionScore_bf16_BNSD_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/FlashAttentionScore_bf16_BSH_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/FlashAttentionScore_fp16_BNSD_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/FlashAttentionScore_fp16_BSH_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/PagedAttention_bf16_BNSD_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/PagedAttention_bf16_BSH_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/PagedAttention_fp16_BNSD_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/PagedAttention_fp16_BSH_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{attention/bs_attention_mix_hwsync.h → flash_attention_score/kernel/flash_attention_score_mix_hwsync.h} +0 -0
- {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0rc1.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LCAL_COMM_H
+#define LCAL_COMM_H
+
+#include <vector>
+#include <string>
+#include <unordered_map>
+
+#include <lcal_types.h>
+#include <hccl.h>
+
+namespace Lcal {
+constexpr int IPC_NAME_SIZE = 65;
+
+class LcalComm {
+public:
+    LcalComm(int rank, int rankSize, int devId = -1, const std::vector<int> &devList = {});
+    ~LcalComm();
+    LcalComm(const LcalComm &) = delete;
+    LcalComm &operator=(const LcalComm &) = delete;
+    int Init(const std::string &uid = "", int maxBuffSize = LCAL_BUFF_BYTES);
+    int InitThread();
+    int GetRank() const;
+    int GetRankSize() const;
+    const PhysicalInfo &GetPhysicalInfo() const;
+    friend class Lccl;
+    friend class Lcoc;
+
+private:
+    int SetIpcPid(const char *name, const uint32_t *pids) const;
+    int OpenIpcMem(const char names[LCAL_MAX_RANK_SIZE][IPC_NAME_SIZE]);
+    int GetDev();
+    int GetDevThread();
+    int EnablePeerAccess();
+    int InitCommMem(int maxBuffSize);
+    int InitCommon();
+    void FreePeerMem(int8_t *&mem);
+    int InitMem(int maxBuffSize);
+    int GetPid(uint32_t *pids);
+    int GetName(const char *name, char names[LCAL_MAX_RANK_SIZE][IPC_NAME_SIZE]);
+
+private:
+    std::string shmName_ = "lccl";
+    int rank_ = 0;
+    int rankSize_ = 0;
+    int devId_ = 0;
+    bool inited_ = false;
+    std::vector<int> devList_;
+    int8_t *peerMem_[LCAL_MAX_RANK_SIZE] = {}; // shared ping pong buff
+    std::unordered_map<std::string, const char *> kernelNameMap_;
+    PhysicalInfo physicalInfo_ = {};
+    bool deterministic_ = false;
+};
+} // Lcal
+
+#endif // LCAL_COMM_H
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LCAL_TYPES_H
+#define LCAL_TYPES_H
+
+#include <cstdint>
+#include <hccl_types.h>
+#include <map>
+#include <string>
+
+namespace Lcal {
+constexpr int LCAL_SUCCESS = 0;
+constexpr int LCAL_ERROR_NOT_INITIALIZED = -1;
+constexpr int LCAL_ERROR_ASDRT = -2;
+constexpr int LCAL_ERROR_PARA_CHECK_FAIL = -3;
+constexpr int LCAL_ERROR_INTERNAL = -4;
+constexpr int LCAL_ERROR_TIMEOUT = -5;
+constexpr int LCCL_ERROR_INIT_HCCL_FAILED = -6;
+constexpr int64_t LCAL_INVALID_VALUE = -1;
+constexpr int LCAL_BUFF_BYTES = 202 * 1024 * 1024; // shared buffer size
+constexpr int LCAL_MAX_RANK_SIZE = 8;
+
+constexpr uint32_t ALIGN_BYTES = 512;
+
+enum class ChipName {
+    CHIP_310P3,
+    CHIP_910B1,
+    CHIP_910B2,
+    CHIP_910B3,
+    CHIP_910B4,
+    RESERVED,
+};
+
+enum class PhysicalLink {
+    HCCS = 0,
+    PCIE = 1,
+    RESERVED,
+};
+
+// 包含 物理链路、芯片名称 信息。
+struct PhysicalInfo {
+    ChipName chipName = ChipName::RESERVED;
+    PhysicalLink physicalLink = PhysicalLink::RESERVED;
+    uint32_t coreNum = 0;
+};
+
+enum class LcalType {
+    ALL_REDUCE = 1,
+    REDUCE_SCATTER = 2,
+    ALL_GATHER = 3,
+    BROADCAST = 4,
+    ALL2ALL = 5,
+    PURE_MATMUL = 101,
+    MATMUL_ALL_REDUCE = 102,
+    MATMUL_REDUCE_SCATTER = 103,
+    ALL_GATHER_MATMUL = 104,
+    ALL_GATHER_MATMUL_V2 = 105,
+    ALL2ALL_MATMUL = 106,
+    MATMUL_ALL2ALL = 107,
+    MTE2_TEST = 108,
+    MATMUL_ALL_REDUCE_DETERMINISTIC = 109,
+    MATMUL_REDUCE_SCATTER_DETERMINISTIC = 110,
+    LCAL_TYPE_MAX = 111,
+};
+
+const std::map<LcalType, std::string> LCAL_TYPE2NAME = {
+    { LcalType::ALL_REDUCE, "LcalAllReduce" },
+    { LcalType::REDUCE_SCATTER, "LcalReduceScatter" },
+    { LcalType::ALL_GATHER, "LcalAllGather" },
+    { LcalType::BROADCAST, "LcalBroadcast" },
+    { LcalType::ALL2ALL, "LcalAll2All" },
+    { LcalType::PURE_MATMUL, "LcalPureMatmul" },
+    { LcalType::MATMUL_ALL_REDUCE, "LcalMatmulAllReduce" },
+    { LcalType::MATMUL_REDUCE_SCATTER, "LcalMatmulReduceScatter" },
+    { LcalType::ALL_GATHER_MATMUL, "LcalAllGatherMatmul" },
+    { LcalType::ALL_GATHER_MATMUL_V2, "LcalAllGatherMatmulV2" },
+    { LcalType::ALL2ALL_MATMUL, "LcalAll2AllMatmul" },
+    { LcalType::MATMUL_ALL2ALL, "LcalMatmulAll2All" },
+    { LcalType::MTE2_TEST, "LcalMTE2Test" },
+    { LcalType::MATMUL_ALL_REDUCE_DETERMINISTIC, "LcalMatmulAllReduceDeterministic" },
+    { LcalType::MATMUL_REDUCE_SCATTER_DETERMINISTIC, "LcalMatmulReduceScatterDeterministic" },
+};
+
+const std::map<LcalType, LcalType> NORMAL2DETERMINISTIC = {
+    { LcalType::MATMUL_ALL_REDUCE, LcalType::MATMUL_ALL_REDUCE_DETERMINISTIC },
+    { LcalType::MATMUL_REDUCE_SCATTER, LcalType::MATMUL_REDUCE_SCATTER_DETERMINISTIC }
+};
+
+} // namespace Lcal
+#endif // LCAL_TYPES_H
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LACL_LCCL_H
+#define LACL_LCCL_H
+
+#include <lcal_comm.h>
+
+
+namespace Lcal {
+class Lccl {
+public:
+    Lccl(LcalComm &comm);
+    ~Lccl();
+    int Init(const std::string &uid = "", int maxBuffSize = LCAL_BUFF_BYTES);
+    int InitThread();
+    int AllReduce(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType,
+                  HcclReduceOp op = HCCL_REDUCE_SUM, aclrtStream stream = nullptr);
+    int ReduceScatter(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType,
+                      HcclReduceOp op = HCCL_REDUCE_SUM, aclrtStream stream = nullptr);
+    int AllGather(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType, aclrtStream stream = nullptr);
+    int All2All(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType, aclrtStream stream = nullptr);
+    int Broadcast(void *buff, int64_t count, HcclDataType dataType, int32_t root, aclrtStream stream = nullptr);
+
+private:
+    bool CheckDataType(const HcclDataType &dataType) const;
+    int LoopBack(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType);
+
+private:
+    LcalComm &comm_;
+    int rank_ = 0;
+    int rankSize_ = 0;
+};
+}
+#endif // LACL_LCCL_H
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LCCL_WRAPPER_H_
+#define LCCL_WRAPPER_H_
+
+#include <memory>
+#include "lccl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+using namespace Lcal;
+using LcclComm = std::shared_ptr<Lccl>;
+enum class LcclResult {
+    LCAL_SUCCESS = 0,
+    LCAL_ERROR_NOT_INITIALIZED = -1,
+    LCAL_ERROR_ASDRT = -2,
+    LCAL_ERROR_PARA_CHECK_FAIL = -3,
+    LCAL_ERROR_INTERNAL = -4,
+    LCAL_ERROR_TIMEOUT = -5,
+    LCCL_ERROR_INIT_HCCL_FAILED = -6
+};
+
+extern LcclResult LcclCommInitRank(uint32_t nRanks, uint32_t rank, LcclComm *comm);
+
+extern LcclResult LcclAllReduce(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType,
+                                HcclReduceOp op, aclrtStream stream);
+
+extern LcclResult LcclReduceScatter(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType,
+                                    HcclReduceOp op, aclrtStream stream);
+
+extern LcclResult LcclAllGather(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType, aclrtStream stream);
+
+extern LcclResult LcclAll2All(void *sendBuff, void *recvBuff, int64_t count, HcclDataType dataType, aclrtStream stream);
+
+extern LcclResult LcclBroadcast(void *buff, int64_t count, HcclDataType dataType, int32_t root, aclrtStream stream);
+
+extern LcclResult LcclCommDestroy(LcclComm comm);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // LCCL_WRAPPER_H_
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions and
|
|
14
|
+
* limitations under the License.
|
|
15
|
+
*/
|
|
16
|
+
#ifndef LCAL_LCOC_H
|
|
17
|
+
#define LCAL_LCOC_H
|
|
18
|
+
|
|
19
|
+
#include <lcal_comm.h>
|
|
20
|
+
|
|
21
|
+
namespace Lcal {
|
|
22
|
+
enum CoCDataTypeDesc : int {
|
|
23
|
+
COC_DATA_TYPE_UNDEFINED = -1,
|
|
24
|
+
FP16FP16_FP32_FP16 = 0, // 无量化,无反量化
|
|
25
|
+
BF16BF16_FP32_BF16 = 1, // 无量化,无反量化
|
|
26
|
+
INT8INT8_INT32_FP16 = 2, // W8A8,未融合量化,随路反量化
|
|
27
|
+
INT8INT8_INT32_BF16 = 3, // W8A8,未融合量化,aiv反量化
|
|
28
|
+
FP16INT8_INT32_FP16 = 4, // W8A8,融合量化,随路反量化
|
|
29
|
+
BF16INT8_INT32_BF16 = 5, // W8A8,融合量化,aiv反量化
|
|
30
|
+
FP16INT8_FP32_FP16 = 6, // W8A16,融合伪量化,无反量化
|
|
31
|
+
BF16INT8_FP32_BF16 = 7, // W8A16,融合伪量化,无反量化
|
|
32
|
+
FP16INT4_FP32_FP16 = 8, // W4A16,融合伪量化,无反量化
|
|
33
|
+
BF16INT4_FP32_BF16 = 9, // W4A16,融合伪量化,无反量化
|
|
34
|
+
COC_DATA_TYPE_DESC_MAX = 10,
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
struct MatMulInfo {
|
|
38
|
+
int64_t batchSize = 1;
|
|
39
|
+
int64_t m = -1;
|
|
40
|
+
int64_t k = -1;
|
|
41
|
+
int64_t n = -1;
|
|
42
|
+
bool transA = false;
|
|
43
|
+
bool transB = false;
|
|
44
|
+
bool withBias = false;
|
|
45
|
+
bool isInt8 = false;
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
enum QuantGranularity : int {
|
|
49
|
+
QUANT_GRANULARITY_UNDEFINED = -1,
|
|
50
|
+
PER_TENSOR = 0,
|
|
51
|
+
PER_CHANNEL = 1,
|
|
52
|
+
PER_GROUP = 2,
|
|
53
|
+
QUANT_GRANULARITY_MAX = 3,
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
struct QuantInfo {
|
|
57
|
+
QuantGranularity dequantGranularity = QuantGranularity::QUANT_GRANULARITY_UNDEFINED; // 反量化(包括Matmul前置伪量化和后置反量化)粒度
|
|
58
|
+
int32_t dequantGroupSize = -1;
|
|
59
|
+
|
|
60
|
+
QuantGranularity quantGranularity = QuantGranularity::QUANT_GRANULARITY_UNDEFINED; // 量化粒度
|
|
61
|
+
int32_t quantGroupSize = -1;
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
struct CoCParamDesc {
|
|
65
|
+
CoCDataTypeDesc dataTypeDesc = FP16FP16_FP32_FP16;
|
|
66
|
+
MatMulInfo mmInfo = {};
|
|
67
|
+
QuantInfo quantInfo = {};
|
|
68
|
+
HcclReduceOp op = HCCL_REDUCE_SUM; // 当前不支持其他值
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
struct CoCTiling {
|
|
72
|
+
int32_t m0 = -1;
|
|
73
|
+
int32_t k0 = -1;
|
|
74
|
+
int32_t n0 = -1;
|
|
75
|
+
int32_t swizzlDirect = -1;
|
|
76
|
+
int32_t swizzlCount = -1;
|
|
77
|
+
int32_t splitK = -1;
|
|
78
|
+
int32_t ubMoveNum = -1;
|
|
79
|
+
int32_t pValue = -1;
|
|
80
|
+
int32_t write2OtherRank = -1;
|
|
81
|
+
int32_t blockDim = -1;
|
|
82
|
+
int32_t withSerialMode = -1;
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
struct CoCInputPkg {
|
|
86
|
+
void *matrixA = nullptr;
|
|
87
|
+
void *matrixB = nullptr;
|
|
88
|
+
void *bias = nullptr;
|
|
89
|
+
|
|
90
|
+
void *dequantScale = nullptr; // 反量化参数,当融合了Matmul前置伪量化或后置反量化操作时需要传入
|
|
91
|
+
void *dequantOffset = nullptr; // 可选,若无offset(如对称量化场景),传入空指针即可
|
|
92
|
+
|
|
93
|
+
void *quantScale = nullptr; // 量化参数,当融合了量化操作时需要传入
|
|
94
|
+
void *quantOffset = nullptr; // 可选,若无offset(如对称量化场景),传入空指针即可
|
|
95
|
+
};
|
|
96
|
+
|
|
97
|
+
struct CoCOutputPkg{
|
|
98
|
+
void *output = nullptr;
|
|
99
|
+
void *midOutput = nullptr; // 先通信后计算情况下,通信的中间结果
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
class Lcoc {
|
|
103
|
+
public:
|
|
104
|
+
explicit Lcoc(LcalComm &comm);
|
|
105
|
+
~Lcoc();
|
|
106
|
+
int Init(const std::string &uid = "", int maxBuffSize = LCAL_BUFF_BYTES);
|
|
107
|
+
int SetParam(LcalType lcalType, const CoCTiling &tiling, const CoCParamDesc &paramDesc);
|
|
108
|
+
int MTE2Test(CoCInputPkg inputPkg, CoCOutputPkg outputPkg, void *workspace, aclrtStream stream = nullptr);
|
|
109
|
+
int All2AllMatmul(CoCInputPkg inputPkg, CoCOutputPkg outputPkg, void *workspace, aclrtStream stream = nullptr);
|
|
110
|
+
int AllGatherMatmul(CoCInputPkg inputPkg, CoCOutputPkg outputPkg, void *workspace, aclrtStream stream = nullptr);
|
|
111
|
+
int AllGatherMatmulV2(CoCInputPkg inputPkg, CoCOutputPkg outputPkg, void *workspace, aclrtStream stream = nullptr);
|
|
112
|
+
int MatmulReduceScatter(CoCInputPkg inputPkg, CoCOutputPkg outputPkg, void *workspace,
|
|
113
|
+
aclrtStream stream = nullptr);
|
|
114
|
+
int MatmulAllReduce(CoCInputPkg inputPkg, CoCOutputPkg outputPkg, void *workspace, aclrtStream stream = nullptr);
|
|
115
|
+
int PureMatmul(CoCInputPkg inputPkg, CoCOutputPkg outputPkg, void *workspace, aclrtStream stream = nullptr);
|
|
116
|
+
int64_t GetWorkspaceSize();
|
|
117
|
+
LcalComm &GetComm();
|
|
118
|
+
MatMulInfo &GetMatMulInfo();
|
|
119
|
+
int32_t GetEleSize();
|
|
120
|
+
void GetTiling(CoCTiling &tiling);
|
|
121
|
+
|
|
122
|
+
private:
|
|
123
|
+
bool CheckDataType() const;
|
|
124
|
+
bool InitTiling(const CoCTiling &tiling);
|
|
125
|
+
int LaunchOperator(CoCInputPkg &inputPkg, CoCOutputPkg &outputPkg, void *workspace, aclrtStream stream);
|
|
126
|
+
bool CheckBasic(const CoCInputPkg &inputPkg, const CoCOutputPkg &outputPkg, LcalType lcalType);
|
|
127
|
+
|
|
128
|
+
private:
|
|
129
|
+
LcalComm &comm_;
|
|
130
|
+
LcalType lcalType_ = LcalType::ALL_REDUCE;
|
|
131
|
+
CoCParamDesc paramDesc_ = {};
|
|
132
|
+
CoCTiling tiling_ = {};
|
|
133
|
+
int rank_ = 0;
|
|
134
|
+
int rankSize_ = 0;
|
|
135
|
+
bool tilingSuccess_ = false;
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
struct WorkspaceDetail {
|
|
139
|
+
int64_t matrixActivationSize{ 0 };
|
|
140
|
+
int64_t matrixWeightSize{ 0 };
|
|
141
|
+
int64_t matrixIntermediateSize{ 0 };
|
|
142
|
+
int64_t formatDequantParamSize{ 0 };
|
|
143
|
+
|
|
144
|
+
int64_t GetSize() const
|
|
145
|
+
{
|
|
146
|
+
return matrixActivationSize + matrixWeightSize + matrixIntermediateSize + formatDequantParamSize;
|
|
147
|
+
}
|
|
148
|
+
};
|
|
149
|
+
|
|
150
|
+
WorkspaceDetail GetWorkspaceDetail(CoCDataTypeDesc dataType, const MatMulInfo &mmInfo, const QuantInfo &quantInfo);
|
|
151
|
+
|
|
152
|
+
void GetLcalTypeByDeterministic(LcalType &lcalType, bool deterministic);
|
|
153
|
+
}
|
|
154
|
+
#endif // LCAL_LCOC_H
|
|
Binary file
|
|
Binary file
|
mindspore/log.py
CHANGED
|
@@ -234,11 +234,11 @@ def get_level():
|
|
|
234
234
|
|
|
235
235
|
Examples:
|
|
236
236
|
>>> import os
|
|
237
|
+
>>> os.environ['GLOG_v'] = '3'
|
|
237
238
|
>>> import mindspore as ms
|
|
238
|
-
>>> os.environ['GLOG_v'] = '0'
|
|
239
239
|
>>> level = ms.get_level()
|
|
240
240
|
>>> print(level)
|
|
241
|
-
'0'
|
|
241
|
+
'3'
|
|
242
242
|
"""
|
|
243
243
|
# level and glog level mapping dictionary
|
|
244
244
|
level_to_glog_level = dict(zip(_name_to_level.values(), _gloglevel_to_name.keys()))
|