mindspore-2.4.10-cp310-cp310-win_amd64.whl → mindspore-2.5.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore has been flagged as potentially problematic; see the registry page for details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +8 -3
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +0 -5
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/compile_config.py +64 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
- mindspore/_extends/parse/parser.py +23 -5
- mindspore/_extends/parse/standard_method.py +123 -27
- mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
- mindspore/amp.py +7 -1
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/boost_cell_wrapper.py +136 -41
- mindspore/common/__init__.py +3 -1
- mindspore/common/_register_for_tensor.py +0 -1
- mindspore/common/_stub_tensor.py +25 -4
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +6132 -0
- mindspore/common/api.py +98 -21
- mindspore/common/dtype.py +34 -34
- mindspore/common/dump.py +2 -1
- mindspore/common/file_system.py +8 -3
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +3 -1
- mindspore/common/initializer.py +3 -4
- mindspore/common/lazy_inline.py +8 -2
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/parameter.py +31 -15
- mindspore/common/tensor.py +713 -1337
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +5 -0
- mindspore/communication/comm_func.py +215 -173
- mindspore/communication/management.py +23 -20
- mindspore/context.py +285 -191
- mindspore/dataset/__init__.py +23 -19
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +84 -3
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +5 -4
- mindspore/dataset/engine/datasets.py +192 -149
- mindspore/dataset/engine/datasets_audio.py +14 -0
- mindspore/dataset/engine/datasets_standard_format.py +11 -11
- mindspore/dataset/engine/datasets_text.py +38 -1
- mindspore/dataset/engine/datasets_user_defined.py +100 -66
- mindspore/dataset/engine/datasets_vision.py +81 -8
- mindspore/dataset/engine/iterators.py +281 -63
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +26 -2
- mindspore/dataset/engine/serializer_deserializer.py +1 -1
- mindspore/dataset/engine/validators.py +43 -11
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +29 -12
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +94 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +127 -0
- mindspore/device_context/cpu/__init__.py +25 -0
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +134 -0
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/__init__.py +1 -0
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/optim/adadelta.py +26 -22
- mindspore/experimental/optim/adam.py +3 -0
- mindspore/experimental/optim/lr_scheduler.py +33 -24
- mindspore/experimental/optim/radam.py +33 -30
- mindspore/hal/device.py +28 -0
- mindspore/hal/event.py +17 -0
- mindspore/hal/memory.py +94 -3
- mindspore/hal/stream.py +91 -6
- mindspore/include/api/context.h +0 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +12 -0
- mindspore/mindrecord/__init__.py +1 -1
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +824 -218
- mindspore/mint/distributed/__init__.py +66 -4
- mindspore/mint/distributed/distributed.py +2594 -44
- mindspore/mint/linalg/__init__.py +6 -0
- mindspore/mint/nn/__init__.py +473 -14
- mindspore/mint/nn/functional.py +486 -11
- mindspore/mint/nn/layer/__init__.py +17 -4
- mindspore/mint/nn/layer/_functions.py +330 -0
- mindspore/mint/nn/layer/activation.py +169 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +727 -0
- mindspore/mint/nn/layer/normalization.py +215 -19
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +170 -0
- mindspore/mint/optim/__init__.py +2 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/special/__init__.py +2 -1
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/cell.py +126 -19
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +6 -6
- mindspore/nn/layer/basic.py +35 -25
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/embedding.py +3 -3
- mindspore/nn/layer/normalization.py +8 -7
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +47 -13
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +48 -26
- mindspore/nn/learning_rate_schedule.py +5 -3
- mindspore/nn/loss/loss.py +31 -36
- mindspore/nn/optim/ada_grad.py +1 -0
- mindspore/nn/optim/adadelta.py +2 -2
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/rprop.py +2 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/utils/init.py +13 -11
- mindspore/nn/wrap/cell_wrapper.py +4 -6
- mindspore/nn/wrap/loss_scale.py +3 -4
- mindspore/numpy/array_creations.py +60 -62
- mindspore/numpy/array_ops.py +148 -143
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +361 -359
- mindspore/numpy/utils.py +16 -16
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +2 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +94 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_vmap/vmap_array_ops.py +20 -19
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
- mindspore/ops/_vmap/vmap_math_ops.py +11 -9
- mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
- mindspore/ops/auto_generate/gen_extend_func.py +554 -60
- mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
- mindspore/ops/auto_generate/gen_ops_prim.py +8024 -3409
- mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
- mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
- mindspore/ops/function/__init__.py +12 -0
- mindspore/ops/function/array_func.py +561 -159
- mindspore/ops/function/clip_func.py +64 -0
- mindspore/ops/function/debug_func.py +28 -20
- mindspore/ops/function/image_func.py +1 -1
- mindspore/ops/function/linalg_func.py +5 -4
- mindspore/ops/function/math_func.py +1659 -290
- mindspore/ops/function/nn_func.py +988 -317
- mindspore/ops/function/parameter_func.py +3 -56
- mindspore/ops/function/random_func.py +243 -33
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/functional.py +18 -5
- mindspore/ops/functional_overload.py +897 -0
- mindspore/ops/operations/__init__.py +3 -2
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -34
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +38 -8
- mindspore/ops/operations/array_ops.py +45 -303
- mindspore/ops/operations/comm_ops.py +19 -16
- mindspore/ops/operations/custom_ops.py +11 -55
- mindspore/ops/operations/debug_ops.py +42 -47
- mindspore/ops/operations/inner_ops.py +6 -4
- mindspore/ops/operations/linalg_ops.py +3 -2
- mindspore/ops/operations/manually_defined/ops_def.py +185 -104
- mindspore/ops/operations/math_ops.py +11 -216
- mindspore/ops/operations/nn_ops.py +146 -308
- mindspore/ops/primitive.py +23 -21
- mindspore/ops/tensor_method.py +1669 -0
- mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
- mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
- mindspore/ops_generate/arg_handler.py +0 -61
- mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
- mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/base_generator.py +11 -0
- mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
- mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
- mindspore/ops_generate/functional_overload_py_generator.py +110 -0
- mindspore/ops_generate/functions_cc_generator.py +233 -0
- mindspore/ops_generate/gen_aclnn_implement.py +110 -114
- mindspore/ops_generate/gen_constants.py +157 -3
- mindspore/ops_generate/gen_ops.py +245 -990
- mindspore/ops_generate/gen_pyboost_func.py +97 -998
- mindspore/ops_generate/gen_utils.py +119 -33
- mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
- mindspore/ops_generate/op_api_proto.py +206 -0
- mindspore/ops_generate/op_def_py_generator.py +131 -0
- mindspore/ops_generate/op_prim_py_generator.py +480 -0
- mindspore/ops_generate/op_proto.py +373 -108
- mindspore/ops_generate/op_template_parser.py +436 -0
- mindspore/ops_generate/ops_def_cc_generator.py +288 -0
- mindspore/ops_generate/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/ops_name_h_generator.py +68 -0
- mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
- mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
- mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
- mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
- mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
- mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
- mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
- mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
- mindspore/ops_generate/pyboost_utils.py +92 -33
- mindspore/ops_generate/template.py +294 -44
- mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
- mindspore/parallel/__init__.py +3 -3
- mindspore/parallel/_auto_parallel_context.py +24 -33
- mindspore/parallel/_parallel_serialization.py +13 -2
- mindspore/parallel/_utils.py +4 -1
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +44 -0
- mindspore/parallel/cluster/process_entity/_api.py +131 -37
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +20 -3
- mindspore/parallel/parameter_broadcast.py +1 -1
- mindspore/parallel/shard.py +3 -0
- mindspore/parallel/transform_safetensors.py +119 -253
- mindspore/profiler/__init__.py +17 -4
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +174 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +202 -0
- mindspore/profiler/common/path_manager.py +371 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +476 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +210 -0
- mindspore/profiler/common/profiler_path_manager.py +120 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +270 -37
- mindspore/profiler/envprofiler.py +138 -0
- mindspore/profiler/mstx.py +199 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +309 -0
- mindspore/profiler/profiler.py +580 -93
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +114 -0
- mindspore/profiler/schedule.py +208 -0
- mindspore/rewrite/api/symbol_tree.py +1 -2
- mindspore/run_check/_check_version.py +2 -6
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +148 -0
- mindspore/runtime/memory.py +392 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +2 -2
- mindspore/train/_utils.py +53 -18
- mindspore/train/amp.py +8 -4
- mindspore/train/callback/_checkpoint.py +32 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +105 -69
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_summary_collector.py +44 -6
- mindspore/train/callback/_tft_register.py +31 -10
- mindspore/train/dataset_helper.py +11 -11
- mindspore/train/metrics/precision.py +4 -5
- mindspore/train/mind_ir_pb2.py +167 -46
- mindspore/train/model.py +13 -15
- mindspore/train/serialization.py +462 -76
- mindspore/train/summary/summary_record.py +1 -2
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +4 -2
- mindspore/utils/dryrun.py +138 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/METADATA +2 -3
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/RECORD +362 -238
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
mindspore/runtime/thread_bind_core.py ADDED
@@ -0,0 +1,401 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Executor manager interfaces."""
+import subprocess
+from dataclasses import dataclass
+from typing import Union
+import re
+from mindspore import log as logger
+from mindspore import context
+
+
+def execute_command(cmd_list):
+    try:
+        with subprocess.Popen(cmd_list, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
+            out, _ = p.communicate(timeout=1000)
+        res = out.decode()
+        return res
+    except FileNotFoundError as e:
+        raise RuntimeError(f"Failed to execute command, because {e}.")
+
+
+def _validate_affinity_cpu_list(affinity_cpu_list):
+    """
+    Validate the user-configured affinity_cpu_list.
+
+    Args:
+        affinity_cpu_list (dict): Customized bind-core policy to be validated.
+
+    Returns:
+        None.
+    """
+    device_pattern = re.compile(r'^device\d+$')
+    range_pattern = re.compile(r'^\d+-\d+$')
+
+    for key, value in affinity_cpu_list.items():
+        if not isinstance(key, str):
+            raise ValueError(f"The key of affinity_cpu_list: {key} should be a string.")
+        if not device_pattern.match(key):
+            raise ValueError(f"The key of affinity_cpu_list: {key} should be in format 'deviceX'.")
+        if not isinstance(value, list):
+            raise ValueError(f"The value of affinity_cpu_list: {value} should be a list.")
+        for item in value:
+            if not isinstance(item, str):
+                raise ValueError(f"The value of affinity_cpu_list: {item} should be a string.")
+            if not range_pattern.match(item):
+                raise ValueError(f"The value of affinity_cpu_list: {item} should be in format 'cpuidX-cpuidY'.")
+
+
+def _get_cpu_available():
+    """
+    Get the CPU resources available on the environment.
+
+    Returns:
+        list: List of available CPUs on the environment.
+    """
+    available_cpu_str = execute_command(["cat", "/sys/fs/cgroup/cpuset/cpuset.cpus"]).strip().split(",")
+    available_cpus = list()
+    for range_str in available_cpu_str:
+        endpoints = range_str.split("-")
+        if len(endpoints) != 2:
+            raise RuntimeError("'cat /sys/fs/cgroup/cpuset/cpuset.cpus' command output error, please check!")
+        available_cpus += [cid for cid in range(int(endpoints[0]), int(endpoints[1]) + 1)]
+    return available_cpus
+
+
+@dataclass
+class DeviceInfo:
+    """
+    A class to represent information about an Ascend device.
+
+    Attributes:
+        _info_line (str): A raw string containing device information.
+        npu_id (int): The ID of the NPU.
+        chip_id (int): The ID of the chip.
+        chip_logic_id (Union[int, str]): The logical ID of the chip, which can be an integer or a string.
+        chip_name (str): The name of the chip.
+
+    Methods:
+        __post_init__(): Initializes the attributes based on input.
+    """
+    _info_line: str = ""
+    npu_id: int = 0
+    chip_id: int = 0
+    chip_logic_id: Union[int, str] = 0
+    chip_name: str = ""
+
+    def __post_init__(self):
+        self.npu_id, self.chip_id, self.chip_logic_id, self.chip_name = \
+            self._info_line.strip().split(None, 3)
+        self.npu_id = int(self.npu_id)
+        self.chip_id = int(self.chip_id)
+        if self.chip_logic_id.isnumeric():
+            self.chip_logic_id = int(self.chip_logic_id)
+
+
+def _get_device_map_info():
+    """
+    Get abbreviated information about all NPUs on the environment.
+
+    Returns:
+        dict: Mapping of NPU logical ID to its details.
+        set: Contains all available NPU logical ids on the environment.
+    """
+    device_map_info = {}
+    available_devices = set()
+    device_map = \
+        execute_command(["npu-smi", "info", "-m"]).strip().split("\n")[1:]
+    for line in device_map:
+        device_info = DeviceInfo(line.strip())
+        if isinstance(device_info.chip_logic_id, int):
+            device_map_info[device_info.chip_logic_id] = device_info
+            available_devices.add(device_info.chip_logic_id)
+    return device_map_info, available_devices
+
+
+def _get_pcie_info(device_map_info, available_devices, keyword="PCIeBusInfo"):
+    """
+    Get the PCIe number of the NPU device.
+
+    Args:
+        device_map_info (dict): A map of NPU logical ID to its details.
+        available_devices (set): All available NPU logical ids on the environment.
+
+    Returns:
+        dict: Mapping of NPU logical ID to its PCIe number.
+    """
+    device_pcie_map = {}
+    for device in available_devices:
+        device_info = device_map_info.get(device)
+        if not device_info:
+            raise RuntimeError("Can not get device info, binding cpu will skip.")
+        pcie_info = \
+            execute_command(["npu-smi", "info", "-t", "board", "-i", f"{device_info.npu_id}",
+                             "-c", f"{device_info.chip_id}"]).strip().split("\n")
+        for _ in pcie_info:
+            line = ''.join(_.split())
+            if line.startswith(keyword):
+                device_pcie_map[device] = line[len(keyword) + 1:]
+                break
+    return device_pcie_map
+
+
+def _get_numa_info(device_pcie_map, keyword="NUMAnode"):
+    """
+    Get NUMA node affinity for device based on PCIe.
+
+    Args:
+        device_pcie_map (dict): A map of NPU logical ID to its PCIe number.
+
+    Returns:
+        dict: Mapping of device ID to its affinity NUMA nodes.
+        dict: Mapping of NUMA node to its affinity device IDs.
+    """
+    device_to_numa_map = {}
+    numa_to_device_map = {}
+
+    for device, pcie_no in device_pcie_map.items():
+        numa_info = execute_command(["lspci", "-s", f"{pcie_no}", "-vvv"]).strip().split("\n")
+        for _ in numa_info:
+            line = ''.join(_.split())
+            if line.startswith(keyword):
+                numa_id = int(line[len(keyword) + 1:])
+                device_to_numa_map[device] = numa_id
+
+                devices = numa_to_device_map.get(numa_id, None)
+                if devices is None:
+                    numa_to_device_map[numa_id] = list()
+                numa_to_device_map[numa_id].append(device)
+                break
+    numa_to_device_map[-1] = list(device_pcie_map.keys())
+    return device_to_numa_map, numa_to_device_map
+
+
+def _get_cpu_info(numa_ids, available_cpus, keyword1="NUMAnode", keyword2="CPU(s)"):
+    """
+    Get information about the CPUs on the NUMA nodes on the environment.
+
+    Args:
+        numa_ids (list): A list of NUMA nodes need to get related CPU information.
+        available_cpus (list): A list of available CPUs on the environment.
+
+    Returns:
+        dict: Mapping of NUMA node to its affinity CPUs.
+    """
+    numa_to_cpu_map = dict()
+
+    cpu_info = execute_command(["lscpu"]).strip().split("\n")
+    for _ in cpu_info:
+        line = ''.join(_.split())
+        if line.startswith(keyword1):
+            pattern = re.escape(keyword1) + r'(\d+)' + re.escape(keyword2)
+            match = re.search(pattern, line)
+            if match:
+                numa_id = int(match.group(1))
+                split_info = line.split(":")
+                cpu_id_ranges = split_info[-1].split(",")
+                ranges = list()
+                for range_str in cpu_id_ranges:
+                    endpoints = range_str.split("-")
+                    if len(endpoints) != 2:
+                        raise RuntimeError("'lscpu' command output error, please check!")
+                    ranges += [cid for cid in range(int(endpoints[0]), int(endpoints[1]) + 1) if cid in available_cpus]
+                if numa_id not in numa_ids:
+                    numa_id = int(-1)
+                if numa_id not in numa_to_cpu_map:
+                    numa_to_cpu_map[numa_id] = list()
+                numa_to_cpu_map[numa_id].extend(ranges)
+    return numa_to_cpu_map
+
+
+def _auto_generate_policy(available_devices, available_cpus, affinity_flag, numa_to_cpu_map, device_to_numa_map):
+    """
+    Automatically generate bind-core policy based on CPU affinity.
+
+    Args:
+        available_devices (list): All available NPU logical ids on the environment.
+        available_cpus (list): A list of available CPUs on the environment.
+        affinity_flag (bool): Whether or not it satisfies generating CPU affinity bind-core policy based on the
+            resources on the environment.
+        numa_to_cpu_map (dict): A map of NUMA node to its affinity CPUs.
+        device_to_numa_map (dict): A map of device ID to its affinity NUMA nodes.
+
+    Returns:
+        dict: Mapping of device to its affinity CPUs.
+    """
+    device_to_cpu_map = {}
+    for device_id in available_devices:
+        device_to_cpu_map[device_id] = list()
+
+    available_cpu_num = len(available_cpus)
+    available_device_num = len(available_devices)
+    cpu_num_per_device = available_cpu_num // available_device_num
+    if cpu_num_per_device < 7:
+        raise RuntimeError(f"Cpu num available for each device is {cpu_num_per_device}, "
+                           "which is less than the minimum cpu num need. Will not enable bind core feature.")
+
+    if affinity_flag:
+        device_to_cpu_idx = {}
+        for numa_id in numa_to_cpu_map:
+            device_to_cpu_idx[numa_id] = 0
+        for device_id in available_devices:
+            numa_id = device_to_numa_map.get(device_id)
+            affinity_cpu_num = 0
+            # Prioritize the use of affinity cpu resources.
+            affinity_cpu_start_idx = device_to_cpu_idx[numa_id]
+            if len(numa_to_cpu_map[numa_id][affinity_cpu_start_idx:]) >= cpu_num_per_device:
+                affinity_cpu = numa_to_cpu_map[numa_id][
+                    affinity_cpu_start_idx:(affinity_cpu_start_idx + cpu_num_per_device)]
+            else:
+                affinity_cpu = numa_to_cpu_map[numa_id][affinity_cpu_start_idx:]
+            affinity_cpu_num = len(affinity_cpu)
+            device_to_cpu_map[device_id].extend(affinity_cpu)
+            device_to_cpu_idx[numa_id] = affinity_cpu_start_idx + affinity_cpu_num
+            # If the affinity cpu resources are insufficient then use resources from the non-affinity cpu pool.
+            if -1 in device_to_cpu_idx:
+                unaffinity_cpu_start_idx = device_to_cpu_idx[-1]
+                unaffinity_cpu_num = cpu_num_per_device - affinity_cpu_num
+                unaffinity_cpu = numa_to_cpu_map[-1][
+                    unaffinity_cpu_start_idx:(unaffinity_cpu_start_idx + unaffinity_cpu_num)]
+                device_to_cpu_map[device_id].extend(unaffinity_cpu)
+                device_to_cpu_idx[-1] = unaffinity_cpu_start_idx + unaffinity_cpu_num
+    else:
+        device_rank = 0
+        for device_id in available_devices:
+            cpu_start = device_rank * cpu_num_per_device
+            device_to_cpu_map[device_id] = available_cpus[cpu_start:(cpu_start + cpu_num_per_device)]
+            device_rank += 1
+    return device_to_cpu_map
+
+
+def _customize_generate_policy(affinity_cpu_list, available_cpus):
+    """
+    Generate customized bind-core policy based on user-configured inputs.
+
+    Args:
+        affinity_cpu_list (dict): User-configured inputs to generate customized bind-core policy.
+        available_cpus (list): A list of available CPUs on the environment.
+
+    Returns:
+        dict: Mapping of device to its affinity CPUs.
+    """
+    device_to_cpu_map = {}
+    _validate_affinity_cpu_list(affinity_cpu_list)
+    for device, cpu_id_ranges in affinity_cpu_list.items():
+        ranges = list()
+        for range_str in cpu_id_ranges:
+            endpoints = range_str.split("-")
+            for cid in range(int(endpoints[0]), int(endpoints[1]) + 1):
+                if cid not in available_cpus:
+                    raise RuntimeError(f"CPU id: {cid} set in affinity_cpu_list is not available.")
+                ranges.append(cid)
+        if len(ranges) < 7:
+            raise RuntimeError(f"cpu num available for {device} is less than 7, which is the minimum cpu num need.")
+        device_id = int(device.replace("device", ""))
+        device_to_cpu_map[device_id] = ranges
+    return device_to_cpu_map
+
+
+def _assign_cpu_to_module(device_to_cpu_map):
+    """
+    Assign specific CPUs to modules.
+
+    Args:
+        device_to_cpu_map (dict): A map of device to its affinity CPUs.
+
+    Returns:
+        dict: Mapping of device to its affinity CPUs based on module segmentation.
+    """
+    module_bind_core_policy = {}
+    for device, cpu_list in device_to_cpu_map.items():
+        thread_to_cpu_map = {}
+        thread_to_cpu_map["main"] = [cpu_list[0]]
+        thread_to_cpu_map["runtime"] = cpu_list[1:6]
+        thread_to_cpu_map["pynative"] = cpu_list[1:5]
+        thread_to_cpu_map["minddata"] = cpu_list[6:]
+        module_bind_core_policy[device] = thread_to_cpu_map
+    return module_bind_core_policy
+
+
+def _get_cpu_affinity_policy(affinity_cpu_list=None):
+    """
+    The entry to get bind-core policy.
+
+    Args:
+        affinity_cpu_list (dict, optional): User-configured inputs to generate customized bind-core policy.
+            Default: ``None``.
+
+    Returns:
+        dict: Mapping of device to its affinity CPUs based on module segmentation.
+        bool: Whether the generated bind-core policy is based on cpu affinity.
+    """
+    device_target = context.get_context("device_target")
+    device_pcie_map = {}
+    device_to_numa_map = {}
+    numa_to_device_map = {}
+    numa_to_cpu_map = {}
+    affinity_flag = False
+    bind_policy_flag = False
+
+    # Get the CPU resources in the environment. If this fails, the binding core feature will not be enabled.
+    try:
+        available_cpus = _get_cpu_available()
+    except RuntimeError as e:
+        logger.warning(f"Failed to acquire available cpu info, error: {e} Will not enable bind core feature.")
+        return {}, False
+    # Automatic generation of binding core policy based on resources on the environment.
+    if (affinity_cpu_list is None) or (not affinity_cpu_list):
+        # If the device target is Ascend, the affinity between the device and NUMA node is taken into account
+        # to generate the binding core policy.
+        if device_target == "Ascend":
+            # Get the hardware resources in the environment. If this fails, will bind core not based on device.
+            try:
+                device_map_info, available_devices = _get_device_map_info()
+            except RuntimeError as e:
+                logger.warning(f"Failed to acquire device to numa affinity info, error: {e} "
+                               "Will not bind core based on affinity. Module bind core policy "
+                               f"generated: {available_cpus}.")
+                return available_cpus, bind_policy_flag
+            # Get the affinity resources in the environment. If this fails, will bind core not based on affinity.
+            try:
+                device_pcie_map = _get_pcie_info(device_map_info, available_devices)
+                device_to_numa_map, numa_to_device_map = _get_numa_info(device_pcie_map)
+                numa_to_cpu_map = _get_cpu_info(list(numa_to_device_map.keys()), available_cpus)
+            except RuntimeError as e:
+                logger.warning(f"Failed to acquire device to numa affinity info, error: {e} "
+                               "Will not bind core based on affinity.")
+                affinity_flag = False
+            if device_pcie_map and device_to_numa_map and numa_to_device_map and numa_to_cpu_map:
+                affinity_flag = True
+            # Auto-generation of bind core policy for Ascend.
+            try:
+                device_to_cpu_map = _auto_generate_policy(available_devices, available_cpus, affinity_flag,
+                                                          numa_to_cpu_map, device_to_numa_map)
+            except (RuntimeError, ZeroDivisionError) as e:
+                logger.warning(f"Failed to auto generate bind core policy, error: {e}. "
+                               "Will not enable bind core feature.")
+                return {}, False
+            module_bind_core_policy = _assign_cpu_to_module(device_to_cpu_map)
+            bind_policy_flag = True
+        else:
+            module_bind_core_policy = available_cpus
+    # User configured binding core policy.
+    else:
+        device_to_cpu_map = _customize_generate_policy(affinity_cpu_list, available_cpus)
+        module_bind_core_policy = _assign_cpu_to_module(device_to_cpu_map)
+        bind_policy_flag = True
+    logger.warning(f"Module bind core policy generated: {module_bind_core_policy}.")
+    return module_bind_core_policy, bind_policy_flag
mindspore/swresample-4.dll CHANGED
Binary file
mindspore/swscale-6.dll CHANGED
Binary file
mindspore/tinyxml2.dll CHANGED
Binary file
mindspore/train/__init__.py CHANGED
@@ -27,7 +27,7 @@ from mindspore.train.loss_scale_manager import LossScaleManager, FixedLossScaleM
 from mindspore.train.serialization import save_checkpoint, load_checkpoint, load_param_into_net, export, \
     load, parse_print, build_searched_strategy, merge_sliced_parameter, load_distributed_checkpoint, \
     async_ckpt_thread_status, restore_group_info_list, convert_model, obfuscate_model, export_split_mindir, \
-    load_checkpoint_async, check_checkpoint, get_ckpt_path_with_strategy
+    load_checkpoint_async, check_checkpoint, get_ckpt_path_with_strategy, ckpt_to_safetensors, safetensors_to_ckpt
 from mindspore.train.callback import Callback, LossMonitor, TimeMonitor, ModelCheckpoint, SummaryCollector, \
     CheckpointConfig, RunContext, LearningRateScheduler, SummaryLandscape, FlopsUtilizationCollector, \
     History, LambdaCallback, ReduceLROnPlateau, EarlyStopping, OnRequestExit, BackupAndRestore, TFTRegister
@@ -41,7 +41,7 @@ __all__ = ["Model", "DatasetHelper", "connect_network_with_dataset", "build_trai
     "load_param_into_net", "export", "load", "export_split_mindir", "parse_print", "build_searched_strategy",
     "merge_sliced_parameter", "load_distributed_checkpoint", "async_ckpt_thread_status",
     "restore_group_info_list", "convert_model", "data_sink", "obfuscate_model", "load_checkpoint_async",
-    "get_ckpt_path_with_strategy"]
+    "get_ckpt_path_with_strategy", "ckpt_to_safetensors", "safetensors_to_ckpt"]
 __all__.extend(callback.__all__)
 __all__.extend(summary.__all__)
 __all__.extend(train_thor.__all__)
mindspore/train/_utils.py CHANGED
@@ -16,15 +16,15 @@
 from __future__ import absolute_import
 
 import os
-import threading
-from datetime import datetime
 import json
 from collections.abc import Iterable
 
+import time
 import numpy as np
 
 from mindspore.common.tensor import Tensor
 from mindspore._c_expression import Tensor as Tensor_
+from mindspore._c_expression import MSContext, ms_ctx_param
 from mindspore.common.dtype import dtype_to_nptype, pytype_to_dtype
 from mindspore.common import dtype as mstype
 from mindspore import context
@@ -64,6 +64,10 @@ def _get_types_and_shapes(dataset):
     dataset_shapes = dataset.output_shapes()
     return dataset_types, dataset_shapes
 
+def enable_data_broadcast():
+    """Get status to indicate if enable dataset broadcast."""
+    return MSContext.get_instance().get_param(ms_ctx_param.dataset_broadcast_opt_level) > 0
+
 
 def _exec_datagraph(exec_dataset, dataset_size, phase='dataset', create_data_info_queue=False):
     """Initialize and execute the dataset graph."""
@@ -77,15 +81,12 @@ def _exec_datagraph(exec_dataset, dataset_size, phase='dataset', create_data_inf
     if queue_name is None:
         queue_name = str("")
 
+    # Don't enable dynamic shape(multi-subgraph) feature in pp/data_broadcast mode,
+    # otherwise get_data_info will stuck since some rank do not consume data.
     use_pipeline_parallel = (context.get_auto_parallel_context("pipeline_stages") > 1)
+    data_broadcast = enable_data_broadcast()
 
-
-    dynamic_sink1_env = os.getenv("MS_DEV_DYNAMIC_SINK1", None)
-    dynamic_sink1 = True
-    if dynamic_sink1_env and dynamic_sink1_env.strip() in ['False', 'false']:
-        dynamic_sink1 = False
-
-    if use_pipeline_parallel or not dynamic_sink1:
+    if use_pipeline_parallel or data_broadcast:
         create_data_info_queue = False
 
     exec_dataset = exec_dataset.device_que(send_epoch_end=send_epoch_end,
@@ -514,12 +515,46 @@ def parse_hccl_file(hccl_file_path):
     return rankid_dict
 
 
-def
-
-
-
-
-
-
-
+def _progress_bar(iterable, total=None):
+    """
+    Decorate an iterable object, returning an iterator which acts exactly
+    like the original iterable, but prints a dynamically updating
+    progressbar every time a value is requested.
+    """
+    if total is None:
+        total = len(iterable)
+
+    start_time = time.time()
+
+    def print_progress_bar(iteration):
+        percent = f"{100 * (iteration / float(total)):.1f}"
+        bar_length = 40
+        filled_length = int(bar_length * iteration // total)
+        bar = '█' * filled_length + '-' * (bar_length - filled_length)
+
+        elapsed_time = time.time() - start_time
+        estimated_total_time = elapsed_time / iteration * total
+        remaining_time = estimated_total_time - elapsed_time
+
+        elapsed_time_str = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
+        remaining_time_str = time.strftime("%H:%M:%S", time.gmtime(remaining_time))
+
+        print(f'\r{percent}%|{bar}|[{elapsed_time_str}<{remaining_time_str}]', end='')
+        if iteration == total:
+            print()
+
+    for i, item in enumerate(iterable, start=1):
+        yield item
+        print_progress_bar(i)
+
+
+def _load_and_transform(path, name_map, load_func, transform_func):
+    if load_func is not None:
+        param_dict = load_func(path)
+    else:
+        param_dict = path
+    transform_dict = {}
+    for k, v in param_dict.items():
+        new_name = name_map.get(k, k) if name_map is not None else k
+        transform_dict[new_name] = transform_func(v, new_name)
+    return transform_dict
mindspore/train/amp.py CHANGED
@@ -101,6 +101,7 @@ AMP_AUTO_BLACK_LIST = [
     P.LayerNorm,
     gen.LayerNormExt,
     P.BatchNorm,
+    gen.BatchNormExt,
     gen.GroupNorm,
     P.KLDivLoss,
     P.SmoothL1Loss,
@@ -112,6 +113,7 @@ AMP_AUTO_BLACK_LIST = [
     P.Pdist,
     P.Cdist,
     P.Renorm,
+    gen.MSELossExt,
 ]
 
 # Indicates which inputs of primitives need to be converted
@@ -428,15 +430,15 @@ def auto_mixed_precision(network, amp_level="O0", dtype=mstype.float16):
 
     ``Pow``, ``ACos``, ``Asin``, ``Cosh``, ``Erfinv``, ``Exp``, ``Expm1``, ``Log``, ``Log1p``, ``Reciprocal``,
     ``Rsqrt``, ``Sinh``, ``Tan``, ``Softplus``, ``SoftplusExt``, ``LayerNorm``, ``LayerNormExt``, ``BatchNorm``,
-    ``GroupNorm``, ``KLDivLoss``, ``SmoothL1Loss``, ``MultilabelMarginLoss``, ``SoftMarginLoss``,
+    ``BatchNormExt``, ``GroupNorm``, ``KLDivLoss``, ``SmoothL1Loss``, ``MultilabelMarginLoss``, ``SoftMarginLoss``,
     ``TripletMarginLoss``, ``MultiMarginLoss``, ``BCEWithLogitsLoss``, ``Pdist``, ``Cdist``, ``Renorm``,
     ``ReduceProd``, ``Softmax``, ``LogSoftmax``, ``CumProd``, ``CumSum``, ``CumsumExt``, ``ProdExt``, ``SumExt``,
-    ``Norm``
+    ``Norm``, ``MSELossExt``
 
     Operators in `promote_list` are:
 
     ``Addcdiv``, ``Addcmul``, ``Cross``, ``_PyboostCrossPrim``, ``Dot``, ``GridSampler2D``, ``GridSampler3D``,
-    ``BiasAdd``
+    ``BiasAdd``, ``AddN``, ``Concat``
 
     For details on automatic mixed precision, refer to
     `Automatic Mix Precision <https://www.mindspore.cn/tutorials/en/master/beginner/mixed_precision.html>`_ .
@@ -837,12 +839,14 @@ def custom_mixed_precision(network, *, white_list=None, black_list=None, dtype=m
     - Repeatedly calling mixed-precision interfaces, such as `custom_mixed_precision` and `auto_mixed_precision`,
       can result in a larger network hierarchy and slower performance.
     - If interfaces like `Model` and `build_train_network` is used to train the network which is converted by
-      mixed-precision interfaces such as `custom_mixed_precision` and `auto_mixed_precision`, `amp_level`
+      mixed-precision interfaces such as `custom_mixed_precision` and `auto_mixed_precision`, `amp_level` or `level`
       need to be configured to ``O0`` to avoid the duplicated accuracy conversion.
     - Primitives for blacklist is not support yet.
 
     Args:
         network (Cell): Definition of the network.
+
+    Keyword Args:
         white_list (list[Primitive, Cell], optional): White list of custom mixed precision. Defaults: ``None`` , means
             white list is not used.
         black_list (list[Cell], optional): Black list of custom mixed precision. Defaults: ``None`` , means