mindspore 2.6.0rc1__cp39-cp39-win_amd64.whl → 2.7.0rc1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +37 -62
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +43 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +77 -16
- mindspore/common/api.py +238 -113
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +81 -81
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +58 -40
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +33 -3
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +212 -9
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +137 -101
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/nn/cell.py +328 -502
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +3 -3
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
- mindspore/ops/auto_generate/gen_extend_func.py +23 -141
- mindspore/ops/auto_generate/gen_ops_def.py +727 -321
- mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +9 -96
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +33 -540
- mindspore/ops/function/nn_func.py +28 -74
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +571 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +2 -2
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +294 -174
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +7 -39
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +47 -8
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +11 -8
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +14 -7
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +11 -7
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +6 -7
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +3 -4
- mindspore/parallel/transform_safetensors.py +463 -174
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +30 -32
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +14 -4
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +97 -16
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +135 -55
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/version.py +1 -1
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +333 -371
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/train/serialization.py
CHANGED
|
@@ -31,15 +31,14 @@ from multiprocessing import active_children
|
|
|
31
31
|
import multiprocessing as mp
|
|
32
32
|
from collections import OrderedDict
|
|
33
33
|
from io import BytesIO
|
|
34
|
+
from functools import partial
|
|
34
35
|
|
|
35
36
|
import math
|
|
36
37
|
import sys
|
|
37
38
|
import time
|
|
38
|
-
import google
|
|
39
39
|
import numpy as np
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
from safetensors import safe_open
|
|
40
|
+
from safetensors.numpy import save_file
|
|
41
|
+
import google
|
|
43
42
|
|
|
44
43
|
from mindspore.train.checkpoint_pb2 import Checkpoint
|
|
45
44
|
from mindspore.train.mind_ir_pb2 import ModelProto as mindir_model
|
|
@@ -76,6 +75,7 @@ from mindspore.parallel.checkpoint_transform import restore_group_info_list as n
|
|
|
76
75
|
from mindspore.parallel.checkpoint_transform import load_distributed_checkpoint as new_load_distributed_checkpoint
|
|
77
76
|
from mindspore.parallel.checkpoint_transform import merge_sliced_parameter as new_merge_sliced_parameter
|
|
78
77
|
from mindspore.parallel.checkpoint_transform import build_searched_strategy as new_build_searched_strategy
|
|
78
|
+
from mindspore.parallel.transform_safetensors import _fast_safe_open
|
|
79
79
|
from mindspore.train._utils import read_proto, get_parameter_redundancy, _progress_bar, _load_and_transform
|
|
80
80
|
from mindspore._c_expression import load_mindir, _encrypt, _decrypt, _is_cipher_file, \
|
|
81
81
|
split_mindir, split_dynamic_mindir
|
|
@@ -99,6 +99,8 @@ mindir_to_tensor_type = {1: mstype.float32, 2: mstype.uint8, 3: mstype.int8, 4:
|
|
|
99
99
|
5: mstype.int16, 6: mstype.int32, 7: mstype.int64, 10: mstype.float16,
|
|
100
100
|
11: mstype.float64, 12: mstype.uint32, 13: mstype.uint64}
|
|
101
101
|
|
|
102
|
+
safetensors_to_mstype = {'Int4': mstype.qint4x2}
|
|
103
|
+
|
|
102
104
|
_ckpt_mutex = RLock()
|
|
103
105
|
|
|
104
106
|
# unit is KB
|
|
@@ -272,10 +274,7 @@ def _update_param(param, new_param, strict_load):
|
|
|
272
274
|
|
|
273
275
|
if param.data.dtype != new_param.data.dtype:
|
|
274
276
|
if _type_convert(param, new_param, strict_load):
|
|
275
|
-
|
|
276
|
-
new_tensor = cpu_cast(new_param.data, param.data.dtype)
|
|
277
|
-
else:
|
|
278
|
-
new_tensor = Tensor(new_param.data.asnumpy(), param.data.dtype)
|
|
277
|
+
new_tensor = Tensor(new_param.data.asnumpy(), param.data.dtype)
|
|
279
278
|
param.set_data(new_tensor, param.sliced)
|
|
280
279
|
return
|
|
281
280
|
|
|
@@ -313,7 +312,7 @@ def _update_param(param, new_param, strict_load):
|
|
|
313
312
|
def _type_convert(param, new_param, strict_load):
|
|
314
313
|
"""Whether to convert parameter's type during load checkpoint into network."""
|
|
315
314
|
float_type = (mstype.float16, mstype.float32, mstype.float64, mstype.bfloat16)
|
|
316
|
-
int_type = (mstype.int8, mstype.int16, mstype.int32, mstype.int64)
|
|
315
|
+
int_type = (mstype.int8, mstype.int16, mstype.int32, mstype.int64, mstype.qint4x2)
|
|
317
316
|
if not strict_load and ({param.data.dtype, new_param.data.dtype}.issubset(float_type) or
|
|
318
317
|
{param.data.dtype, new_param.data.dtype}.issubset(int_type)):
|
|
319
318
|
logger.warning(f"The type of {new_param.name}:{new_param.data.dtype} in 'parameter_dict' is different from "
|
|
@@ -359,7 +358,7 @@ def _save_weight(checkpoint_dir, model_name, iteration, params):
|
|
|
359
358
|
|
|
360
359
|
|
|
361
360
|
def _exec_save(ckpt_file_name, data_list, enc_key=None, enc_mode="AES-GCM", map_param_inc=False, crc_check=False,
|
|
362
|
-
format="ckpt"):
|
|
361
|
+
format="ckpt", remove_redundancy=None):
|
|
363
362
|
"""Execute the process of saving checkpoint into file."""
|
|
364
363
|
try:
|
|
365
364
|
with _ckpt_mutex:
|
|
@@ -383,9 +382,6 @@ def _exec_save(ckpt_file_name, data_list, enc_key=None, enc_mode="AES-GCM", map_
|
|
|
383
382
|
|
|
384
383
|
crc_num = 0
|
|
385
384
|
for name, value in data_list.items():
|
|
386
|
-
if name == "random_op":
|
|
387
|
-
_write_random_seed(name, value, f)
|
|
388
|
-
continue
|
|
389
385
|
if value[0] == "mapparameter":
|
|
390
386
|
_write_mapparameter(name, value, f, map_param_inc)
|
|
391
387
|
continue
|
|
@@ -428,16 +424,19 @@ def _exec_save(ckpt_file_name, data_list, enc_key=None, enc_mode="AES-GCM", map_
|
|
|
428
424
|
elif format == "safetensors":
|
|
429
425
|
save_dict = {}
|
|
430
426
|
crc_num = 0
|
|
427
|
+
meta_data = {"format": "ms"}
|
|
428
|
+
if remove_redundancy is not None and isinstance(remove_redundancy, bool):
|
|
429
|
+
meta_data["remove_redundancy"] = str(remove_redundancy)
|
|
431
430
|
for name in sorted(data_list.keys()):
|
|
432
431
|
value = data_list[name]
|
|
433
432
|
if isinstance(value[2], np.ndarray):
|
|
433
|
+
if value[1] == str(mstype.qint4x2):
|
|
434
|
+
meta_data[name] = str(mstype.qint4x2)
|
|
434
435
|
save_dict[name] = value[2]
|
|
435
436
|
else:
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
new_np_array = np_array.reshape(value[0])
|
|
440
|
-
save_dict[name] = new_np_array
|
|
437
|
+
if value[2].dtype == mstype.qint4x2:
|
|
438
|
+
meta_data[name] = str(mstype.qint4x2)
|
|
439
|
+
save_dict[name] = value[2].asnumpy()
|
|
441
440
|
|
|
442
441
|
if crc_check:
|
|
443
442
|
crc_num = binascii.crc32(bytes(name, encoding='utf-8'), crc_num)
|
|
@@ -445,10 +444,12 @@ def _exec_save(ckpt_file_name, data_list, enc_key=None, enc_mode="AES-GCM", map_
|
|
|
445
444
|
bytes(save_dict[name]), crc_num)
|
|
446
445
|
safetensors_save_time_start = time.time()
|
|
447
446
|
if crc_check:
|
|
448
|
-
|
|
449
|
-
|
|
447
|
+
meta_data.update({"crc_num": str(crc_num)})
|
|
448
|
+
if save_dict:
|
|
449
|
+
save_file(save_dict, tmp_name, metadata=meta_data)
|
|
450
450
|
else:
|
|
451
451
|
save_file(save_dict, tmp_name)
|
|
452
|
+
|
|
452
453
|
safetensors_save_time_end = time.time()
|
|
453
454
|
cost_time = safetensors_save_time_end - safetensors_save_time_start
|
|
454
455
|
vlog_print("1", "ME", __file__, sys._getframe().f_lineno, f"Save safetensors io cost time:{cost_time}.")
|
|
@@ -464,18 +465,6 @@ def _exec_save(ckpt_file_name, data_list, enc_key=None, enc_mode="AES-GCM", map_
|
|
|
464
465
|
raise e
|
|
465
466
|
|
|
466
467
|
|
|
467
|
-
def _write_random_seed(name, value, f):
|
|
468
|
-
"""Write random op into protobuf file."""
|
|
469
|
-
checkpoint_list = Checkpoint()
|
|
470
|
-
param_value = checkpoint_list.value.add()
|
|
471
|
-
param_value.tag = name
|
|
472
|
-
param_tensor = param_value.tensor
|
|
473
|
-
param_tensor.dims.extend(0)
|
|
474
|
-
param_tensor.tensor_type = "random_op"
|
|
475
|
-
param_tensor.tensor_content = value
|
|
476
|
-
f.write(checkpoint_list.SerializeToString())
|
|
477
|
-
|
|
478
|
-
|
|
479
468
|
def _write_parameter_data(name, value, f, enc_key, plain_data, crc_num=0, crc_check=False, ckpt_total_io_time=0):
|
|
480
469
|
"""Write parameter data into protobuf file."""
|
|
481
470
|
data_size = value[2].nbytes / 1024
|
|
@@ -599,7 +588,7 @@ def _check_save_obj_and_ckpt_file_name(save_obj, ckpt_file_name, format):
|
|
|
599
588
|
return ckpt_file_name
|
|
600
589
|
|
|
601
590
|
|
|
602
|
-
def
|
|
591
|
+
def _check_load_checkpoint_unsupported_param(format, dec_key, dec_mode):
|
|
603
592
|
"""check load checkpoint unsupported param"""
|
|
604
593
|
if format != "safetensors":
|
|
605
594
|
return
|
|
@@ -614,7 +603,7 @@ def _check_load_checkpoint_upsupported_param(format, dec_key, dec_mode):
|
|
|
614
603
|
f"be set to default value '{default_value}', but got '{current_value}'.")
|
|
615
604
|
|
|
616
605
|
|
|
617
|
-
def
|
|
606
|
+
def _check_save_checkpoint_unsupported_param(format, enc_key, enc_mode, map_param_inc=False, global_step_num=None):
|
|
618
607
|
"""check save checkpoint unsupported param"""
|
|
619
608
|
if format != "safetensors":
|
|
620
609
|
return
|
|
@@ -644,11 +633,11 @@ def _check_async_save(async_save):
|
|
|
644
633
|
|
|
645
634
|
|
|
646
635
|
def _async_process_save(ckpt_file_name, data_list, enc_key=None, enc_mode="AES-GCM", map_param_inc=False,
|
|
647
|
-
crc_check=False, format="ckpt", cond=None):
|
|
636
|
+
crc_check=False, format="ckpt", cond=None, remove_redundancy=None):
|
|
648
637
|
"""Check whether the process is pulled up successfully, execute the process of saving checkpoint into file."""
|
|
649
638
|
with cond:
|
|
650
639
|
cond.notify()
|
|
651
|
-
_exec_save(ckpt_file_name, data_list, enc_key, enc_mode, map_param_inc, crc_check, format)
|
|
640
|
+
_exec_save(ckpt_file_name, data_list, enc_key, enc_mode, map_param_inc, crc_check, format, remove_redundancy)
|
|
652
641
|
|
|
653
642
|
|
|
654
643
|
def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True,
|
|
@@ -739,7 +728,9 @@ def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True,
|
|
|
739
728
|
map_param_inc = kwargs.get('incremental', False)
|
|
740
729
|
logger.info("Execute the process of saving checkpoint files.")
|
|
741
730
|
global_step_num = kwargs.get('global_step_num', None)
|
|
742
|
-
|
|
731
|
+
remove_redundancy = kwargs.get('remove_redundancy', None)
|
|
732
|
+
remove_redundancy = Validator.check_isinstance("remove_redundancy", remove_redundancy, (type(None), bool))
|
|
733
|
+
_check_save_checkpoint_unsupported_param(format, enc_key, enc_mode, map_param_inc, global_step_num)
|
|
743
734
|
|
|
744
735
|
if append_dict and "__exception_save__" in append_dict:
|
|
745
736
|
s1 = mindspore.hal.Stream()
|
|
@@ -768,16 +759,6 @@ def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True,
|
|
|
768
759
|
data_list_np = OrderedDict()
|
|
769
760
|
with _ckpt_mutex:
|
|
770
761
|
for param in save_obj:
|
|
771
|
-
if param["name"] == "random_op":
|
|
772
|
-
if os.getenv("AITURBO") == "1":
|
|
773
|
-
data_list_np["random_op"] = []
|
|
774
|
-
data_list_np["random_op"].append(param["data"])
|
|
775
|
-
if crc_check:
|
|
776
|
-
bytes_value = bytes(data_list_np[key][0])
|
|
777
|
-
data_list_np[key].append(binascii.crc32(bytes_value))
|
|
778
|
-
else:
|
|
779
|
-
data_list["random_op"] = param["data"]
|
|
780
|
-
continue
|
|
781
762
|
key = param["name"]
|
|
782
763
|
data_list[key] = []
|
|
783
764
|
data_list_np[key] = []
|
|
@@ -841,7 +822,7 @@ def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True,
|
|
|
841
822
|
while process_flag:
|
|
842
823
|
process = ctx.Process(target=_async_process_save,
|
|
843
824
|
args=(ckpt_file_name, data_list, enc_key, enc_mode, map_param_inc, crc_check,
|
|
844
|
-
format, cond), daemon=True, name="asyn_save_ckpt")
|
|
825
|
+
format, cond, remove_redundancy), daemon=True, name="asyn_save_ckpt")
|
|
845
826
|
process.start()
|
|
846
827
|
with cond:
|
|
847
828
|
wait_flag = cond.wait(timeout=5)
|
|
@@ -854,11 +835,12 @@ def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True,
|
|
|
854
835
|
data_copy = copy.deepcopy(data_list)
|
|
855
836
|
_wait_async_thread_save_ckpt()
|
|
856
837
|
thr = Thread(target=_exec_save,
|
|
857
|
-
args=(ckpt_file_name, data_copy, enc_key, enc_mode, map_param_inc, crc_check, format
|
|
838
|
+
args=(ckpt_file_name, data_copy, enc_key, enc_mode, map_param_inc, crc_check, format,
|
|
839
|
+
remove_redundancy),
|
|
858
840
|
name="asyn_save_ckpt")
|
|
859
841
|
thr.start()
|
|
860
842
|
else:
|
|
861
|
-
_exec_save(ckpt_file_name, data_list, enc_key, enc_mode, map_param_inc, crc_check, format)
|
|
843
|
+
_exec_save(ckpt_file_name, data_list, enc_key, enc_mode, map_param_inc, crc_check, format, remove_redundancy)
|
|
862
844
|
|
|
863
845
|
mstx.range_end(range_id)
|
|
864
846
|
logger.info("Saving checkpoint process is finished.")
|
|
@@ -926,10 +908,13 @@ def _convert_dict_to_param_dict(save_obj, choice_func):
|
|
|
926
908
|
"""Convert a dict of Parameter to param_list."""
|
|
927
909
|
param_list = []
|
|
928
910
|
for (key, value) in save_obj.items():
|
|
929
|
-
if isinstance(key, str)
|
|
911
|
+
if isinstance(key, str):
|
|
930
912
|
if choice_func is not None and not choice_func(key):
|
|
931
913
|
continue
|
|
932
|
-
|
|
914
|
+
if isinstance(value, np.ndarray):
|
|
915
|
+
each_param = {"name": key, "data": Parameter(Tensor.from_numpy(value))}
|
|
916
|
+
if isinstance(value, (Parameter, str)) or _is_buffer_type(value):
|
|
917
|
+
each_param = {"name": key, "data": value}
|
|
933
918
|
param_list.append(each_param)
|
|
934
919
|
else:
|
|
935
920
|
raise TypeError(f"For save_checkpoint, when save_obj is made up by dict, the key should be str and"
|
|
@@ -941,16 +926,12 @@ def _convert_dict_to_param_dict(save_obj, choice_func):
|
|
|
941
926
|
def _convert_cell_param_and_names_to_dict(save_obj, choice_func, is_parallel_mode):
|
|
942
927
|
"""Convert cell.parameters_and_names to OrderedDict."""
|
|
943
928
|
param_dict = OrderedDict()
|
|
929
|
+
is_graph_mode = context.get_context('mode') == context.GRAPH_MODE
|
|
944
930
|
for _, param in save_obj.parameters_and_names():
|
|
945
|
-
if param.name.startswith("accu_grads") or param.name.endswith("expert_load"):
|
|
946
|
-
continue
|
|
947
|
-
not_sliced = not param.sliced
|
|
948
|
-
is_graph_mode = context.get_context('mode') == context.GRAPH_MODE
|
|
949
931
|
# All parameters are initialized immediately under PyNative mode, skip this judgement.
|
|
950
|
-
judgment = not_sliced or param.has_init
|
|
951
932
|
if param.param_info.is_pipeline_shared_param:
|
|
952
933
|
continue
|
|
953
|
-
if
|
|
934
|
+
if is_parallel_mode and is_graph_mode and (not param.sliced or param.has_init):
|
|
954
935
|
continue
|
|
955
936
|
if choice_func is not None and not choice_func(param.name):
|
|
956
937
|
continue
|
|
@@ -974,12 +955,6 @@ def _convert_cell_to_param_list(save_obj, integrated_save, append_dict, choice_f
|
|
|
974
955
|
if not is_parallel_mode:
|
|
975
956
|
save_obj.init_parameters_data()
|
|
976
957
|
param_dict = _convert_cell_param_and_names_to_dict(save_obj, choice_func, is_parallel_mode)
|
|
977
|
-
if append_dict and "random_op" in append_dict:
|
|
978
|
-
phase = 'train' + '.' + str(save_obj.create_time) + '.' + str(id(save_obj)) + '.' + save_obj.arguments_key
|
|
979
|
-
if phase in save_obj.compile_cache and _executor.has_compiled(phase):
|
|
980
|
-
random_byte = _executor._graph_executor.get_random_status(phase)
|
|
981
|
-
param_list.append({"name": "random_op", "data": random_byte})
|
|
982
|
-
append_dict.pop("random_op")
|
|
983
958
|
for (key, value) in param_dict.items():
|
|
984
959
|
each_param = {"name": key}
|
|
985
960
|
if isinstance(value, MapParameter):
|
|
@@ -1002,15 +977,14 @@ def _convert_cell_to_param_list(save_obj, integrated_save, append_dict, choice_f
|
|
|
1002
977
|
param_data.append(str(param_tensor.dtype))
|
|
1003
978
|
param_data.append(value.key)
|
|
1004
979
|
else:
|
|
1005
|
-
param_data = value.data
|
|
1006
980
|
if append_dict and "__exception_save__" in append_dict:
|
|
1007
981
|
param_data = Tensor(Tensor_.move_to(value, "CPU", False))
|
|
982
|
+
else:
|
|
983
|
+
param_data = Tensor(value.data)
|
|
1008
984
|
|
|
1009
985
|
# in automatic model parallel scenario, some parameters were split to all the devices,
|
|
1010
986
|
# which should be combined before saving
|
|
1011
987
|
if key in parameter_layout_dict:
|
|
1012
|
-
if not append_dict or "__exception_save__" not in append_dict:
|
|
1013
|
-
param_data = Tensor(value.data)
|
|
1014
988
|
param_data = _get_merged_param_data(save_obj, parameter_layout_dict, key, param_data,
|
|
1015
989
|
integrated_save)
|
|
1016
990
|
|
|
@@ -1215,12 +1189,26 @@ def _check_param_type(param_config, key, target_type, requested):
|
|
|
1215
1189
|
return None
|
|
1216
1190
|
|
|
1217
1191
|
|
|
1192
|
+
def _check_remove_redundancy(remove_redundancy, f):
|
|
1193
|
+
"""Check whether remove_redundancy is consistent with the safetensors file."""
|
|
1194
|
+
if f.metadata() is not None and "remove_redundancy" in f.metadata().keys():
|
|
1195
|
+
if f.metadata()["remove_redundancy"] == "True" and not remove_redundancy:
|
|
1196
|
+
logger.warning("For 'load_checkpoint', the safetensors file is deduplicated, "
|
|
1197
|
+
"but remove_redundancy is set to False.")
|
|
1198
|
+
return True
|
|
1199
|
+
if f.metadata()["remove_redundancy"] == "False" and remove_redundancy:
|
|
1200
|
+
logger.warning("For 'load_checkpoint', the safetensors file is non-deduplicated, "
|
|
1201
|
+
"but remove_redundancy is set to True.")
|
|
1202
|
+
return False
|
|
1203
|
+
return remove_redundancy
|
|
1204
|
+
|
|
1205
|
+
|
|
1218
1206
|
def _load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter_prefix, choice_func, dec_key,
|
|
1219
|
-
dec_mode, crc_check, format):
|
|
1207
|
+
dec_mode, crc_check, format, remove_redundancy):
|
|
1220
1208
|
"""load parameter into parameter_dict"""
|
|
1221
1209
|
ckpt_file_name = _check_ckpt_file_name(ckpt_file_name, format)
|
|
1222
1210
|
if format == "safetensors":
|
|
1223
|
-
with
|
|
1211
|
+
with _fast_safe_open(ckpt_file_name, framework='np') as f:
|
|
1224
1212
|
cal_crc_num = 0
|
|
1225
1213
|
total_io_cost_time = 0
|
|
1226
1214
|
for k in sorted(f.keys()):
|
|
@@ -1234,8 +1222,13 @@ def _load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter
|
|
|
1234
1222
|
io_end_time = time.time()
|
|
1235
1223
|
io_cost_time = io_end_time - io_start_time
|
|
1236
1224
|
total_io_cost_time += io_cost_time
|
|
1237
|
-
|
|
1238
|
-
|
|
1225
|
+
if f.metadata() is not None and k in f.metadata().keys():
|
|
1226
|
+
sf_dtype = f.metadata()[k]
|
|
1227
|
+
ms_dtype = safetensors_to_mstype[sf_dtype]
|
|
1228
|
+
parameter_dict[k] = Parameter(Tensor(value, dtype=ms_dtype))
|
|
1229
|
+
else:
|
|
1230
|
+
parameter_dict[k] = Parameter(Tensor.from_numpy(value))
|
|
1231
|
+
remove_redundancy = _check_remove_redundancy(remove_redundancy, f)
|
|
1239
1232
|
vlog_print("1", "ME", __file__, sys._getframe().f_lineno,
|
|
1240
1233
|
f"Load safetensors io cost time:{total_io_cost_time}.")
|
|
1241
1234
|
if crc_check:
|
|
@@ -1248,7 +1241,7 @@ def _load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter
|
|
|
1248
1241
|
if cal_crc_num != crc_num:
|
|
1249
1242
|
raise ValueError("For 'load_checkpoint', the crc check has failed. "
|
|
1250
1243
|
"Please check whether the ckpt file is damaged.")
|
|
1251
|
-
return
|
|
1244
|
+
return remove_redundancy
|
|
1252
1245
|
checkpoint_list = _parse_ckpt_proto(ckpt_file_name, dec_key, dec_mode, crc_check)
|
|
1253
1246
|
try:
|
|
1254
1247
|
param_data_list = []
|
|
@@ -1261,9 +1254,6 @@ def _load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter
|
|
|
1261
1254
|
logger.warning("For load_checkpoint, this parameter `filter_prefix` will be deprecated, "
|
|
1262
1255
|
"please use `choice_func` instead.")
|
|
1263
1256
|
for element_id, element in enumerate(checkpoint_list.value):
|
|
1264
|
-
if element.tag == "random_op":
|
|
1265
|
-
parameter_dict["random_op"] = element.tensor.tensor_content
|
|
1266
|
-
continue
|
|
1267
1257
|
if not _whether_load_param(specify_prefix, filter_prefix, element.tag):
|
|
1268
1258
|
continue
|
|
1269
1259
|
if specify_prefix is None and filter_prefix is None and \
|
|
@@ -1301,6 +1291,7 @@ def _load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter
|
|
|
1301
1291
|
_offload_if_config(parameter)
|
|
1302
1292
|
|
|
1303
1293
|
logger.info("Loading checkpoint files process is finished.")
|
|
1294
|
+
return remove_redundancy
|
|
1304
1295
|
|
|
1305
1296
|
except BaseException as e:
|
|
1306
1297
|
logger.critical("Failed to load the checkpoint file '%s'.", ckpt_file_name)
|
|
@@ -1320,6 +1311,9 @@ def load_checkpoint(ckpt_file_name, net=None, strict_load=False, filter_prefix=N
|
|
|
1320
1311
|
And using either of those two args will override `choice_func` at the same time.
|
|
1321
1312
|
- If none of the parameters are loaded from checkpoint file, it will throw ValueError.
|
|
1322
1313
|
- When loading a checkpoint that has removed redundancy, the network should be compiled.
|
|
1314
|
+
- When `net` is not None, it will verify whether the `remove_redundancy` parameter matches the
|
|
1315
|
+
deduplication flag in the loaded safetensors file. If they are different, load the file according to
|
|
1316
|
+
the deduplication flag in the file.
|
|
1323
1317
|
|
|
1324
1318
|
Args:
|
|
1325
1319
|
ckpt_file_name (str): Checkpoint file name.
|
|
@@ -1398,7 +1392,7 @@ def load_checkpoint(ckpt_file_name, net=None, strict_load=False, filter_prefix=N
|
|
|
1398
1392
|
dec_mode = Validator.check_isinstance('dec_mode', dec_mode, str)
|
|
1399
1393
|
crc_check = Validator.check_isinstance('crc_check', crc_check, bool)
|
|
1400
1394
|
remove_redundancy = Validator.check_isinstance('remove_redundancy', remove_redundancy, bool)
|
|
1401
|
-
|
|
1395
|
+
_check_load_checkpoint_unsupported_param(format, dec_key, dec_mode)
|
|
1402
1396
|
logger.info("Execute the process of loading checkpoint files.")
|
|
1403
1397
|
|
|
1404
1398
|
parameter_dict = {}
|
|
@@ -1424,8 +1418,8 @@ def load_checkpoint(ckpt_file_name, net=None, strict_load=False, filter_prefix=N
|
|
|
1424
1418
|
f"passed the CRC check and has been corrupted.")
|
|
1425
1419
|
parameter_dict[key] = Parameter(Tensor(value[0]), name=key)
|
|
1426
1420
|
else:
|
|
1427
|
-
_load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter_prefix,
|
|
1428
|
-
|
|
1421
|
+
remove_redundancy = _load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter_prefix,
|
|
1422
|
+
choice_func, dec_key, dec_mode, crc_check, format, remove_redundancy)
|
|
1429
1423
|
|
|
1430
1424
|
if not parameter_dict:
|
|
1431
1425
|
raise ValueError(f"The loaded parameter dict is empty after filter or specify, please check whether "
|
|
@@ -1672,9 +1666,22 @@ def _check_load_param_into_net(net, parameter_dict):
|
|
|
1672
1666
|
msg = ("For 'load_param_into_net', the argument 'parameter_dict' should be a dict, "
|
|
1673
1667
|
"but got {}.".format(type(parameter_dict)))
|
|
1674
1668
|
raise TypeError(msg)
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1669
|
+
for key, value in parameter_dict.items():
|
|
1670
|
+
if not isinstance(key, str) or not isinstance(value, (Parameter, str, list)):
|
|
1671
|
+
logger.critical("Load parameters into net failed.")
|
|
1672
|
+
msg = ("For 'parameter_dict', the element in the argument 'parameter_dict' should be a "
|
|
1673
|
+
"'str' and 'Parameter' , but got {} and {}.".format(type(key), type(value)))
|
|
1674
|
+
raise TypeError(msg)
|
|
1675
|
+
|
|
1676
|
+
|
|
1677
|
+
def _check_remove_redundancy_net(net):
|
|
1678
|
+
"""Check whether the network is compiled with the remove_redundancy feature."""
|
|
1679
|
+
if get_group_size() == 1:
|
|
1680
|
+
raise TypeError(f"The deduplication feature for loading checkpoint can only be used "
|
|
1681
|
+
f"in parallel scenarios, but got stand_alone.")
|
|
1682
|
+
if not net.compile_cache and not net.parameter_layout_dict:
|
|
1683
|
+
raise ValueError("When loading a parameter dict that has removed redundancy, "
|
|
1684
|
+
"the network should be compiled.")
|
|
1678
1685
|
|
|
1679
1686
|
|
|
1680
1687
|
def load_param_into_net(net, parameter_dict, strict_load=False, remove_redundancy=False):
|
|
@@ -1721,18 +1728,14 @@ def load_param_into_net(net, parameter_dict, strict_load=False, remove_redundanc
|
|
|
1721
1728
|
<https://mindspore.cn/tutorials/en/master/beginner/save_load.html#saving-and-loading-the-model-weight>`_
|
|
1722
1729
|
"""
|
|
1723
1730
|
_check_load_param_into_net(net, parameter_dict)
|
|
1724
|
-
for key, value in parameter_dict.items():
|
|
1725
|
-
if not isinstance(key, str) or not isinstance(value, (Parameter, str, list)):
|
|
1726
|
-
logger.critical("Load parameters into net failed.")
|
|
1727
|
-
msg = ("For 'parameter_dict', the element in the argument 'parameter_dict' should be a "
|
|
1728
|
-
"'str' and 'Parameter' , but got {} and {}.".format(type(key), type(value)))
|
|
1729
|
-
raise TypeError(msg)
|
|
1730
1731
|
|
|
1731
1732
|
strict_load = Validator.check_bool(strict_load)
|
|
1732
1733
|
remove_redundancy = Validator.check_isinstance('remove_redundancy', remove_redundancy, bool)
|
|
1733
1734
|
logger.info("Execute the process of loading parameters into net.")
|
|
1734
1735
|
param_not_load = []
|
|
1736
|
+
param_loaded = set()
|
|
1735
1737
|
ckpt_not_load = list(parameter_dict.keys())
|
|
1738
|
+
is_parallel_mode = _is_auto_parallel_mode(net)
|
|
1736
1739
|
for _, param in net.parameters_and_names():
|
|
1737
1740
|
if param.param_info.is_pipeline_shared_param:
|
|
1738
1741
|
continue
|
|
@@ -1748,22 +1751,23 @@ def load_param_into_net(net, parameter_dict, strict_load=False, remove_redundanc
|
|
|
1748
1751
|
if hasattr(param, "init_param") and not param.init_param:
|
|
1749
1752
|
param.init_param = True
|
|
1750
1753
|
ckpt_not_load.remove(param.name)
|
|
1754
|
+
param_loaded.add(param.name)
|
|
1751
1755
|
else:
|
|
1756
|
+
if param.name.startswith("accu_grads"):
|
|
1757
|
+
continue
|
|
1758
|
+
if param.param_info.is_pipeline_shared_param:
|
|
1759
|
+
continue
|
|
1760
|
+
if is_parallel_mode and not param.sliced:
|
|
1761
|
+
continue
|
|
1752
1762
|
param_not_load.append(param.name)
|
|
1753
1763
|
|
|
1754
1764
|
if param_not_load and not strict_load:
|
|
1755
1765
|
_load_dismatch_prefix_params(net, parameter_dict, param_not_load, strict_load)
|
|
1756
1766
|
|
|
1757
1767
|
if remove_redundancy:
|
|
1758
|
-
|
|
1759
|
-
raise TypeError(f"The deduplication feature for loading checkpoint can only be used "
|
|
1760
|
-
f"in parallel scenarios, but got stand_alone.")
|
|
1761
|
-
if not net.compile_cache and not net.parameter_layout_dict:
|
|
1762
|
-
raise ValueError("When loading a parameter dict that has removed redundancy, "
|
|
1763
|
-
"the network should be compiled.")
|
|
1768
|
+
_check_remove_redundancy_net(net)
|
|
1764
1769
|
param_layout = net.parameter_layout_dict
|
|
1765
|
-
_single_parameter_broadcast(net, param_layout, param_not_load)
|
|
1766
|
-
mindspore.hal.synchronize()
|
|
1770
|
+
_single_parameter_broadcast(net, param_layout, param_not_load, param_loaded)
|
|
1767
1771
|
|
|
1768
1772
|
logger.info("Loading parameters into net is finished.")
|
|
1769
1773
|
if param_not_load:
|
|
@@ -1878,9 +1882,10 @@ def _save_graph(network, file_name):
|
|
|
1878
1882
|
file_name (str): Graph file name into which the graph will be saved.
|
|
1879
1883
|
"""
|
|
1880
1884
|
logger.info("Execute the process of saving graph.")
|
|
1881
|
-
|
|
1882
1885
|
file_name = os.path.realpath(file_name)
|
|
1883
1886
|
graph_pb = network.get_func_graph_proto()
|
|
1887
|
+
if os.path.isfile(file_name) and graph_pb:
|
|
1888
|
+
os.remove(file_name)
|
|
1884
1889
|
if graph_pb:
|
|
1885
1890
|
with open(file_name, "wb") as f:
|
|
1886
1891
|
os.chmod(file_name, stat.S_IRUSR | stat.S_IWUSR)
|
|
@@ -2242,7 +2247,7 @@ def _get_data_file(is_encrypt, kwargs, data_file_name):
|
|
|
2242
2247
|
if is_encrypt():
|
|
2243
2248
|
place_holder_data = _encrypt(place_holder_data, len(place_holder_data), kwargs["enc_key"],
|
|
2244
2249
|
len(kwargs["enc_key"]), kwargs["enc_mode"])
|
|
2245
|
-
parameter_size =
|
|
2250
|
+
parameter_size = offset / 1024
|
|
2246
2251
|
try:
|
|
2247
2252
|
f = open(data_file_name, "wb")
|
|
2248
2253
|
f.write(place_holder_data)
|
|
@@ -2284,9 +2289,11 @@ def _split_save(net_dict, model, file_name, is_encrypt, **kwargs):
|
|
|
2284
2289
|
external_local = os.path.join(file_prefix + "_variables", "data_" + str(index))
|
|
2285
2290
|
data_file_name = os.path.join(dirname, external_local)
|
|
2286
2291
|
f, parameter_size, offset = _get_data_file(is_encrypt, kwargs, data_file_name)
|
|
2292
|
+
|
|
2293
|
+
round = 0
|
|
2294
|
+
names = []
|
|
2295
|
+
|
|
2287
2296
|
try:
|
|
2288
|
-
round = 0
|
|
2289
|
-
names = []
|
|
2290
2297
|
for param_proto in model.graph.parameter:
|
|
2291
2298
|
name = param_proto.name[param_proto.name.find(":") + 1:]
|
|
2292
2299
|
names.append((name, param_proto))
|
|
@@ -2730,28 +2737,35 @@ def convert_model(mindir_file, convert_file, file_format):
|
|
|
2730
2737
|
export(net, *net_input, file_name=convert_file, file_format=file_format)
|
|
2731
2738
|
|
|
2732
2739
|
|
|
2733
|
-
def
|
|
2734
|
-
return _load_and_transform(path, name_map, mindspore.load_checkpoint,
|
|
2740
|
+
def _load_ckpt_to_new_name_map(path, name_map=None):
|
|
2741
|
+
return _load_and_transform(path, name_map, mindspore.load_checkpoint, None)
|
|
2735
2742
|
|
|
2736
2743
|
|
|
2737
|
-
def
|
|
2738
|
-
|
|
2744
|
+
def _load_sf_to_new_name_map(path, name_map=None):
|
|
2745
|
+
load_func = partial(mindspore.load_checkpoint, format="safetensors")
|
|
2746
|
+
return _load_and_transform(path, name_map, load_func, None)
|
|
2739
2747
|
|
|
2740
2748
|
|
|
2741
2749
|
def _process_file(file_info):
|
|
2742
2750
|
cur_ckpt_path, name_map, save_path, file = file_info
|
|
2743
|
-
|
|
2751
|
+
if name_map is not None:
|
|
2752
|
+
param_dict = _load_ckpt_to_new_name_map(cur_ckpt_path, name_map)
|
|
2753
|
+
else:
|
|
2754
|
+
param_dict = mindspore.load_checkpoint(cur_ckpt_path)
|
|
2744
2755
|
safetensors_filename = file.replace(".ckpt", ".safetensors")
|
|
2745
2756
|
dst_file = os.path.join(save_path, safetensors_filename)
|
|
2746
|
-
|
|
2757
|
+
mindspore.save_checkpoint(param_dict, dst_file, format='safetensors')
|
|
2747
2758
|
|
|
2748
2759
|
|
|
2749
2760
|
def _process_file_safetensors(file_info):
|
|
2750
2761
|
cur_safe_path, name_map, save_path, file = file_info
|
|
2751
|
-
|
|
2762
|
+
if name_map is not None:
|
|
2763
|
+
param_dict = _load_sf_to_new_name_map(cur_safe_path, name_map)
|
|
2764
|
+
else:
|
|
2765
|
+
param_dict = mindspore.load_checkpoint(cur_safe_path, format="safetensors")
|
|
2752
2766
|
ckpt_filename = file.replace(".safetensors", ".ckpt")
|
|
2753
2767
|
dst_file = os.path.join(save_path, ckpt_filename)
|
|
2754
|
-
mindspore.save_checkpoint(
|
|
2768
|
+
mindspore.save_checkpoint(param_dict, dst_file)
|
|
2755
2769
|
|
|
2756
2770
|
|
|
2757
2771
|
def _gather_safetensors_tasks(file_path, save_path, file_name_regex, name_map):
|
|
@@ -2862,10 +2876,14 @@ def ckpt_to_safetensors(file_path, save_path=None, name_map=None, file_name_rege
|
|
|
2862
2876
|
if save_path and not os.path.exists(save_path):
|
|
2863
2877
|
os.makedirs(save_path, exist_ok=True)
|
|
2864
2878
|
|
|
2865
|
-
|
|
2879
|
+
if name_map is not None:
|
|
2880
|
+
param_dict = _load_ckpt_to_new_name_map(file_path, name_map)
|
|
2881
|
+
else:
|
|
2882
|
+
param_dict = mindspore.load_checkpoint(file_path)
|
|
2883
|
+
|
|
2866
2884
|
safetensors_filename = os.path.basename(file_path).replace(".ckpt", ".safetensors")
|
|
2867
2885
|
dst_file = os.path.join(save_path if save_path else os.path.dirname(file_path), safetensors_filename)
|
|
2868
|
-
|
|
2886
|
+
mindspore.save_checkpoint(param_dict, dst_file, format='safetensors')
|
|
2869
2887
|
|
|
2870
2888
|
|
|
2871
2889
|
def safetensors_to_ckpt(file_path, save_path=None, name_map=None, file_name_regex=None, processes_num=1):
|
|
@@ -2924,10 +2942,14 @@ def safetensors_to_ckpt(file_path, save_path=None, name_map=None, file_name_rege
|
|
|
2924
2942
|
if save_path and not os.path.exists(save_path):
|
|
2925
2943
|
os.makedirs(save_path, exist_ok=True)
|
|
2926
2944
|
|
|
2927
|
-
|
|
2945
|
+
if name_map is not None:
|
|
2946
|
+
param_dict = _load_sf_to_new_name_map(file_path, name_map)
|
|
2947
|
+
else:
|
|
2948
|
+
param_dict = mindspore.load_checkpoint(file_path, format="safetensors")
|
|
2949
|
+
|
|
2928
2950
|
ckpt_filename = os.path.basename(file_path).replace(".safetensors", ".ckpt")
|
|
2929
2951
|
dst_file = os.path.join(save_path if save_path else os.path.dirname(file_path), ckpt_filename)
|
|
2930
|
-
mindspore.save_checkpoint(
|
|
2952
|
+
mindspore.save_checkpoint(param_dict, dst_file)
|
|
2931
2953
|
|
|
2932
2954
|
|
|
2933
2955
|
def restore_group_info_list(group_info_file_name):
|
|
@@ -369,7 +369,19 @@ class SummaryRecord:
|
|
|
369
369
|
global SUMMARY_TENSOR_CACHE
|
|
370
370
|
for tag in tags:
|
|
371
371
|
item_name = name + tag
|
|
372
|
+
time_out = 30
|
|
373
|
+
start_time = time.time()
|
|
374
|
+
last_size = len(SUMMARY_TENSOR_CACHE)
|
|
372
375
|
while item_name not in SUMMARY_TENSOR_CACHE:
|
|
376
|
+
current_size = len(SUMMARY_TENSOR_CACHE)
|
|
377
|
+
if current_size != last_size:
|
|
378
|
+
start_time = time.time()
|
|
379
|
+
last_size = current_size
|
|
380
|
+
if time.time() - start_time > time_out:
|
|
381
|
+
raise RuntimeError(
|
|
382
|
+
f"For '{self.__class__.__name__}', {tag} summary op sync tag "
|
|
383
|
+
f"was not received within {time_out} seconds, indicating potential mbuf issues."
|
|
384
|
+
)
|
|
373
385
|
time.sleep(0.004)
|
|
374
386
|
|
|
375
387
|
with _summary_lock:
|
|
@@ -416,8 +428,7 @@ class SummaryRecord:
|
|
|
416
428
|
if graph_proto is None and train_network is not None:
|
|
417
429
|
graph_proto = _cell_graph_executor.get_optimize_graph_proto(train_network)
|
|
418
430
|
if graph_proto is None:
|
|
419
|
-
|
|
420
|
-
logger.error("Failed to get proto for graph.")
|
|
431
|
+
logger.warning("Failed to get proto for graph.")
|
|
421
432
|
else:
|
|
422
433
|
self._event_writer.write({'graph': [{'step': step, 'value': graph_proto}]})
|
|
423
434
|
self._status['has_graph'] = True
|
mindspore/turbojpeg.dll
CHANGED
|
Binary file
|
mindspore/utils/__init__.py
CHANGED
|
@@ -16,9 +16,10 @@
|
|
|
16
16
|
from __future__ import absolute_import
|
|
17
17
|
from mindspore._c_expression import stress_detect, _reuse_data_ptr
|
|
18
18
|
from .utils import ExitByRequest, _tft_handler
|
|
19
|
-
from .runtime_execution_order_check import runtime_execution_order_check
|
|
19
|
+
from .runtime_execution_order_check import runtime_execution_order_check, comm_exec_order_check
|
|
20
|
+
from .sdc_detect import sdc_detect_start, sdc_detect_stop, get_sdc_detect_result
|
|
20
21
|
from . import dryrun
|
|
21
22
|
|
|
22
23
|
# Symbols from utils module.
|
|
23
24
|
__all__ = ["stress_detect", "ExitByRequest", "runtime_execution_order_check", "dryrun", "_reuse_data_ptr",
|
|
24
|
-
"_tft_handler"]
|
|
25
|
+
"_tft_handler", "comm_exec_order_check", "sdc_detect_start", "sdc_detect_stop", "get_sdc_detect_result"]
|
mindspore/utils/dryrun.py
CHANGED
|
@@ -17,7 +17,6 @@ import traceback
|
|
|
17
17
|
import os
|
|
18
18
|
from mindspore.common import Tensor
|
|
19
19
|
from mindspore import log as logger
|
|
20
|
-
from mindspore.common._stub_tensor import StubTensor
|
|
21
20
|
from mindspore.common import dtype as mstype
|
|
22
21
|
from mindspore._checkparam import is_stub_tensor
|
|
23
22
|
|
|
@@ -78,12 +77,7 @@ def set_simulation():
|
|
|
78
77
|
Tensor._getitem = obj.inject(Tensor._getitem)
|
|
79
78
|
Tensor.is_contiguous = obj.inject(Tensor.is_contiguous)
|
|
80
79
|
Tensor.flush_from_cache = obj.inject(Tensor.flush_from_cache)
|
|
81
|
-
StubTensor.asnumpy = obj.inject(StubTensor.asnumpy)
|
|
82
|
-
StubTensor._getitem = obj.inject(StubTensor._getitem)
|
|
83
|
-
StubTensor.is_contiguous = obj.inject(StubTensor.is_contiguous)
|
|
84
|
-
StubTensor.flush_from_cache = obj.inject(StubTensor.flush_from_cache)
|
|
85
80
|
Tensor.__str__ = no_inject_traceback_for_print
|
|
86
|
-
StubTensor.__str__ = no_inject_traceback_for_print
|
|
87
81
|
Tensor.tolist = obj.inject(Tensor.tolist)
|
|
88
82
|
Tensor.__int__ = obj.inject(Tensor.__int__)
|
|
89
83
|
Tensor.__float__ = obj.inject(Tensor.__float__)
|