mindspore-2.6.0-cp39-cp39-win_amd64.whl → mindspore-2.7.0rc1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +36 -61
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +32 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +76 -15
- mindspore/common/api.py +193 -112
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +48 -83
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +38 -23
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +32 -2
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -5
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +208 -5
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +122 -98
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/nn/cell.py +325 -499
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +17 -8
- mindspore/ops/auto_generate/gen_extend_func.py +1 -51
- mindspore/ops/auto_generate/gen_ops_def.py +463 -257
- mindspore/ops/auto_generate/gen_ops_prim.py +1127 -885
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +7 -94
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +21 -367
- mindspore/ops/function/nn_func.py +26 -41
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +0 -2
- mindspore/ops/functional_overload.py +463 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +235 -172
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +5 -6
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +36 -4
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +4 -2
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +12 -5
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +3 -1
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +1 -1
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +2 -2
- mindspore/parallel/transform_safetensors.py +462 -174
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +21 -30
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +6 -2
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +71 -13
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +51 -33
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +329 -367
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/nn/wrap/loss_scale.py
CHANGED
@@ -26,33 +26,31 @@ from mindspore.common import Tensor
 from mindspore.common.sparse_tensor import RowTensorInner
 from mindspore.common.parameter import Parameter
 from mindspore.ops.operations.math_ops import NPUGetFloatStatusV2, NPUClearFloatStatusV2
-from mindspore
-from mindspore.ops import composite as C
-from mindspore.ops import operations as P
+from mindspore import ops
 from mindspore.ops.operations.nn_ops import AllFinite
 from mindspore.common import dtype as mstype
 from mindspore._c_expression import MSContext
 from mindspore.run_check._check_version import AscendEnvChecker
 from mindspore import log as logger

-_grad_scale =
-reciprocal =
+_grad_scale = ops.MultitypeFuncGraph("grad_scale")
+reciprocal = ops.Reciprocal()


 @_grad_scale.register("Tensor", "Tensor")
 def tensor_grad_scale(scale, grad):
-    return grad *
+    return grad * ops.cast(reciprocal(scale), ops.dtype(grad))


 @_grad_scale.register("Tensor", "RowTensor")
 def tensor_grad_scale_row_tensor(scale, grad):
     return RowTensorInner(grad.indices,
-                          grad.values *
+                          grad.values * ops.cast(reciprocal(scale), ops.dtype(grad.values)),
                           grad.dense_shape)


-_grad_overflow =
-grad_overflow =
+_grad_overflow = ops.MultitypeFuncGraph("_grad_overflow")
+grad_overflow = ops.FloatStatus()


 @_grad_overflow.register("Tensor")
@@ -65,8 +63,8 @@ def _tensor_grad_overflow_row_tensor(grad):
     return grad_overflow(grad.values)


-_ascend_grad_overflow =
-ascend_grad_overflow =
+_ascend_grad_overflow = ops.MultitypeFuncGraph("_ascend_grad_overflow")
+ascend_grad_overflow = ops.IsFinite()


 @_ascend_grad_overflow.register("Tensor")
@@ -74,7 +72,7 @@ def _tensor_ascend_grad_overflow(grad):
     status = ascend_grad_overflow(grad)
     base = Tensor(1.0, dtype=mstype.float32)
     output = base - status.all()
-    output =
+    output = ops.Reshape()(output, ((-1,)))
     return output


@@ -83,7 +81,7 @@ def _tensor_ascend_grad_overflow_row_tensor(grad):
     status = ascend_grad_overflow(grad.values)
     base = Tensor(1.0, dtype=mstype.float32)
     output = base - status.all()
-    output =
+    output = ops.Reshape()(output, ((1,)))
     return output


@@ -154,14 +152,14 @@ class DynamicLossScaleUpdateCell(Cell):

         self.cur_iter = Parameter(Tensor(1, dtype=mstype.int32), name="current_iterator_step")
         self.last_overflow_iter = Parameter(Tensor(0, dtype=mstype.int32), name="last_overflow_iterator_step")
-        self.select =
-        self.max =
+        self.select = ops.Select()
+        self.max = ops.Maximum()
         self.minimum_loss_scale = Tensor(1.0, dtype=mstype.float32)
-        self.reciprocal =
-        self.less_equal =
-        self.logic_and =
-        self.logic_not =
-        self.logic_or =
+        self.reciprocal = ops.Reciprocal()
+        self.less_equal = ops.LessEqual()
+        self.logic_and = ops.LogicalAnd()
+        self.logic_not = ops.LogicalNot()
+        self.logic_or = ops.LogicalOr()
         self.const_true = Tensor(True, dtype=mstype.bool_)

     def get_loss_scale(self):
@@ -187,14 +185,14 @@ class DynamicLossScaleUpdateCell(Cell):
         should_inc = self.less_equal(self.scale_window, self.cur_iter - self.last_overflow_iter)
         last_iter_cond = self.logic_or(overflow_cond, should_inc)
         last_overflow_iter = self.select(last_iter_cond, self.cur_iter, self.last_overflow_iter)
-        last_iter =
+        last_iter = ops.assign(self.last_overflow_iter, last_overflow_iter)
         update_scale_cond = self.logic_and(should_inc, self.logic_not(overflow_cond))
         scale_mul_res = loss_scale_on_overflow * self.scale_factor
         scaled_loss_scale = self.select(update_scale_cond, scale_mul_res, loss_scale_on_overflow)
-
+        ops.assign(loss_scale, scaled_loss_scale)
         inc_cur_iter = self.cur_iter + 1
-        inc_cur_iter =
-
+        inc_cur_iter = ops.depend(inc_cur_iter, last_iter)
+        ops.assign(self.cur_iter, inc_cur_iter)
         return overflow


@@ -360,19 +358,19 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):

     def __init__(self, network, optimizer, scale_sense):
         super(TrainOneStepWithLossScaleCell, self).__init__(network, optimizer, sens=None)
-        self.hyper_map =
+        self.hyper_map = ops.HyperMap()
         self.base = Tensor(1, mstype.float32)
         self.base0 = Tensor(0, mstype.int32)
-        self.reduce_sum =
-        self.reduce_all =
-        self.less_equal =
-        self.equal =
-        self.logic_not =
-        self.allreduce =
+        self.reduce_sum = ops.ReduceSum(keep_dims=False)
+        self.reduce_all = ops.ReduceAll(keep_dims=False)
+        self.less_equal = ops.LessEqual()
+        self.equal = ops.Equal()
+        self.logic_not = ops.LogicalNot()
+        self.allreduce = ops.AllReduce()
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
-        self.gpu_target =
-        self.ascend_910a_target =
-        self.ascend_910b_target =
+        self.gpu_target = context.get_context("device_target") == "GPU"
+        self.ascend_910a_target = MSContext.get_instance().get_ascend_soc_version() == 'ascend910'
+        self.ascend_910b_target = MSContext.get_instance().get_ascend_soc_version() in ['ascend910b', 'ascend910_93']
         self.loss_scaling_manager = None
         self._ascend_check_overflow_mode = os.environ.get('MS_ASCEND_CHECK_OVERFLOW_MODE')

@@ -420,9 +418,9 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         scaling_sens = self.scale_sense
         status = Tensor([0] * 8, mstype.int32)

-        scaling_sens_filled =
+        scaling_sens_filled = ops.ones_like(loss) * ops.cast(scaling_sens, ops.dtype(loss))
         grads = self.grad(self.network, weights)(*inputs, scaling_sens_filled)
-        grads = self.hyper_map(
+        grads = self.hyper_map(ops.partial(_grad_scale, scaling_sens), grads)
         # apply grad reducer on grads
         grads = self.grad_reducer(grads)

@@ -431,7 +429,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         overflow = self.process_loss_scale(cond)
         # if there is no overflow, do optimize
         if not overflow:
-            loss =
+            loss = ops.depend(loss, self.optimizer(grads))
         return loss, cond, scaling_sens

     def set_sense_scale(self, sens):
@@ -475,18 +473,18 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         status = Tensor([0] * 8, mstype.int32)
         if self.ascend_910a_target or (self.ascend_910b_target and \
                 self._ascend_check_overflow_mode == "SATURATION_MODE"):
-            status =
+            status = ops.depend(status, pre_cond)
             # clear overflow buffer
             clear_status = NPUClearFloatStatusV2()(status)
-            compute_input =
+            compute_input = ops.depend(compute_input, clear_status)
         return status, compute_input

     def _check_overflow_status_on_infnan_mode(self, grad_overflow_check_func, compute_output):
         """check overflow status on infnan mode."""
-        flag_sum = self.hyper_map(
-        flag_sum =
+        flag_sum = self.hyper_map(ops.partial(grad_overflow_check_func), compute_output)
+        flag_sum = ops.AddN()(flag_sum)
         # convert flag_sum to scalar
-        flag_sum =
+        flag_sum = ops.Reshape()(flag_sum, (()))
         return flag_sum

     def _get_distributed_overflow_status_on_infnan_mode(self, grad_overflow_check_func, compute_output):
@@ -506,8 +504,8 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         overflow = AllFinite()(compute_output)

         if self.is_distributed:
-            overflow =
-            overflow =
+            overflow = ops.Cast()(overflow, mstype.float32)
+            overflow = ops.Cast()(self.allreduce(overflow), mstype.bool_)
         return overflow

     def _get_gpu_overflow_status(self, compute_output):
@@ -526,7 +524,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):

     def _get_ascend_overflow_status_on_saturation_mode(self, status, compute_output):
         """get overflow status of ascend on saturation mode"""
-        status =
+        status = ops.depend(status, compute_output)
         get_status = NPUGetFloatStatusV2()(status)

         if self.is_distributed:
@@ -534,15 +532,15 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
             flag_reduce = self.allreduce(get_status)
             # get_status not equal to [0]*8 means overflow
             flag = self.equal(self.base0, flag_reduce)
-            status =
+            status = ops.depend(status, flag)
             # distributed needs to skip allreduce to avoid its overflow affecting the next step
             clear_status = NPUClearFloatStatusV2()(status)
-            flag =
+            flag = ops.depend(flag, clear_status)
             overall_finite = self.reduce_all(flag)
         else:
-            status =
+            status = ops.depend(status, get_status)
             clear_status = NPUClearFloatStatusV2()(status)
-            get_status =
+            get_status = ops.depend(get_status, clear_status)
             flag = self.equal(self.base0, get_status)
             overall_finite = self.reduce_all(flag)
         overflow = self.logic_not(overall_finite)
@@ -592,26 +590,26 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         return overflow


-grad_scale =
-shard_grad_scale =
-reciprocal =
+grad_scale = ops.MultitypeFuncGraph("grad_scale")
+shard_grad_scale = ops.MultitypeFuncGraph("shard_grad_scale")
+reciprocal = ops.Reciprocal()


 @grad_scale.register("Tensor", "Tensor", "Tensor")
 def tensor_grad_scale_pipeline(scale, grad, accu_grad):
-    accu_grad =
+    accu_grad = ops.depend(accu_grad, grad)
     new_grad = accu_grad * reciprocal(scale)
-    accu_grad =
-    zeros =
-    new_grad =
+    accu_grad = ops.depend(accu_grad, new_grad)
+    zeros = ops.tensor_mul(accu_grad, 0.0)
+    new_grad = ops.depend(new_grad, ops.assign(accu_grad, zeros))
     return new_grad


 @shard_grad_scale.register("Tensor", "Tensor", "Tensor")
 def tensor_shard_grad_scale_pipeline(scale, grad, accu_grad):
     new_grad = grad * reciprocal(scale)
-    accu_grad =
-    new_grad =
+    accu_grad = ops.depend(accu_grad, new_grad)
+    new_grad = ops.depend(new_grad, ops.assign(accu_grad, ops.zeros_like(accu_grad)))
     return new_grad


@@ -633,23 +631,23 @@ class _TrainGradAccuWithLossScaleCell(TrainOneStepCell):
         self.weights = optimizer.parameters
         self.accu_grads = self.weights.clone(prefix="accu_grads", init="zeros")
         self.optimizer = optimizer
-        self.grad =
+        self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
         self.grad_reducer = nn.Identity()
         self.degree = 1
-        self.cast =
-        self.alloc_status =
-        self.get_status =
-        self.clear_before_grad =
-        self.reduce_sum =
+        self.cast = ops.Cast()
+        self.alloc_status = ops.NPUAllocFloatStatus()
+        self.get_status = ops.NPUGetFloatStatus()
+        self.clear_before_grad = ops.NPUClearFloatStatus()
+        self.reduce_sum = ops.ReduceSum(keep_dims=False)
         if self.parallel_mode not in [ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL]:
             raise ValueError(f"ParallelMode must be one of "
                              f"[ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL], but found "
                              f"{self.parallel_mode}.")
-        self.allreduce =
+        self.allreduce = ops.AllReduce()
         self.base = Tensor(1, mstype.float32)
-        self.less_equal =
-        self.hyper_map =
-        self.reshape =
+        self.less_equal = ops.LessEqual()
+        self.hyper_map = ops.HyperMap()
+        self.reshape = ops.Reshape()
         self.loss_scaling_manager = None
         if isinstance(scale_sense, Cell):
             self.loss_scaling_manager = scale_sense
@@ -669,19 +667,19 @@ class _TrainGradAccuWithLossScaleCell(TrainOneStepCell):
         loss = self.network(*inputs)
         scaling_sens = self.scale_sense
         init = self.alloc_status()
-        scaling_sens_filled =
-        scaling_sens_filled =
+        scaling_sens_filled = ops.ones_like(loss) * ops.cast(scaling_sens, ops.dtype(loss))
+        scaling_sens_filled = ops.depend(scaling_sens_filled, self.clear_before_grad(init))
         grads = self.grad(self.network, self.weights)(*inputs, scaling_sens_filled)
-        init =
+        init = ops.depend(init, grads)
         get_status = self.get_status(init)
-        init =
+        init = ops.depend(init, get_status)
         flag_sum = self.reduce_sum(init, (0,))
         if self.opt_shard:
             grads = self.grad_reducer(grads)
-            grads = self.hyper_map(
+            grads = self.hyper_map(ops.partial(shard_grad_scale, scaling_sens * self.degree), grads, self.accu_grads)
         else:
            accu_grads = self.grad_reducer(self.accu_grads)
-            grads = self.hyper_map(
+            grads = self.hyper_map(ops.partial(grad_scale, scaling_sens * self.degree), grads, accu_grads)
         # sum overflow flag over devices
         flag_reduce = self.allreduce(flag_sum)
         cond = self.less_equal(self.base, flag_reduce)
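The recurring edit in this file replaces the old `mindspore.ops.composite as C` and `mindspore.ops.operations as P` aliases with the single `mindspore.ops` namespace (`ops.MultitypeFuncGraph`, `ops.Reciprocal()`, `ops.HyperMap()`, `ops.depend`, `ops.assign`, ...). A minimal sketch of the gradient-unscaling pattern these cells rely on, written against the public `mindspore.ops` API (illustrative values, PyNative execution assumed, not code copied from the wheel):

```python
# Sketch of the MultitypeFuncGraph + HyperMap pattern used by the loss-scale cells.
import mindspore as ms
from mindspore import ops, Tensor

_grad_scale = ops.MultitypeFuncGraph("grad_scale")
reciprocal = ops.Reciprocal()


@_grad_scale.register("Tensor", "Tensor")
def tensor_grad_scale(scale, grad):
    # Undo the loss scaling: grad / scale, computed in the gradient's dtype.
    return grad * ops.cast(reciprocal(scale), ops.dtype(grad))


hyper_map = ops.HyperMap()
scale = Tensor(1024.0, ms.float32)
grads = (Tensor([2048.0], ms.float32), Tensor([4096.0], ms.float32))
# HyperMap applies the registered overload to every gradient in the tuple.
unscaled = hyper_map(ops.partial(_grad_scale, scale), grads)
print(unscaled)  # values 2.0 and 4.0
```

Training scripts keep using the wrappers themselves unchanged, e.g. `nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000))`; only the internals above moved to the unified namespace.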
mindspore/numpy/array_creations.py
CHANGED

@@ -1384,7 +1384,7 @@ def trace(a, offset=0, axis1=0, axis2=1, dtype=None):

     Note:
         - `trace` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
         - `trace` is not supported on Windows platform yet.

     Args:
@@ -2512,8 +2512,8 @@ def _pad_symmetric(arr, pad_width, reflect_type):
     for i in range(arr.ndim):
         array_length = arr.shape[i]

-        has_pad_before =
-        has_pad_after =
+        has_pad_before = pad_width[i][0] > 0
+        has_pad_after = pad_width[i][1] > 0

         times_to_pad_before = pad_width[i][0] // array_length + 1
         additional_pad_before = pad_width[i][0] % array_length
@@ -2541,8 +2541,8 @@ def _pad_reflect(arr, pad_width, reflect_type):
         total_repeats = pad_width[i][0] + pad_width[i][1] + 1
         arr = ops.tile(arr, _tuple_setitem((1,) * arr.ndim, i, total_repeats))
     else:
-        has_pad_before =
-        has_pad_after =
+        has_pad_before = pad_width[i][0] > 0
+        has_pad_after = pad_width[i][1] > 0

         pad_size = array_length - 1
         times_to_pad_before = pad_width[i][0] // pad_size + 1
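For context, `_pad_symmetric` and `_pad_reflect` back `mindspore.numpy.pad`; the two flags decide whether a given axis needs padding at its start or end. A plain-NumPy reference of the same bookkeeping (illustrative only, not MindSpore code):

```python
import numpy as np

arr = np.arange(4)              # the axis being padded has length 4
pad_width = ((3, 2),)           # pad 3 elements before, 2 after

has_pad_before = pad_width[0][0] > 0   # True: leading padding is needed
has_pad_after = pad_width[0][1] > 0    # True: trailing padding is needed

# symmetric/reflect padding is built from whole repeats of the axis plus a remainder
times_to_pad_before = pad_width[0][0] // arr.shape[0] + 1
additional_pad_before = pad_width[0][0] % arr.shape[0]
print(times_to_pad_before, additional_pad_before)   # 1 3

print(np.pad(arr, pad_width, mode="symmetric"))     # [2 1 0 0 1 2 3 3 2]
```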
mindspore/numpy/fft.py
CHANGED
@@ -684,7 +684,7 @@ def ihfft(a, n=None, axis=-1, norm=None):

    Note:
        - `ihfft` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
        - `ihfft` is not supported on Windows platform yet.

    Args:
mindspore/numpy/math_ops.py
CHANGED
@@ -5809,7 +5809,7 @@ def correlate(a, v, mode='valid'):

    Note:
        - `correlate` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
        - `correlate` is not supported on Windows platform yet.

    Args:
mindspore/opencv_core452.dll
CHANGED
Binary file
mindspore/opencv_imgproc452.dll
CHANGED
Binary file
mindspore/ops/_grad_experimental/grad_comm_ops.py
CHANGED

@@ -16,6 +16,7 @@
 """Generate bprop for comm ops"""
 from __future__ import division
 from __future__ import absolute_import
+import os
 from mindspore import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore.ops import functional as F
@@ -27,7 +28,8 @@ from mindspore.ops.operations._inner_ops import issubclass_
 from mindspore.common.sparse_tensor import RowTensorInner
 from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
 from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _HostAllGather, AllReduce,
-                                               NeighborExchange, AlltoAll, AlltoAllV, NeighborExchangeV2,
+                                               NeighborExchange, AlltoAll, AlltoAllV, NeighborExchangeV2,
+                                               Broadcast, AllGatherV, ReduceScatterV,
                                                _GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
                                                ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
                                                _VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,
@@ -236,7 +238,6 @@ def get_bprop_mirror_micro_step_operator(self):
    allgather for sparse feature.
    """
    group = self.group
-   global_rank = get_rank()
    dev_num = self.dev_num
    mean_flag = self.mean_flag
    param_name = " "
@@ -270,6 +271,9 @@ def get_bprop_mirror_micro_step_operator(self):
    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
    dump_local_norm_path = ms.get_auto_parallel_context("dump_local_norm_path")
    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
+   if dump_local_norm_path:
+       global_rank = get_rank()
+       file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
    if dump_device_local_norm:
        # init _squared _squared_device_local_norm
        squared_device_local_norm = get_squared_device_local_norm_param()
@@ -279,8 +283,7 @@ def get_bprop_mirror_micro_step_operator(self):
            squared_norm = reduce_sum(square((z)))
            if dump_local_norm:
                if dump_local_norm_path:
-                   z = F.depend(z, tensor_dump(
-                       "/local_norm__" + param_name, sqrt(squared_norm)))
+                   z = F.depend(z, tensor_dump(file, sqrt(squared_norm)))
                else:
                    z = F.depend(z, ln_print("dump local norm: ", param_name, sqrt(squared_norm)))
            if dump_device_local_norm:
@@ -336,13 +339,15 @@ def get_bprop_all_gather(self):
    dump_local_norm_path = ms.get_auto_parallel_context("dump_local_norm_path")
    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
    if param_name and (dump_local_norm or dump_device_local_norm):
-       global_rank = get_rank()
        cast = P.Cast()
        ln_print = P.Print()
        tensor_dump = P.TensorDump()
        reduce_sum = P.ReduceSum(keep_dims=False)
        square = P.Square()
        sqrt = P.Sqrt()
+       if dump_local_norm_path:
+           global_rank = get_rank()
+           file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
    if dump_device_local_norm:
        # init _squared _squared_device_local_norm
        squared_device_local_norm = get_squared_device_local_norm_param()
@@ -352,8 +357,7 @@ def get_bprop_all_gather(self):
            squared_norm = reduce_sum(square((dout)))
            if dump_local_norm:
                if dump_local_norm_path:
-                   dout = F.depend(dout, tensor_dump(
-                       "/local_norm__" + param_name, sqrt(squared_norm)))
+                   dout = F.depend(dout, tensor_dump(file, sqrt(squared_norm)))
                else:
                    dout = F.depend(dout, ln_print("dump local norm: ", param_name, sqrt(squared_norm)))
            if dump_device_local_norm:
@@ -430,7 +434,6 @@ def get_bprop_micro_step_all_gather(self):
    if self.instance_name:
        instance_name = "grad_" + self.instance_name
        reduce_scatter.set_prim_instance_name(instance_name)
-   global_rank = get_rank()
    cast = P.Cast()
    dtype = P.DType()
    out_tensor = Tensor(1.0, mstype.float16)
@@ -443,6 +446,9 @@ def get_bprop_micro_step_all_gather(self):
    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
    dump_local_norm_path = ms.get_auto_parallel_context("dump_local_norm_path")
    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
+   if dump_local_norm_path:
+       global_rank = get_rank()
+       file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
    if dump_device_local_norm:
        # init _squared _squared_device_local_norm
        squared_device_local_norm = get_squared_device_local_norm_param()
@@ -460,8 +466,7 @@ def get_bprop_micro_step_all_gather(self):
            squared_norm = reduce_sum(square((z)))
            if dump_local_norm:
                if dump_local_norm_path:
-                   z = F.depend(z, tensor_dump(
-                       "/local_norm__" + param_name, sqrt(squared_norm)))
+                   z = F.depend(z, tensor_dump(file, sqrt(squared_norm)))
                else:
                    z = F.depend(z, ln_print("dump local norm: ", param_name, sqrt(squared_norm)))
            if dump_device_local_norm:
@@ -650,6 +655,38 @@ def get_bprop_all_to_all_v(self):
    return bprop


+@bprop_getters.register(AllGatherV)
+def get_bprop_all_gather_v(self):
+    """Generate bprop for AllGatherV."""
+    all_gather_v_grad = ReduceScatterV(ReduceOp.SUM, self.group)
+    if hasattr(self, "instance_name") and self.instance_name:
+        instance_name = "grad" + self.instance_name
+        all_gather_v_grad.set_prim_instance_name(instance_name)
+
+    def bprop(x, output_split_sizes, out, dout):
+        dx = all_gather_v_grad(dout, output_split_sizes)
+        return (dx, zeros_like(output_split_sizes))
+
+    return bprop
+
+
+@bprop_getters.register(ReduceScatterV)
+def get_bprop_reduce_scatter_v(self):
+    """Generate bprop for ReduceScatterV."""
+    reduce_scatter_v_grad = AllGatherV(self.group)
+    if hasattr(self, "instance_name") and self.instance_name:
+        instance_name = "grad" + self.instance_name
+        reduce_scatter_v_grad.set_prim_instance_name(instance_name)
+    if self.op != ReduceOp.SUM:
+        raise RuntimeError("The reducescatter bprop only support ReduceOp.SUM until now.")
+
+    def bprop(x, input_split_sizes, out, dout):
+        dx = reduce_scatter_v_grad(dout, input_split_sizes)
+        return (dx, zeros_like(input_split_sizes))
+
+    return bprop
+
+
 @bprop_getters.register(NeighborExchangeV2)
 def get_bprop_neighborexchangev2(self):
     """Generate bprop for NeighborExchangeV2."""
@@ -685,11 +722,13 @@ def get_bprop_mirror_operator(self):
    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
    dump_local_norm_path = ms.get_auto_parallel_context("dump_local_norm_path")
    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
+   if dump_local_norm_path:
+       global_rank = get_rank()
+       file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
    if dump_device_local_norm:
        # init _squared _squared_device_local_norm
        squared_device_local_norm = get_squared_device_local_norm_param()
    if dev_num > 1:
-       global_rank = get_rank()
        dev_num_r = 1.0 / dev_num
        all_reduce = AllReduce(group=group)
        all_gather = AllGather(group=group)
@@ -717,8 +756,7 @@ def get_bprop_mirror_operator(self):
            squared_norm = reduce_sum(square((dout)))
            if dump_local_norm:
                if dump_local_norm_path:
-                   dout = F.depend(dout, tensor_dump(
-                       "/local_norm__" + param_name, sqrt(squared_norm)))
+                   dout = F.depend(dout, tensor_dump(file, sqrt(squared_norm)))
                else:
                    dout = F.depend(dout, ln_print("dump local norm: ", param_name, sqrt(squared_norm)))
            if dump_device_local_norm:
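Two things change in this file: the local-norm dump is now written to a per-rank file built with `os.path.join` instead of a path anchored at the filesystem root, and backward rules are added for the new `AllGatherV`/`ReduceScatterV` primitives, whose gradients are each other (a variable-size all-gather scatters its incoming gradient back, and a SUM reduce-scatter gathers it). A small sketch of the new dump-file layout, with placeholder values for the path, rank, and parameter name (the real code takes them from the parallel context):

```python
import os

# Placeholder values for illustration only.
dump_local_norm_path = "/tmp/local_norm_dump"
global_rank = 3
param_name = "fc1.weight"

file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
print(file)  # e.g. /tmp/local_norm_dump/rank_3/local_norm__fc1.weight
```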
mindspore/ops/_grad_experimental/grad_debug_ops.py
CHANGED

@@ -41,3 +41,17 @@ def get_bprop_insert_gradient_of(self):
 def bprop_tensor_dump(file, input_x, out, dout):
     """Generate bprop for TensorDump"""
     return file, C.zeros_like(input_x)
+
+
+@bprop_getters.register(P.DumpGradient)
+def get_bprop_dump_gradient(self):
+    """Generate bprop for DumpGradient"""
+    td = P.TensorDump()
+    td.add_prim_attr("side_effect_io", False)
+    td.add_prim_attr("td_flag", True)
+
+    def bprop(path, x, input_output, out, dout):
+        tded = td(path, dout)
+        fdout = F.depend(dout, tded)
+        return C.zeros_like(path), fdout, C.zeros_like(input_output)
+    return bprop
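The new `DumpGradient` bprop follows the same side-effect pattern seen throughout these files: perform the dump (`td(path, dout)`) and thread it through `depend` so the dump is ordered before the gradient is consumed, while the gradient value itself is unchanged. A minimal sketch of that pattern, using `ops.assign` as a stand-in side effect (illustrative, not code from the wheel):

```python
# depend(value, expr) returns `value` while ensuring `expr` (a side effect such as a
# dump or an assign) is executed first.
import mindspore as ms
from mindspore import ops, Tensor, Parameter

counter = Parameter(Tensor(0, ms.int32), name="counter")   # stands in for the dump target
grad = Tensor([1.0, 2.0], ms.float32)

side_effect = ops.assign(counter, counter + 1)   # plays the role of td(path, dout) in the bprop
grad = ops.depend(grad, side_effect)             # grad keeps its value; the side effect runs first
print(counter.asnumpy(), grad.asnumpy())         # 1 [1. 2.]
```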
mindspore/ops/_vmap/vmap_array_ops.py
CHANGED

@@ -1527,14 +1527,13 @@ def get_meshgrid_vmap_rule(prim, axis_size):

        if not isinstance(inputs_bdim, (tuple)):
            _raise_value_error("The inputs of P.Meshgrid is not tuple.")
-
-       if len(args) <= 1:
+       if len(inputs_bdim) <= 1:
            _raise_value_error(
                "The input number of P.Meshgrid must be greater than 1.")

        output_shape = []
        ones_shape = []
-       for each_arg in
+       for each_arg in inputs_bdim:
            x, bdim = each_arg
            if bdim is None:
                _raise_value_error(
@@ -1548,22 +1547,16 @@ def get_meshgrid_vmap_rule(prim, axis_size):
            output_shape.insert(0, axis_size)
            ones_shape.insert(0, axis_size)

-
-
-       if indexing == Indexing.xy.value:
+       if indexing_bdim[0] == Indexing.xy.value:
            output_shape[1], output_shape[2] = output_shape[2], output_shape[1]
-
-
-       input_0, _ = args[0]
-       dtype = F.dtype(input_0)
-       ones_tensor = F.fill(dtype, shape, 1)
+       ones_tensor = F.fill(F.dtype(inputs_bdim[0][0]), tuple(output_shape), 1)

        index = 0
        vals_out_tuple = ()
-       for each_arg in
+       for each_arg in inputs_bdim:
            x, bdim = each_arg
            x = _bdim_at_front(x, bdim, axis_size)
-           shape_index = (1 - index) if (index <= 1 and
+           shape_index = (1 - index) if (index <= 1 and indexing_bdim[0] == Indexing.xy.value) else index
            ones_shape[shape_index + 1] = output_shape[shape_index + 1]
            x = P.Reshape()(x, tuple(ones_shape))
            output = P.Mul()(x, ones_tensor)
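The fix makes the meshgrid vmap rule iterate over `inputs_bdim`, the actual tuple of `(value, batch_dim)` pairs, instead of the undefined `args`, and read the indexing mode from `indexing_bdim[0]`. A plain-NumPy reference for what the batched rule computes (illustrative only, not MindSpore code): vmapping meshgrid over a leading batch axis is equivalent to applying meshgrid per batch element and stacking the results.

```python
import numpy as np

batch_x = np.arange(6).reshape(2, 3)   # batch of 2 vectors, length 3
batch_y = np.arange(8).reshape(2, 4)   # batch of 2 vectors, length 4

per_batch = [np.meshgrid(x, y, indexing="xy") for x, y in zip(batch_x, batch_y)]
xx = np.stack([p[0] for p in per_batch])   # shape (2, 4, 3): batch, len(y), len(x)
yy = np.stack([p[1] for p in per_batch])   # shape (2, 4, 3)
print(xx.shape, yy.shape)
```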
mindspore/ops/_vmap/vmap_nn_ops.py
CHANGED

@@ -2068,14 +2068,15 @@ def get_sparse_apply_adagrad_vmap_rule(prim, axis_size):
        indices, indices_dim = indices_bdim
        if var_dim is None:
            if any(dim is not None for dim in [accum_dim, grad_dim, indices_dim]):
-
-
-
+               _raise_value_error("The source axis of `var` is None, but the source "
+                                  "axis of `accum/grad/indices` is not None. The execution "
+                                  "order of operator `{}` cannot be guaranteed.".format(prim_name))
            var, accum = prim(var, accum, grad, indices, u_monad)
            return (var, None), (accum, None)
        if var_dim != 0 or accum_dim != var_dim:
-
-
+           _raise_value_error("For `{}`, the source axis of `var` must be equal to `accum`, "
+                              "and not equal to 0, but got the source axis of `var`: {}, "
+                              "`accum`: {}.".format(prim_name, var_dim, accum_dim))

        grad = _bdim_at_front(grad, grad_dim, axis_size)
        indices = _bdim_at_front(indices, indices_dim, axis_size)
@@ -2094,27 +2095,18 @@ def get_sparse_apply_ftrl_vmap_rule(prim, axis_size):
    else:
        batch_rank = 1

-   prim_name = prim.name
    batch_prim = _vmap_clone_prim(prim)
    batch_prim.add_prim_attr('batch_rank', batch_rank)

    def vmap_rule(var_bdim, accum_bdim, linear_bdim, grad_bdim, indices_bdim, u_monad):
        var, var_dim = var_bdim
-       accum,
-       linear,
+       accum, _ = accum_bdim
+       linear, _ = linear_bdim
        grad, grad_dim = grad_bdim
        indices, indices_dim = indices_bdim
        if var_dim is None:
-           if any(dim is not None for dim in [accum_dim, linear_dim, grad_dim, indices_dim]):
-               ValueError("The source axis of `var` is None, but the source "
-                          "axis of `accum/linear/grad/indices` is not None. The execution order of "
-                          "operator `{}` cannot be guaranteed.".format(prim_name))
            var, accum, linear = prim(var, accum, linear, grad, indices, u_monad)
            return (var, None), (accum, None), (linear, None)
-       if var_dim != 0 or accum_dim != var_dim or linear_dim != var_dim:
-           ValueError("For `{}`, the source axis of `var`, `accum` and `linear` must be equal, and "
-                      "not equal to 0, but got the source axis of `var`: {}, `accum`: {}, "
-                      "`linear`:{}.".format(prim_name, var_dim, accum_dim, linear_dim))

        grad = _bdim_at_front(grad, grad_dim, axis_size)
        indices = _bdim_at_front(indices, indices_dim, axis_size)
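The adagrad rule keeps its axis checks and routes them through `_raise_value_error` with complete messages, while the removed ftrl lines constructed `ValueError(...)` objects without raising them (visible in the hunk above), so those guards never took effect and are simply dropped. A plain-Python illustration of why the unraised form is dead code, contrasted with a guard that actually fires (the function names here are made up for the example):

```python
# Constructing an exception without `raise` just creates and discards the object,
# so execution falls through the check.
def old_check(var_dim):
    if var_dim is None:
        ValueError("source axis of `var` is None")   # built, never raised
    return "fell through even though var_dim is None"

def new_check(var_dim):
    if var_dim is None:
        raise ValueError("source axis of `var` is None")
    return "reached only for a valid var_dim"

print(old_check(None))
try:
    new_check(None)
except ValueError as err:
    print("raised:", err)
```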