mindspore-2.6.0-cp310-cp310-win_amd64.whl → mindspore-2.7.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +64 -83
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +47 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +177 -52
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +338 -208
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +84 -133
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +47 -38
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +69 -23
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +425 -19
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +125 -101
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +488 -620
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +2 -4
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
- mindspore/ops/auto_generate/gen_extend_func.py +5 -55
- mindspore/ops/auto_generate/gen_ops_def.py +753 -273
- mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +17 -100
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +65 -399
- mindspore/ops/function/nn_func.py +44 -61
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +486 -18
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +4 -50
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +244 -175
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +6 -7
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +52 -11
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +9 -17
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +33 -12
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +5 -1
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +7 -6
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +9 -23
- mindspore/parallel/transform_safetensors.py +468 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +22 -30
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +9 -5
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +72 -18
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +49 -47
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
mindspore/nn/wrap/loss_scale.py
CHANGED

@@ -26,33 +26,31 @@ from mindspore.common import Tensor
 from mindspore.common.sparse_tensor import RowTensorInner
 from mindspore.common.parameter import Parameter
 from mindspore.ops.operations.math_ops import NPUGetFloatStatusV2, NPUClearFloatStatusV2
-from mindspore
-from mindspore.ops import composite as C
-from mindspore.ops import operations as P
+from mindspore import ops
 from mindspore.ops.operations.nn_ops import AllFinite
 from mindspore.common import dtype as mstype
 from mindspore._c_expression import MSContext
 from mindspore.run_check._check_version import AscendEnvChecker
 from mindspore import log as logger

-_grad_scale =
-reciprocal =
+_grad_scale = ops.MultitypeFuncGraph("grad_scale")
+reciprocal = ops.Reciprocal()


 @_grad_scale.register("Tensor", "Tensor")
 def tensor_grad_scale(scale, grad):
-    return grad *
+    return grad * ops.cast(reciprocal(scale), ops.dtype(grad))


 @_grad_scale.register("Tensor", "RowTensor")
 def tensor_grad_scale_row_tensor(scale, grad):
     return RowTensorInner(grad.indices,
-                          grad.values *
+                          grad.values * ops.cast(reciprocal(scale), ops.dtype(grad.values)),
                           grad.dense_shape)


-_grad_overflow =
-grad_overflow =
+_grad_overflow = ops.MultitypeFuncGraph("_grad_overflow")
+grad_overflow = ops.FloatStatus()


 @_grad_overflow.register("Tensor")

@@ -65,8 +63,8 @@ def _tensor_grad_overflow_row_tensor(grad):
     return grad_overflow(grad.values)


-_ascend_grad_overflow =
-ascend_grad_overflow =
+_ascend_grad_overflow = ops.MultitypeFuncGraph("_ascend_grad_overflow")
+ascend_grad_overflow = ops.IsFinite()


 @_ascend_grad_overflow.register("Tensor")

@@ -74,7 +72,7 @@ def _tensor_ascend_grad_overflow(grad):
     status = ascend_grad_overflow(grad)
     base = Tensor(1.0, dtype=mstype.float32)
     output = base - status.all()
-    output =
+    output = ops.Reshape()(output, ((-1,)))
     return output

@@ -83,7 +81,7 @@ def _tensor_ascend_grad_overflow_row_tensor(grad):
     status = ascend_grad_overflow(grad.values)
     base = Tensor(1.0, dtype=mstype.float32)
     output = base - status.all()
-    output =
+    output = ops.Reshape()(output, ((1,)))
     return output

@@ -154,14 +152,14 @@ class DynamicLossScaleUpdateCell(Cell):

         self.cur_iter = Parameter(Tensor(1, dtype=mstype.int32), name="current_iterator_step")
         self.last_overflow_iter = Parameter(Tensor(0, dtype=mstype.int32), name="last_overflow_iterator_step")
-        self.select =
-        self.max =
+        self.select = ops.Select()
+        self.max = ops.Maximum()
         self.minimum_loss_scale = Tensor(1.0, dtype=mstype.float32)
-        self.reciprocal =
-        self.less_equal =
-        self.logic_and =
-        self.logic_not =
-        self.logic_or =
+        self.reciprocal = ops.Reciprocal()
+        self.less_equal = ops.LessEqual()
+        self.logic_and = ops.LogicalAnd()
+        self.logic_not = ops.LogicalNot()
+        self.logic_or = ops.LogicalOr()
         self.const_true = Tensor(True, dtype=mstype.bool_)

     def get_loss_scale(self):

@@ -187,14 +185,14 @@ class DynamicLossScaleUpdateCell(Cell):
         should_inc = self.less_equal(self.scale_window, self.cur_iter - self.last_overflow_iter)
         last_iter_cond = self.logic_or(overflow_cond, should_inc)
         last_overflow_iter = self.select(last_iter_cond, self.cur_iter, self.last_overflow_iter)
-        last_iter =
+        last_iter = ops.assign(self.last_overflow_iter, last_overflow_iter)
         update_scale_cond = self.logic_and(should_inc, self.logic_not(overflow_cond))
         scale_mul_res = loss_scale_on_overflow * self.scale_factor
         scaled_loss_scale = self.select(update_scale_cond, scale_mul_res, loss_scale_on_overflow)
-
+        ops.assign(loss_scale, scaled_loss_scale)
         inc_cur_iter = self.cur_iter + 1
-        inc_cur_iter =
-
+        inc_cur_iter = ops.depend(inc_cur_iter, last_iter)
+        ops.assign(self.cur_iter, inc_cur_iter)
         return overflow

@@ -360,19 +358,19 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):

     def __init__(self, network, optimizer, scale_sense):
         super(TrainOneStepWithLossScaleCell, self).__init__(network, optimizer, sens=None)
-        self.hyper_map =
+        self.hyper_map = ops.HyperMap()
         self.base = Tensor(1, mstype.float32)
         self.base0 = Tensor(0, mstype.int32)
-        self.reduce_sum =
-        self.reduce_all =
-        self.less_equal =
-        self.equal =
-        self.logic_not =
-        self.allreduce =
+        self.reduce_sum = ops.ReduceSum(keep_dims=False)
+        self.reduce_all = ops.ReduceAll(keep_dims=False)
+        self.less_equal = ops.LessEqual()
+        self.equal = ops.Equal()
+        self.logic_not = ops.LogicalNot()
+        self.allreduce = ops.AllReduce()
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
-        self.gpu_target =
-        self.ascend_910a_target =
-        self.ascend_910b_target =
+        self.gpu_target = context.get_context("device_target") == "GPU"
+        self.ascend_910a_target = MSContext.get_instance().get_ascend_soc_version() == 'ascend910'
+        self.ascend_910b_target = MSContext.get_instance().get_ascend_soc_version() in ['ascend910b', 'ascend910_93']
         self.loss_scaling_manager = None
         self._ascend_check_overflow_mode = os.environ.get('MS_ASCEND_CHECK_OVERFLOW_MODE')

@@ -420,9 +418,9 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         scaling_sens = self.scale_sense
         status = Tensor([0] * 8, mstype.int32)

-        scaling_sens_filled =
+        scaling_sens_filled = ops.ones_like(loss) * ops.cast(scaling_sens, ops.dtype(loss))
         grads = self.grad(self.network, weights)(*inputs, scaling_sens_filled)
-        grads = self.hyper_map(
+        grads = self.hyper_map(ops.partial(_grad_scale, scaling_sens), grads)
         # apply grad reducer on grads
         grads = self.grad_reducer(grads)

@@ -431,7 +429,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         overflow = self.process_loss_scale(cond)
         # if there is no overflow, do optimize
         if not overflow:
-            loss =
+            loss = ops.depend(loss, self.optimizer(grads))
         return loss, cond, scaling_sens

     def set_sense_scale(self, sens):

@@ -475,18 +473,18 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         status = Tensor([0] * 8, mstype.int32)
         if self.ascend_910a_target or (self.ascend_910b_target and \
            self._ascend_check_overflow_mode == "SATURATION_MODE"):
-            status =
+            status = ops.depend(status, pre_cond)
             # clear overflow buffer
             clear_status = NPUClearFloatStatusV2()(status)
-            compute_input =
+            compute_input = ops.depend(compute_input, clear_status)
         return status, compute_input

     def _check_overflow_status_on_infnan_mode(self, grad_overflow_check_func, compute_output):
         """check overflow status on infnan mode."""
-        flag_sum = self.hyper_map(
-        flag_sum =
+        flag_sum = self.hyper_map(ops.partial(grad_overflow_check_func), compute_output)
+        flag_sum = ops.AddN()(flag_sum)
         # convert flag_sum to scalar
-        flag_sum =
+        flag_sum = ops.Reshape()(flag_sum, (()))
         return flag_sum

     def _get_distributed_overflow_status_on_infnan_mode(self, grad_overflow_check_func, compute_output):

@@ -506,8 +504,8 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         overflow = AllFinite()(compute_output)

         if self.is_distributed:
-            overflow =
-            overflow =
+            overflow = ops.Cast()(overflow, mstype.float32)
+            overflow = ops.Cast()(self.allreduce(overflow), mstype.bool_)
         return overflow

     def _get_gpu_overflow_status(self, compute_output):

@@ -526,7 +524,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):

     def _get_ascend_overflow_status_on_saturation_mode(self, status, compute_output):
         """get overflow status of ascend on saturation mode"""
-        status =
+        status = ops.depend(status, compute_output)
         get_status = NPUGetFloatStatusV2()(status)

         if self.is_distributed:

@@ -534,15 +532,15 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
             flag_reduce = self.allreduce(get_status)
             # get_status not equal to [0]*8 means overflow
             flag = self.equal(self.base0, flag_reduce)
-            status =
+            status = ops.depend(status, flag)
             # distributed needs to skip allreduce to avoid its overflow affecting the next step
             clear_status = NPUClearFloatStatusV2()(status)
-            flag =
+            flag = ops.depend(flag, clear_status)
             overall_finite = self.reduce_all(flag)
         else:
-            status =
+            status = ops.depend(status, get_status)
             clear_status = NPUClearFloatStatusV2()(status)
-            get_status =
+            get_status = ops.depend(get_status, clear_status)
             flag = self.equal(self.base0, get_status)
             overall_finite = self.reduce_all(flag)
         overflow = self.logic_not(overall_finite)

@@ -592,26 +590,26 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         return overflow


-grad_scale =
-shard_grad_scale =
-reciprocal =
+grad_scale = ops.MultitypeFuncGraph("grad_scale")
+shard_grad_scale = ops.MultitypeFuncGraph("shard_grad_scale")
+reciprocal = ops.Reciprocal()


 @grad_scale.register("Tensor", "Tensor", "Tensor")
 def tensor_grad_scale_pipeline(scale, grad, accu_grad):
-    accu_grad =
+    accu_grad = ops.depend(accu_grad, grad)
     new_grad = accu_grad * reciprocal(scale)
-    accu_grad =
-    zeros =
-    new_grad =
+    accu_grad = ops.depend(accu_grad, new_grad)
+    zeros = ops.tensor_mul(accu_grad, 0.0)
+    new_grad = ops.depend(new_grad, ops.assign(accu_grad, zeros))
     return new_grad


 @shard_grad_scale.register("Tensor", "Tensor", "Tensor")
 def tensor_shard_grad_scale_pipeline(scale, grad, accu_grad):
     new_grad = grad * reciprocal(scale)
-    accu_grad =
-    new_grad =
+    accu_grad = ops.depend(accu_grad, new_grad)
+    new_grad = ops.depend(new_grad, ops.assign(accu_grad, ops.zeros_like(accu_grad)))
     return new_grad

@@ -633,23 +631,23 @@ class _TrainGradAccuWithLossScaleCell(TrainOneStepCell):
         self.weights = optimizer.parameters
         self.accu_grads = self.weights.clone(prefix="accu_grads", init="zeros")
         self.optimizer = optimizer
-        self.grad =
+        self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
         self.grad_reducer = nn.Identity()
         self.degree = 1
-        self.cast =
-        self.alloc_status =
-        self.get_status =
-        self.clear_before_grad =
-        self.reduce_sum =
+        self.cast = ops.Cast()
+        self.alloc_status = ops.NPUAllocFloatStatus()
+        self.get_status = ops.NPUGetFloatStatus()
+        self.clear_before_grad = ops.NPUClearFloatStatus()
+        self.reduce_sum = ops.ReduceSum(keep_dims=False)
         if self.parallel_mode not in [ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL]:
             raise ValueError(f"ParallelMode must be one of "
                              f"[ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL], but found "
                              f"{self.parallel_mode}.")
-        self.allreduce =
+        self.allreduce = ops.AllReduce()
         self.base = Tensor(1, mstype.float32)
-        self.less_equal =
-        self.hyper_map =
-        self.reshape =
+        self.less_equal = ops.LessEqual()
+        self.hyper_map = ops.HyperMap()
+        self.reshape = ops.Reshape()
         self.loss_scaling_manager = None
         if isinstance(scale_sense, Cell):
             self.loss_scaling_manager = scale_sense

@@ -669,19 +667,19 @@ class _TrainGradAccuWithLossScaleCell(TrainOneStepCell):
         loss = self.network(*inputs)
         scaling_sens = self.scale_sense
         init = self.alloc_status()
-        scaling_sens_filled =
-        scaling_sens_filled =
+        scaling_sens_filled = ops.ones_like(loss) * ops.cast(scaling_sens, ops.dtype(loss))
+        scaling_sens_filled = ops.depend(scaling_sens_filled, self.clear_before_grad(init))
         grads = self.grad(self.network, self.weights)(*inputs, scaling_sens_filled)
-        init =
+        init = ops.depend(init, grads)
         get_status = self.get_status(init)
-        init =
+        init = ops.depend(init, get_status)
         flag_sum = self.reduce_sum(init, (0,))
         if self.opt_shard:
             grads = self.grad_reducer(grads)
-            grads = self.hyper_map(
+            grads = self.hyper_map(ops.partial(shard_grad_scale, scaling_sens * self.degree), grads, self.accu_grads)
         else:
             accu_grads = self.grad_reducer(self.accu_grads)
-            grads = self.hyper_map(
+            grads = self.hyper_map(ops.partial(grad_scale, scaling_sens * self.degree), grads, accu_grads)
         # sum overflow flag over devices
         flag_reduce = self.allreduce(flag_sum)
         cond = self.less_equal(self.base, flag_reduce)
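The bulk of this file's diff is a mechanical migration: the module-level aliases C (ops.composite), P (ops.operations) and the functional helpers are replaced by the single mindspore.ops namespace. A minimal runnable sketch of the two patterns the rewritten code leans on, MultitypeFuncGraph dispatch plus HyperMap with ops.partial for mapping a per-gradient function over a tuple of gradients; _scale_fn and the demo values are illustrative, not from the diff:

import mindspore as ms
from mindspore import ops, Tensor

_scale_fn = ops.MultitypeFuncGraph("scale_fn")  # hypothetical stand-in for _grad_scale
reciprocal = ops.Reciprocal()

@_scale_fn.register("Tensor", "Tensor")
def _tensor_scale(scale, grad):
    # same shape as the new tensor_grad_scale: grad * cast(1/scale, dtype(grad))
    return grad * ops.cast(reciprocal(scale), ops.dtype(grad))

hyper_map = ops.HyperMap()
scale = Tensor(2.0, ms.float32)
grads = (Tensor([2.0, 4.0], ms.float32), Tensor([8.0], ms.float32))
print(hyper_map(ops.partial(_scale_fn, scale), grads))  # (Tensor [1., 2.], Tensor [4.])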
mindspore/numpy/array_creations.py
CHANGED

@@ -127,7 +127,7 @@ def asarray_const(a, dtype=None):
     # If dtype is not specified, we keep consistent with numpy decision
     # only exceptions are: we use int/float32
     if dtype is None:
-        dtype = mstype.
+        dtype = mstype._pytype_to_dtype(a.dtype)  # pylint:disable=protected-access
     if dtype == mstype.float64:
         dtype = mstype.float32
     elif dtype == mstype.int64:

@@ -138,7 +138,7 @@ def asarray_const(a, dtype=None):
     if isinstance(a, onp.ndarray) and dtype is None:
         if a.dtype is onp.dtype('object'):
             raise TypeError(f"For Tensor conversion, the input_data is {a} that contains unsupported element.")
-        dtype = mstype.
+        dtype = mstype._pytype_to_dtype(a.dtype)  # pylint:disable=protected-access
         a = Tensor.from_numpy(a)

     return Tensor(a, dtype=dtype)

@@ -1384,7 +1384,7 @@ def trace(a, offset=0, axis1=0, axis2=1, dtype=None):

     Note:
         - `trace` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
         - `trace` is not supported on Windows platform yet.

     Args:

@@ -2512,8 +2512,8 @@ def _pad_symmetric(arr, pad_width, reflect_type):
     for i in range(arr.ndim):
         array_length = arr.shape[i]

-        has_pad_before =
-        has_pad_after =
+        has_pad_before = pad_width[i][0] > 0
+        has_pad_after = pad_width[i][1] > 0

         times_to_pad_before = pad_width[i][0] // array_length + 1
         additional_pad_before = pad_width[i][0] % array_length

@@ -2541,8 +2541,8 @@ def _pad_reflect(arr, pad_width, reflect_type):
             total_repeats = pad_width[i][0] + pad_width[i][1] + 1
             arr = ops.tile(arr, _tuple_setitem((1,) * arr.ndim, i, total_repeats))
         else:
-            has_pad_before =
-            has_pad_after =
+            has_pad_before = pad_width[i][0] > 0
+            has_pad_after = pad_width[i][1] > 0

             pad_size = array_length - 1
             times_to_pad_before = pad_width[i][0] // pad_size + 1
mindspore/numpy/fft.py
CHANGED

@@ -684,7 +684,7 @@ def ihfft(a, n=None, axis=-1, norm=None):

     Note:
         - `ihfft` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
         - `ihfft` is not supported on Windows platform yet.

     Args:
mindspore/numpy/math_ops.py
CHANGED

@@ -5809,7 +5809,7 @@ def correlate(a, v, mode='valid'):

     Note:
         - `correlate` is currently only used in `mindscience` scientific computing scenarios and
-
+          does not support other usage scenarios.
         - `correlate` is not supported on Windows platform yet.

     Args:
mindspore/numpy/utils_const.py
CHANGED

@@ -70,7 +70,7 @@ def _check_dtype(dtype):
     elif dtype is float:
         dtype = mstype.float32
     else:
-        dtype = mstype.
+        dtype = mstype._pytype_to_dtype(dtype)  # pylint:disable=protected-access
     if dtype not in dtype_tuple:
         raise TypeError(f"only {all_types} are allowed for dtype, but got {type(dtype)}")
     return dtype
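All three numpy hunks swap a public mstype helper for the protected mstype._pytype_to_dtype (hence the new pylint suppressions). The asarray_const context above still shows the narrowing policy: NumPy's 64-bit defaults become 32-bit MindSpore types. A self-contained sketch of that policy, assuming the truncated int64 branch maps to int32 per the "we use int/float32" comment; as_ms_tensor is a hypothetical name and does not call the protected API:

import numpy as onp
import mindspore as ms
from mindspore import Tensor

def as_ms_tensor(a):
    # mirror the asarray_const narrowing: float64 -> float32, int64 -> int32
    a = onp.asarray(a)
    dtype = None
    if a.dtype == onp.float64:
        dtype = ms.float32
    elif a.dtype == onp.int64:
        dtype = ms.int32
    return Tensor(a, dtype=dtype)

print(as_ms_tensor([1.0, 2.0]).dtype)  # Float32, not Float64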
mindspore/opencv_core452.dll
CHANGED
Binary file

mindspore/opencv_imgproc452.dll
CHANGED
Binary file
mindspore/ops/_grad_experimental/grad_comm_ops.py
CHANGED

@@ -16,6 +16,7 @@
 """Generate bprop for comm ops"""
 from __future__ import division
 from __future__ import absolute_import
+import os
 from mindspore import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore.ops import functional as F

@@ -27,7 +28,8 @@ from mindspore.ops.operations._inner_ops import issubclass_
 from mindspore.common.sparse_tensor import RowTensorInner
 from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
 from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _HostAllGather, AllReduce,
-                                               NeighborExchange, AlltoAll, AlltoAllV, NeighborExchangeV2,
+                                               NeighborExchange, AlltoAll, AlltoAllV, NeighborExchangeV2,
+                                               Broadcast, AllGatherV, ReduceScatterV,
                                                _GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
                                                ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
                                                _VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,

@@ -236,7 +238,6 @@ def get_bprop_mirror_micro_step_operator(self):
     allgather for sparse feature.
     """
     group = self.group
-    global_rank = get_rank()
     dev_num = self.dev_num
     mean_flag = self.mean_flag
     param_name = " "

@@ -270,6 +271,9 @@ def get_bprop_mirror_micro_step_operator(self):
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
     dump_local_norm_path = ms.get_auto_parallel_context("dump_local_norm_path")
     dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
+    if dump_local_norm_path:
+        global_rank = get_rank()
+        file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
     if dump_device_local_norm:
         # init _squared _squared_device_local_norm
         squared_device_local_norm = get_squared_device_local_norm_param()

@@ -279,8 +283,7 @@ def get_bprop_mirror_micro_step_operator(self):
             squared_norm = reduce_sum(square((z)))
             if dump_local_norm:
                 if dump_local_norm_path:
-                    z = F.depend(z, tensor_dump(
-                        "/local_norm__" + param_name, sqrt(squared_norm)))
+                    z = F.depend(z, tensor_dump(file, sqrt(squared_norm)))
                 else:
                     z = F.depend(z, ln_print("dump local norm: ", param_name, sqrt(squared_norm)))
             if dump_device_local_norm:

@@ -336,13 +339,15 @@ def get_bprop_all_gather(self):
     dump_local_norm_path = ms.get_auto_parallel_context("dump_local_norm_path")
     dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
     if param_name and (dump_local_norm or dump_device_local_norm):
-        global_rank = get_rank()
         cast = P.Cast()
         ln_print = P.Print()
         tensor_dump = P.TensorDump()
         reduce_sum = P.ReduceSum(keep_dims=False)
         square = P.Square()
         sqrt = P.Sqrt()
+        if dump_local_norm_path:
+            global_rank = get_rank()
+            file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
     if dump_device_local_norm:
         # init _squared _squared_device_local_norm
         squared_device_local_norm = get_squared_device_local_norm_param()

@@ -352,8 +357,7 @@ def get_bprop_all_gather(self):
             squared_norm = reduce_sum(square((dout)))
             if dump_local_norm:
                 if dump_local_norm_path:
-                    dout = F.depend(dout, tensor_dump(
-                        "/local_norm__" + param_name, sqrt(squared_norm)))
+                    dout = F.depend(dout, tensor_dump(file, sqrt(squared_norm)))
                 else:
                     dout = F.depend(dout, ln_print("dump local norm: ", param_name, sqrt(squared_norm)))
             if dump_device_local_norm:

@@ -430,7 +434,6 @@ def get_bprop_micro_step_all_gather(self):
     if self.instance_name:
         instance_name = "grad_" + self.instance_name
         reduce_scatter.set_prim_instance_name(instance_name)
-    global_rank = get_rank()
     cast = P.Cast()
     dtype = P.DType()
     out_tensor = Tensor(1.0, mstype.float16)

@@ -443,6 +446,9 @@ def get_bprop_micro_step_all_gather(self):
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
     dump_local_norm_path = ms.get_auto_parallel_context("dump_local_norm_path")
     dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
+    if dump_local_norm_path:
+        global_rank = get_rank()
+        file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
     if dump_device_local_norm:
         # init _squared _squared_device_local_norm
         squared_device_local_norm = get_squared_device_local_norm_param()

@@ -460,8 +466,7 @@ def get_bprop_micro_step_all_gather(self):
             squared_norm = reduce_sum(square((z)))
             if dump_local_norm:
                 if dump_local_norm_path:
-                    z = F.depend(z, tensor_dump(
-                        "/local_norm__" + param_name, sqrt(squared_norm)))
+                    z = F.depend(z, tensor_dump(file, sqrt(squared_norm)))
                 else:
                     z = F.depend(z, ln_print("dump local norm: ", param_name, sqrt(squared_norm)))
             if dump_device_local_norm:
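Every dump hunk above makes the same two moves: get_rank() is now called only when a dump path is configured, and the target file is computed once per parameter with os.path.join instead of being rebuilt from a bare "/local_norm__" prefix at each dump site. The resulting layout, sketched stand-alone with an illustrative helper name:

import os

def local_norm_dump_file(dump_local_norm_path, global_rank, param_name):
    # one file per rank and parameter, matching the diff's construction
    return os.path.join(dump_local_norm_path,
                        "rank_" + str(global_rank),
                        "local_norm__" + param_name)

print(local_norm_dump_file("/tmp/norms", 3, "layer.weight"))
# /tmp/norms/rank_3/local_norm__layer.weight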
@@ -650,6 +655,38 @@ def get_bprop_all_to_all_v(self):
     return bprop


+@bprop_getters.register(AllGatherV)
+def get_bprop_all_gather_v(self):
+    """Generate bprop for AllGatherV."""
+    all_gather_v_grad = ReduceScatterV(ReduceOp.SUM, self.group)
+    if hasattr(self, "instance_name") and self.instance_name:
+        instance_name = "grad" + self.instance_name
+        all_gather_v_grad.set_prim_instance_name(instance_name)
+
+    def bprop(x, output_split_sizes, out, dout):
+        dx = all_gather_v_grad(dout, output_split_sizes)
+        return (dx, zeros_like(output_split_sizes))
+
+    return bprop
+
+
+@bprop_getters.register(ReduceScatterV)
+def get_bprop_reduce_scatter_v(self):
+    """Generate bprop for ReduceScatterV."""
+    reduce_scatter_v_grad = AllGatherV(self.group)
+    if hasattr(self, "instance_name") and self.instance_name:
+        instance_name = "grad" + self.instance_name
+        reduce_scatter_v_grad.set_prim_instance_name(instance_name)
+    if self.op != ReduceOp.SUM:
+        raise RuntimeError("The reducescatter bprop only support ReduceOp.SUM until now.")
+
+    def bprop(x, input_split_sizes, out, dout):
+        dx = reduce_scatter_v_grad(dout, input_split_sizes)
+        return (dx, zeros_like(input_split_sizes))
+
+    return bprop
+
+
 @bprop_getters.register(NeighborExchangeV2)
 def get_bprop_neighborexchangev2(self):
     """Generate bprop for NeighborExchangeV2."""
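The two new bprops above register the variable-length collectives as each other's adjoint: AllGatherV's gradient is a SUM ReduceScatterV over the same split sizes, ReduceScatterV's gradient is an AllGatherV, and the split-sizes input always receives a zeros_like gradient. A single-process NumPy sketch (hypothetical helper names) of why the pairing is correct:

import numpy as np

def all_gather_v(xs):
    # forward: every rank receives concat(x_0, ..., x_{n-1})
    return [np.concatenate(xs) for _ in xs]

def reduce_scatter_v_sum(douts, split_sizes):
    # adjoint: sum the per-rank output grads, then split by the forward sizes
    total = np.sum(douts, axis=0)
    offsets = np.cumsum([0] + split_sizes[:-1])
    return [total[o:o + s] for o, s in zip(offsets, split_sizes)]

xs = [np.ones(2), np.ones(3)]      # ranks contribute 2 and 3 elements
douts = [np.ones(5) for _ in xs]   # upstream grad on each rank's gathered output
print(reduce_scatter_v_sum(douts, [2, 3]))
# [array([2., 2.]), array([2., 2., 2.])] -- each dx_i sums over all ranks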
@@ -685,11 +722,13 @@ def get_bprop_mirror_operator(self):
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
     dump_local_norm_path = ms.get_auto_parallel_context("dump_local_norm_path")
     dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
+    if dump_local_norm_path:
+        global_rank = get_rank()
+        file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
     if dump_device_local_norm:
         # init _squared _squared_device_local_norm
         squared_device_local_norm = get_squared_device_local_norm_param()
     if dev_num > 1:
-        global_rank = get_rank()
         dev_num_r = 1.0 / dev_num
         all_reduce = AllReduce(group=group)
         all_gather = AllGather(group=group)

@@ -717,8 +756,7 @@ def get_bprop_mirror_operator(self):
             squared_norm = reduce_sum(square((dout)))
             if dump_local_norm:
                 if dump_local_norm_path:
-                    dout = F.depend(dout, tensor_dump(
-                        "/local_norm__" + param_name, sqrt(squared_norm)))
+                    dout = F.depend(dout, tensor_dump(file, sqrt(squared_norm)))
                 else:
                     dout = F.depend(dout, ln_print("dump local norm: ", param_name, sqrt(squared_norm)))
             if dump_device_local_norm:
mindspore/ops/_grad_experimental/grad_debug_ops.py
CHANGED

@@ -41,3 +41,17 @@ def get_bprop_insert_gradient_of(self):
 def bprop_tensor_dump(file, input_x, out, dout):
     """Generate bprop for TensorDump"""
     return file, C.zeros_like(input_x)
+
+
+@bprop_getters.register(P.DumpGradient)
+def get_bprop_dump_gradient(self):
+    """Generate bprop for DumpGradient"""
+    td = P.TensorDump()
+    td.add_prim_attr("side_effect_io", False)
+    td.add_prim_attr("td_flag", True)
+
+    def bprop(path, x, input_output, out, dout):
+        tded = td(path, dout)
+        fdout = F.depend(dout, tded)
+        return C.zeros_like(path), fdout, C.zeros_like(input_output)
+    return bprop
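The new DumpGradient bprop routes the incoming gradient through a TensorDump whose io side effect is disabled (ordering is enforced with depend instead, plus a td_flag attribute for the backend), then passes dout through unchanged. The core depend-on-a-dump pattern, sketched with the public ops and assuming a device where TensorDump is supported; the wrapper function is illustrative, not the new op:

import mindspore as ms
from mindspore import ops, Tensor

td = ops.TensorDump()

def dump_and_pass_through(path, dout):
    dumped = td(path, dout)          # side effect: write dout to <path>.npy
    return ops.depend(dout, dumped)  # value unchanged, dump ordered before use

grad = Tensor([1.0, 2.0], ms.float32)
print(dump_and_pass_through("local_grad", grad))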
mindspore/ops/_grad_experimental/grad_inner_ops.py
CHANGED

@@ -23,15 +23,6 @@ from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
 from mindspore.ops._grad_experimental.grad_base import bprop_getters


-@bprop_getters.register("raise")
-def get_bprop_raise(self):
-    """Grad definition for `raise` operation."""
-    def bprop(x, y, z, out, dout):
-        return x, y, z
-
-    return bprop
-
-
 @bprop_getters.register(inner.ParallelResizeBilinear)
 def get_bprop_parallel_resize_bilinear(self):
     """Grad definition for `ParallelResizeBilinear` operation."""
mindspore/ops/_op_impl/cpu/__init__.py
CHANGED

@@ -72,6 +72,7 @@ from .pyfunc import _pyfunc_cpu
 from .buffer_append import _buffer_append_cpu
 from .buffer_get import _buffer_get_cpu
 from .raise_op import _raise_cpu
+from .joinedstr_op import _joinedstr_cpu
 from .buffer_sample import _buffer_sample_cpu
 from .priority_replay_buffer import _prb_push_op_cpu
 from .priority_replay_buffer import _prb_sample_op_cpu
mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py}
CHANGED

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -12,11 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-"""
-from
+"""JoinedStr op"""
+from mindspore.ops.op_info_register import op_info_register, CpuRegOp, DataType

-
-
+joinedstr_op_info = CpuRegOp("JoinedStr") \
+    .input(0, "x", "dynamic") \
+    .output(0, "y", "dynamic") \
+    .dtype_format(DataType.I32_Default, DataType.I32_Default) \
+    .get_op_info()


-
+@op_info_register(joinedstr_op_info)
+def _joinedstr_cpu():
+    """JoinedStr cpu register"""
+    return