mindspore-2.6.0rc1-cp39-cp39-win_amd64.whl → mindspore-2.7.0rc1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore has been flagged as possibly problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +37 -62
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +43 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +77 -16
- mindspore/common/api.py +238 -113
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +81 -81
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +58 -40
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +33 -3
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +212 -9
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +137 -101
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/nn/cell.py +328 -502
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +3 -3
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
- mindspore/ops/auto_generate/gen_extend_func.py +23 -141
- mindspore/ops/auto_generate/gen_ops_def.py +727 -321
- mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +9 -96
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +33 -540
- mindspore/ops/function/nn_func.py +28 -74
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +571 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +2 -2
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +294 -174
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +7 -39
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +47 -8
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +11 -8
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +14 -7
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +11 -7
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +6 -7
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +3 -4
- mindspore/parallel/transform_safetensors.py +463 -174
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +30 -32
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +14 -4
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +97 -16
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +135 -55
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/version.py +1 -1
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +333 -371
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/device_manager.py
CHANGED
@@ -15,16 +15,14 @@
 
 """Device manager interfaces."""
 
+__all__ = ['set_device', 'set_deterministic', 'get_current_device']
+
 import os
 from mindspore import log as logger
 from mindspore._c_expression import DeviceManagerConf, DeviceContextManager, MSContext, CollectiveManager
 from mindspore._checkparam import args_type_check
 from mindspore.parallel._ps_context import _need_reset_device_target_for_ps
 
-
-__all__ = ['set_device', 'set_deterministic', 'get_current_device']
-
-
 class DeviceInfo(tuple):
     """
     DeviceInfo class. Store the current device target and the corresponding device id.
@@ -120,14 +118,21 @@ def set_deterministic(deterministic):
     """
     Enables or disables deterministic computing.
 
-
-
-
-
-    communication operators in the global communication group.
+    This configuration is a global configuration, and once enabled, subsequent calculation operations
+    will follow the configuration setting. When deterministic computing is enabled, the same output
+    is generated if an operator is executed for multiple times with the same hardware and input. This often
+    slows down operator execution.
 
     The framework not enabled deterministic computation by default.
 
+    Note:
+        - In distributed scenario, we suggest user to set deterministic computing before
+          calling :func:`mindspore.communication.init` to enable deterministic operation for
+          communication operators in the global communication group.
+        - The fixed method for deterministic calculation must be in the same main process as the network,
+          operator, etc. Only one deterministic state can be set in the same thread, and it is not recommended
+          to set deterministic state multiple times in one thread.
+
     Args:
         deterministic (bool): Whether to enable deterministic computing.
 
@@ -139,6 +144,8 @@ def set_deterministic(deterministic):
     if DeviceManagerConf.get_instance().is_deterministic_configured():
         raise RuntimeError("The 'mindspore.set_deterministic' can not be set repeatedly.")
 
+    logger.info(f"Set deterministic setting to '{deterministic}'.")
+
     # Must wait for all async created groups to be initialized so that
     # deterministic feature could be consistent between all processes.
     CollectiveManager.get_instance().wait_all_comm_init()
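The restored docstring notes pin down an ordering requirement: configure determinism once, before the communication layer is initialized, so that communication operators in the global group are covered as well. A minimal sketch of that ordering (the device target here is an assumption; adjust it to your platform):

    import mindspore as ms
    from mindspore import communication

    ms.set_device("Ascend")       # assumed device target for this sketch
    ms.set_deterministic(True)    # may only be set once per process, per the RuntimeError above
    communication.init()          # initialize collective communication after determinism is configured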
mindspore/dnnl.dll
CHANGED
Binary file
mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py
CHANGED

@@ -18,8 +18,6 @@ import os
 import numpy as np
 from mindspore.common import Tensor, dtype
 from mindspore.experimental.llm_boost.ascend_native.llm_boost import LLMBoost
-from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
-
 
 def RoundUp(val: int, align: int) -> int:
     if align == 0:
@@ -44,8 +42,6 @@ def ConvertTensor(nd_mat: np.ndarray, transpose: bool = True, nd2nz: bool = True
     nz_mat = nz_mat.reshape(r, c)
     return nz_mat
 
-
-@LlmBoostRegister.register(LlmBoostType.ASCEND_NATIVE, "Llama")
 class LlamaBoostAscendNative(LLMBoost):
     r"""
     Implements an Llama model in a single kernel.
@@ -68,7 +64,7 @@ class LlamaBoostAscendNative(LLMBoost):
     def _prepare_single_layer(self, ckpt, config, id):
         """ prepares the dictionary of weights of a single layer """
         prefix = 'model.layers.' + str(id)
-        is_last =
+        is_last = id == config.num_layers-1
         layer = 'layers.' + str(id) + '.'
         l_dict = {key: value for key, value in ckpt.items() if layer in key}
         if config.n_kv_heads is None:
@@ -96,8 +92,8 @@ class LlamaBoostAscendNative(LLMBoost):
         else:
             raise RuntimeError("hidden size and ffn hidden size must be divided by rank size without remainder. \
                 hidden_size: ", hid_size, " ffn_hidden_size: ", ffn_hid, " rank_size: ", rank_size)
-        quant =
-        unite_qkv =
+        quant = self._get_from_dict(l_dict, "_weight_quantizer") is not None
+        unite_qkv = config.num_heads == config.n_kv_heads
         self.dictionary[prefix + ".attention_norm.weight"] = \
             Tensor(self._get_from_dict(l_dict, "attention_norm"), dtype=dtype.float16)
         self.dictionary[prefix + ".ffn_norm.weight"] = \
mindspore/experimental/llm_boost/atb/boost_base.py
CHANGED

@@ -18,7 +18,6 @@ import numpy as np
 import mindspore as ms
 from mindspore import ops, Tensor
 from mindspore import log as logger
-from mindspore.ops import operations as P
 import mindspore.common.dtype as mstype
 from mindspore._c_expression import _set_format
 from mindspore.common.parameter import Parameter
@@ -95,8 +94,8 @@ class AtbBoostBase:
             self.max_base_len, dtype=self.dtype, need_nz=self.need_nz
         )
 
-        self.cast =
-        self.reshape =
+        self.cast = ops.Cast()
+        self.reshape = ops.Reshape()
         self.kv_quant = None
         self.rank_id = get_real_rank()
         self.device_num = get_real_group_size()
mindspore/experimental/optim/adadelta.py
CHANGED

@@ -1,34 +1,24 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adadelta.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adadelta"""
 from __future__ import absolute_import
 
-from mindspore
+from mindspore import ops
 import mindspore.common.dtype as mstype
 from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_than, check_not_less_than_without_equal
 from mindspore import _checkparam as validator
 from mindspore import jit
 
-_adadelta_opt =
+_adadelta_opt = ops.MultitypeFuncGraph("adadelta_opt")
 
 
 @_adadelta_opt.register("Function", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
 def _tensor_run_opt(opt, rho, epsilon, learning_rate, weight, accum, accum_update, gradient):
     """Apply adadelta optimizer to the weight parameter."""
     success = True
-    success =
+    success = ops.depend(success, opt(weight, accum, accum_update, learning_rate, rho, epsilon, gradient))
     return success
 
 
@@ -60,6 +50,9 @@ class Adadelta(Optimizer):
         &\rule{180mm}{0.4pt} \\[-1.ex]
         \end{aligned}
 
+    For more details about Adadelta algorithm, please refer to `ADADELTA: An Adaptive Learning Rate Method
+    <https://arxiv.org/abs/1212.5701>`_.
+
     .. warning::
         This is an experimental optimizer API that is subject to change.
         This module must be used with lr scheduler module in `LRScheduler Class
@@ -129,18 +122,18 @@ class Adadelta(Optimizer):
 
         self.accum = self.parameters.clone(prefix="accum", init=0)
         self.accum_update = self.parameters.clone(prefix="accum_update", init=0)
-        self.opt =
-        self.op_cast =
+        self.opt = ops.ApplyAdadelta()
+        self.op_cast = ops.Cast()
 
     @jit
     def implementation(self, lr, rho, eps, maximize, weight_decay, start_id, end_id, gradients):
         """Extract the common computing part for acceleration"""
         params = self.parameters[start_id: end_id]
-        grads = tuple([grad if not maximize else
+        grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
         grads = self._decay_weight(weight_decay, params, grads)
         accum = self.accum[start_id: end_id]
         accum_update = self.accum_update[start_id: end_id]
-        self.hyper_map(
+        self.hyper_map(ops.partial(_adadelta_opt, self.opt, rho, eps, lr),
                        params, accum, accum_update, grads)
         return True
 
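The right-hand sides restored in this file, and in the optimizer files that follow, all use the same composite pattern: a MultitypeFuncGraph registers a per-parameter update, ops.partial binds the scalars shared by every parameter, and HyperMap maps the result over the parameter and state tuples. A standalone sketch of that pattern with a deliberately simple, hypothetical update rule (this is not the Adadelta rule itself):

    import mindspore as ms
    from mindspore import ops, Tensor

    # Hypothetical per-parameter update used only to illustrate the dispatch pattern.
    _demo_opt = ops.MultitypeFuncGraph("demo_opt")

    @_demo_opt.register("Number", "Tensor", "Tensor")
    def _run_demo_opt(lr, param, grad):
        """Return the parameter after one plain gradient step."""
        return param - lr * grad

    hyper_map = ops.HyperMap()

    @ms.jit
    def step(params, grads):
        # Bind the shared scalar with ops.partial, then map over per-parameter pairs.
        return hyper_map(ops.partial(_demo_opt, 0.01), params, grads)

    out = step((Tensor([1.0, 2.0]), Tensor([3.0, 4.0])),
               (Tensor([0.1, 0.1]), Tensor([0.2, 0.2])))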
mindspore/experimental/optim/adagrad.py
CHANGED

@@ -1,34 +1,24 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adagrad.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adagrad"""
 from __future__ import absolute_import
 
-from mindspore
+from mindspore import ops
 from mindspore.common import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_than, check_not_less_than_without_equal
 from mindspore import jit
 
-_adagrad_opt =
+_adagrad_opt = ops.MultitypeFuncGraph("adagrad_opt")
 
 
 @_adagrad_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor")
 def _tensor_run_opt(opt, learning_rate, weight, accum, gradient):
     """Apply adagrad optimizer to the weight parameter."""
     success = True
-    success =
+    success = ops.depend(success, opt(weight, accum, learning_rate, gradient))
     return success
 
 
@@ -57,6 +47,9 @@ class Adagrad(Optimizer):
         &\rule{160mm}{0.4pt} \\[-1.ex]
         \end{aligned}
 
+    For more details about Adagrad algorithm, please refer to `Adaptive Subgradient Methods for Online Learning and \
+    Stochastic Optimization <https://jmlr.org/papers/v12/duchi11a.html>`_.
+
     .. warning::
         This is an experimental optimizer API that is subject to change.
         This module must be used with lr scheduler module in `LRScheduler Class
@@ -129,22 +122,22 @@ class Adagrad(Optimizer):
         super(Adagrad, self).__init__(params, defaults)
 
         self.accum = self.parameters.clone(prefix="accum", init=initial_accumulator_value)
-        self.op_cast =
+        self.op_cast = ops.Cast()
         self.step_t = Parameter(Tensor(0, mstype.int32), "step_t")
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd =
-        self.assign =
+        self.assignadd = ops.AssignAdd()
+        self.assign = ops.Assign()
 
     @jit
     def implementation(self, eps, lr, lr_decay, maximize, weight_decay, start_id, end_id, gradients):
         """Extract the common computing part for acceleration"""
-        opt =
+        opt = ops.ApplyAdagradV2(epsilon=eps, update_slots=True)
         decay_lr = lr / (1 + self.step_t * lr_decay)
         params = self.parameters[start_id: end_id]
-        grads = tuple([grad if not maximize else
+        grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
         grads = self._decay_weight(weight_decay, params, grads)
         accum = self.accum[start_id: end_id]
-        self.hyper_map(
+        self.hyper_map(ops.partial(_adagrad_opt, opt, decay_lr), params, accum, grads)
         return True
 
     def construct(self, gradients):
mindspore/experimental/optim/adam.py
CHANGED

@@ -1,29 +1,19 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adam.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adam"""
 from __future__ import absolute_import
 
-from mindspore
+from mindspore import ops
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
 from mindspore.experimental.optim.optimizer import Optimizer
 from mindspore.common.api import jit
 
-_adam_opt =
-adam_op =
+_adam_opt = ops.MultitypeFuncGraph("adam_opt")
+adam_op = ops.Adam(False, False)
 
 
 @_adam_opt.register("Tensor", "Tensor", "Float", "Float", "Float", "Tensor",
@@ -81,6 +71,9 @@ class Adam(Optimizer):
         &\rule{180mm}{0.4pt} \\[-1.ex]
         \end{aligned}
 
+    For more details about Adam algorithm, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_.
+
     .. warning::
         The implementation formula of this optimizer interface is not completely consistent with that in the paper.
         If you want to use an interface that is completely consistent, it is recommended to use
@@ -160,12 +153,12 @@ class Adam(Optimizer):
         self.max_exp_avg_sq = self.parameters.clone(prefix="max_exp_avg_sq", init='zeros')
         self.state_step = Parameter(Tensor(0, mstype.int32), "state_step")
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd =
-        self.op_add =
-        self.op_mul =
-        self.op_pow =
-        self.adam_opt =
-        self.op_cast =
+        self.assignadd = ops.AssignAdd()
+        self.op_add = ops.AddN()
+        self.op_mul = ops.Mul()
+        self.op_pow = ops.Pow()
+        self.adam_opt = ops.Adam(False, False)
+        self.op_cast = ops.Cast()
 
     @jit
     def implementation(self, beta1, beta2, eps, lr, start_id, end_id, gradients, maximize, weight_decay):
@@ -173,9 +166,9 @@ class Adam(Optimizer):
         beta1_power = self.op_pow(beta1, self.state_step)
         beta2_power = self.op_pow(beta2, self.state_step)
         params = self.parameters[start_id: end_id]
-        grads = tuple([grad if not maximize else
+        grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
         grads = self._decay_weight(weight_decay, params, grads)
-        self.hyper_map(
+        self.hyper_map(ops.partial(_adam_opt, beta1_power, beta2_power, beta1, beta2, eps, lr),
                        grads, params,
                        self.exp_avg[start_id: end_id], self.exp_avg_sq[start_id: end_id])
         return True
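The restored paper reference and the existing warning together describe how these experimental optimizers are meant to be driven: gradients are computed externally and passed to the optimizer call, and the learning rate comes from an LRScheduler. A minimal, hedged training-step sketch in the spirit of the official examples (the network, loss and scheduler settings below are placeholders):

    import mindspore as ms
    from mindspore import nn
    from mindspore.experimental import optim

    net = nn.Dense(4, 1)                      # placeholder network
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(net.trainable_params(), lr=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    def forward_fn(x, y):
        return loss_fn(net(x), y)

    grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters)

    def train_step(x, y):
        loss, grads = grad_fn(x, y)
        optimizer(grads)                      # the optimizer Cell is called with the gradients
        return loss

    # call scheduler.step() once per epoch (or per step, depending on the schedule)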
mindspore/experimental/optim/adamax.py
CHANGED

@@ -1,21 +1,10 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adamax.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adamax"""
 from __future__ import absolute_import
 
-from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore import _checkparam as validator
@@ -23,17 +12,17 @@ from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_tha
 from mindspore import ops
 from mindspore import jit
 
-_adamax_opt =
+_adamax_opt = ops.MultitypeFuncGraph("adamax_opt")
 
 
 @_adamax_opt.register("Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
 def _tensor_run_opt(beta1, beta2, eps, clr, param, grad, exp_avg, exp_inf):
     """Apply adamax optimizer to the weight parameter."""
-
+    ops.assign(exp_avg, exp_avg * beta1 + grad * (1-beta1))
     norm_buf = ops.cat([ops.unsqueeze(exp_inf * beta2, 0), ops.unsqueeze(grad.abs().add(eps), 0)], 0)
-
+    ops.assign(exp_inf, ops.amax(norm_buf, 0))
 
-
+    ops.assign(param, param - clr * exp_avg / exp_inf)
     return True
 
 
@@ -68,6 +57,9 @@ class Adamax(Optimizer):
         This module must be used with lr scheduler module in `LRScheduler Class
         <https://www.mindspore.cn/docs/en/master/api_python/mindspore.experimental.html#lrscheduler-class>`_ .
 
+    For more details about Adamax algorithm, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_.
+
     Args:
         params (Union[list(Parameter), list(dict)]): list of parameters to optimize or dicts defining
             parameter groups.
@@ -135,8 +127,8 @@ class Adamax(Optimizer):
         self.exp_avg = self.parameters.clone(prefix="exp_avg", init='zeros')
         self.exp_inf = self.parameters.clone(prefix="exp_inf", init='zeros')
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd =
-        self.op_cast =
+        self.assignadd = ops.AssignAdd()
+        self.op_cast = ops.Cast()
 
     @jit
     def implementation(self, group_id, lr, gradients, maximize, weight_decay, beta1, beta2, eps):
@@ -144,13 +136,13 @@ class Adamax(Optimizer):
         start_id = self.group_start_id[group_id]
         end_id = self.group_start_id[group_id + 1]
         params = self.parameters[start_id: end_id]
-        grads = tuple([grad if not maximize else
+        grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
         grads = self._decay_weight(weight_decay, params, grads)
         exp_avg = self.exp_avg[start_id: end_id]
        exp_inf = self.exp_inf[start_id: end_id]
         bias_correction = 1 - beta1 ** self.step_t
         clr = lr / bias_correction
-        self.hyper_map(
+        self.hyper_map(ops.partial(_adamax_opt, beta1, beta2, eps, clr),
                        params, grads, exp_avg, exp_inf)
         return True
 
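Written out, the three in-place assignments restored in _tensor_run_opt implement the infinity-norm (Adamax) variant of Adam from the paper referenced above, with exp_avg as the first moment, exp_inf as the infinity-norm accumulator, and clr the bias-corrected step size:

    \begin{aligned}
    m_t      &= \beta_1\, m_{t-1} + (1-\beta_1)\, g_t                             && \text{(exp\_avg)} \\
    u_t      &= \max\bigl(\beta_2\, u_{t-1},\ |g_t| + \epsilon\bigr)              && \text{(exp\_inf, via norm\_buf)} \\
    \theta_t &= \theta_{t-1} - \frac{\eta}{1-\beta_1^{\,t}} \cdot \frac{m_t}{u_t} && \text{(clr} = \eta / (1-\beta_1^{\,t})\text{)}
    \end{aligned}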
mindspore/experimental/optim/adamw.py
CHANGED

@@ -1,21 +1,10 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adamw.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adamw"""
 from __future__ import absolute_import
 
-from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
@@ -25,14 +14,14 @@ from mindspore.ops import auto_generate as gen
 from mindspore import ops
 from mindspore import jit
 
-_adamw_opt =
-_speed_adamw_opt =
+_adamw_opt = ops.MultitypeFuncGraph("adamw_opt")
+_speed_adamw_opt = ops.MultitypeFuncGraph("speed_adamw_opt")
 
-op_mul =
-op_pow =
-op_sqrt =
-op_maximum =
-hyper_map =
+op_mul = ops.Mul()
+op_pow = ops.Pow()
+op_sqrt = ops.Sqrt()
+op_maximum = ops.Maximum()
+hyper_map = ops.HyperMap()
 
 
 @_speed_adamw_opt.register("Function", "Float", "Float", "Tensor", "Float", "Float", "Bool", "Bool", "Tensor", "Tensor",
@@ -76,18 +65,18 @@ def _run_adamw_opt(weight_decay_new, step_size, amsgrad, eps, bias_correction2_s
     """Apply adamw optimizer to the weight parameter."""
     success = True
     next_param = op_mul(param, weight_decay_new)
-
-
+    ops.assign(exp_avg, op_mul(exp_avg, beta1) + op_mul(grad, 1 - beta1))
+    ops.assign(exp_avg_sq, ops.addcmul(op_mul(exp_avg_sq, beta2), grad, grad, 1 - beta2))
 
     if amsgrad:
         next_max_exp_avg = op_maximum(max_exp_avg_sq, exp_avg_sq)
         denom = op_sqrt(next_max_exp_avg) / bias_correction2_sqrt + eps
-
+        ops.assign(max_exp_avg_sq, next_max_exp_avg)
     else:
         denom = op_sqrt(exp_avg_sq) / bias_correction2_sqrt + eps
 
     return_param = next_param - op_mul(exp_avg / denom, step_size)
-
+    ops.assign(param, return_param)
     return success
 
 
@@ -129,6 +118,10 @@ class AdamW(Optimizer):
         &\rule{180mm}{0.4pt} \\[-1.ex]
         \end{aligned}
 
+    More details of the AdamW algorithm can be found in the paper `Decoupled Weight Decay Regularization
+    <https://arxiv.org/abs/1711.05101>`_ and `On the Convergence of Adam and Beyond
+    <https://openreview.net/forum?id=ryQu7f-RZ>`_.
+
     .. warning::
         This is an experimental optimizer API that is subject to change.
         This module must be used with lr scheduler module in `LRScheduler Class
@@ -205,16 +198,16 @@ class AdamW(Optimizer):
         self.max_exp_avg_sq = self.parameters.clone(prefix="max_exp_avg_sq", init='zeros')
         self.state_step = Parameter(Tensor(0, mstype.int32), "state_step")
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd =
-        self.op_cast =
+        self.assignadd = ops.AssignAdd()
+        self.op_cast = ops.Cast()
 
     @jit
     def implementation(self, lr, weight_decay, beta1, beta2, amsgrad, eps, grads, start_id, end_id):
         """Extract the common computing part for acceleration"""
         weight_decay_new, step_size, bias_correction2_sqrt = prepare_func(lr, weight_decay,
                                                                           self.state_step, beta1, beta2)
-        self.hyper_map(
-
+        self.hyper_map(ops.partial(_adamw_opt, weight_decay_new, step_size, amsgrad,
+                                   eps, bias_correction2_sqrt, beta1, beta2),
                        self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
                        self.exp_avg_sq[start_id: end_id], self.max_exp_avg_sq[start_id: end_id])
         return True
@@ -228,7 +221,8 @@ class AdamW(Optimizer):
            lr = self.lrs[group_id]
            if isinstance(group.get("lr"), float):
                lr = self.op_cast(group.get("lr"), mstype.float32)
-           grads = tuple([grad if not group.get("maximize") else
+           grads = tuple([grad if not group.get("maximize") else ops.neg(grad) \
+                          for grad in gradients[start_id:end_id]])
 
            self.implementation(lr, group.get("weight_decay"), beta1, beta2, group.get("amsgrad"), group.get("eps"),
                                grads, start_id, end_id)
@@ -265,7 +259,7 @@ class SpeedAdamW(Optimizer):
         self.exp_avg_sq = self.parameters.clone(prefix="exp_avg_sq", init='zeros')
         self.state_step = Parameter(Tensor([0], mstype.float32), "state_step")
         self.increase_tensor = Tensor(1, mstype.float32)
-        self.assignadd =
+        self.assignadd = ops.AssignAdd()
         self.adamw_opt = gen.ApplyAdamW()
 
     def construct(self, gradients):
@@ -285,9 +279,9 @@ class SpeedAdamW(Optimizer):
            if group.get("amsgrad"):
                raise ValueError("For SpeedAdamW, the value of amsgrad can only be False.")
 
-           self.hyper_map(
-
-
+           self.hyper_map(ops.partial(_speed_adamw_opt, self.adamw_opt, beta1, beta2, lr,
+                                      group.get("eps"), group.get("weight_decay"),
+                                      group.get("amsgrad"), maximize, bias_correction1, bias_correction2),
                           self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
                          self.exp_avg_sq[start_id: end_id])
 
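The state updates restored in _run_adamw_opt above are written as in-place ops.assign calls, so the moment buffers are mutated rather than rebuilt; the second-moment line additionally uses ops.addcmul(input, tensor1, tensor2, value), which computes input + value * tensor1 * tensor2. A tiny eager-mode sketch of that in-place pattern (buffer name and values are made up for illustration; this is not the full AdamW rule):

    from mindspore import ops, Tensor, Parameter

    # Hypothetical second-moment buffer, named only for this sketch.
    exp_avg_sq = Parameter(Tensor([0.0, 0.0]), name="exp_avg_sq_demo")
    grad = Tensor([0.5, -1.0])
    beta2 = 0.9

    # In place: exp_avg_sq <- beta2 * exp_avg_sq + (1 - beta2) * grad * grad,
    # expressed with addcmul just like the restored AdamW line.
    ops.assign(exp_avg_sq, ops.addcmul(exp_avg_sq * beta2, grad, grad, 1 - beta2))
    print(exp_avg_sq.asnumpy())   # -> [0.025, 0.1]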
mindspore/experimental/optim/asgd.py
CHANGED

@@ -1,33 +1,23 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/asgd.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """asgd"""
 from __future__ import absolute_import
 
-from mindspore
+from mindspore import ops
 from mindspore.common import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_than, check_not_less_than_without_equal
 from mindspore.common.api import jit
 
-_asgd_opt =
+_asgd_opt = ops.MultitypeFuncGraph("asgd_opt")
 
-op_cast =
-op_pow =
-op_maximum =
-op_assign =
-op_assignadd =
+op_cast = ops.Cast()
+op_pow = ops.Pow()
+op_maximum = ops.Maximum()
+op_assign = ops.Assign()
+op_assignadd = ops.AssignAdd()
 
 
 @_asgd_opt.register("Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor",
@@ -37,7 +27,7 @@ def _run_asgd_opt(lambd, alpha, t0, step, lr, param, grad, eta, mu, ax):
     if step == 1:
         op_assign(eta, lr)
     next_param = op_cast(param * (1. - lambd * eta) - eta * grad, param.dtype)
-
+    ops.assign(param, next_param)
 
     if mu != 1:
         op_assignadd(ax, op_cast((next_param - ax) * mu, ax.dtype))
@@ -121,8 +111,8 @@ class ASGD(Optimizer):
         self.ax = self.parameters.clone(prefix="ax", init='zeros')
         self.step_t = Parameter(Tensor(0, mstype.int32), "step_t")
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd =
-        self.op_cast =
+        self.assignadd = ops.AssignAdd()
+        self.op_cast = ops.Cast()
 
     @jit(backend="ms_backend")
     def implementation(self, lambd, alpha, t0, lr, group_id, maximize, gradients, weight_decay):
@@ -130,13 +120,13 @@ class ASGD(Optimizer):
         start_id = self.group_start_id[group_id]
         end_id = self.group_start_id[group_id + 1]
         params = self.parameters[start_id: end_id]
-        grads = tuple([grad if not maximize else
+        grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
         grads = self._decay_weight(weight_decay, params, grads)
 
         ax = self.ax[start_id: end_id]
         eta = self.eta[start_id: end_id]
         mu = self.mu[start_id: end_id]
-        self.hyper_map(
+        self.hyper_map(ops.partial(_asgd_opt, lambd, alpha, t0, self.step_t, lr),
                        params, grads, eta, mu, ax)
         return True
 