mindspore-2.6.0-cp39-cp39-win_amd64.whl → mindspore-2.7.0rc1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +36 -61
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +32 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +76 -15
- mindspore/common/api.py +193 -112
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +48 -83
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +38 -23
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +32 -2
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -5
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +208 -5
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +122 -98
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/nn/cell.py +325 -499
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +17 -8
- mindspore/ops/auto_generate/gen_extend_func.py +1 -51
- mindspore/ops/auto_generate/gen_ops_def.py +463 -257
- mindspore/ops/auto_generate/gen_ops_prim.py +1127 -885
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +7 -94
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +21 -367
- mindspore/ops/function/nn_func.py +26 -41
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +0 -2
- mindspore/ops/functional_overload.py +463 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +235 -172
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +5 -6
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +36 -4
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +4 -2
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +12 -5
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +3 -1
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +1 -1
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +2 -2
- mindspore/parallel/transform_safetensors.py +462 -174
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +21 -30
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +6 -2
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +71 -13
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +51 -33
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +329 -367
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/experimental/optim/adam.py

@@ -1,29 +1,19 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adam.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adam"""
 from __future__ import absolute_import
 
-from mindspore
+from mindspore import ops
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
 from mindspore.experimental.optim.optimizer import Optimizer
 from mindspore.common.api import jit
 
-_adam_opt =
-adam_op =
+_adam_opt = ops.MultitypeFuncGraph("adam_opt")
+adam_op = ops.Adam(False, False)
 
 
 @_adam_opt.register("Tensor", "Tensor", "Float", "Float", "Float", "Tensor",
@@ -81,6 +71,9 @@ class Adam(Optimizer):
 &\rule{180mm}{0.4pt} \\[-1.ex]
 \end{aligned}
 
+For more details about Adam algorithm, please refer to `Adam: A Method for Stochastic Optimization
+<https://arxiv.org/abs/1412.6980>`_.
+
 .. warning::
 The implementation formula of this optimizer interface is not completely consistent with that in the paper.
 If you want to use an interface that is completely consistent, it is recommended to use
@@ -160,12 +153,12 @@ class Adam(Optimizer):
 self.max_exp_avg_sq = self.parameters.clone(prefix="max_exp_avg_sq", init='zeros')
 self.state_step = Parameter(Tensor(0, mstype.int32), "state_step")
 self.increase_tensor = Tensor(1, mstype.int32)
-self.assignadd =
-self.op_add =
-self.op_mul =
-self.op_pow =
-self.adam_opt =
-self.op_cast =
+self.assignadd = ops.AssignAdd()
+self.op_add = ops.AddN()
+self.op_mul = ops.Mul()
+self.op_pow = ops.Pow()
+self.adam_opt = ops.Adam(False, False)
+self.op_cast = ops.Cast()
 
 @jit
 def implementation(self, beta1, beta2, eps, lr, start_id, end_id, gradients, maximize, weight_decay):
@@ -173,9 +166,9 @@ class Adam(Optimizer):
 beta1_power = self.op_pow(beta1, self.state_step)
 beta2_power = self.op_pow(beta2, self.state_step)
 params = self.parameters[start_id: end_id]
-grads = tuple([grad if not maximize else
+grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
 grads = self._decay_weight(weight_decay, params, grads)
-self.hyper_map(
+self.hyper_map(ops.partial(_adam_opt, beta1_power, beta2_power, beta1, beta2, eps, lr),
 grads, params,
 self.exp_avg[start_id: end_id], self.exp_avg_sq[start_id: end_id])
 return True
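The new right-hand sides above follow one recurring pattern across these optimizer files: an `ops.MultitypeFuncGraph` holds the per-parameter update, `ops.partial` binds the scalar hyper-parameters, and the optimizer's `hyper_map` applies the result across the parameter, gradient and state lists. A minimal, self-contained sketch of that pattern run in eager mode (the names `_scaled_update`, `w0`, `w1` and the toy update rule are illustrative, not taken from the diff):

```python
import mindspore as ms
from mindspore import Tensor, ops

# A MultitypeFuncGraph dispatches on the declared argument types; HyperMap then
# applies it element-wise over tuples of parameters/gradients, with ops.partial
# binding the scalar hyper-parameters up front (mirrors _adam_opt above).
_scaled_update = ops.MultitypeFuncGraph("scaled_update")

@_scaled_update.register("Number", "Tensor", "Tensor")
def _run_scaled_update(lr, param, grad):
    """param <- param - lr * grad, written in place via ops.assign."""
    ops.assign(param, param - lr * grad)
    return True

hyper_map = ops.HyperMap()
params = (ms.Parameter(Tensor([1.0, 2.0]), name="w0"),
          ms.Parameter(Tensor([3.0]), name="w1"))
grads = (Tensor([0.1, 0.2]), Tensor([0.5]))
hyper_map(ops.partial(_scaled_update, 0.01), params, grads)
print(params[0].asnumpy(), params[1].asnumpy())
```

The same shape reappears in `_adamax_opt`, `_adamw_opt`, `_asgd_opt` and `_nadam_opt` below.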
mindspore/experimental/optim/adamax.py

@@ -1,21 +1,10 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adamax.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adamax"""
 from __future__ import absolute_import
 
-from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore import _checkparam as validator
@@ -23,17 +12,17 @@ from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_tha
 from mindspore import ops
 from mindspore import jit
 
-_adamax_opt =
+_adamax_opt = ops.MultitypeFuncGraph("adamax_opt")
 
 
 @_adamax_opt.register("Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
 def _tensor_run_opt(beta1, beta2, eps, clr, param, grad, exp_avg, exp_inf):
 """Apply adamax optimizer to the weight parameter."""
-
+ops.assign(exp_avg, exp_avg * beta1 + grad * (1-beta1))
 norm_buf = ops.cat([ops.unsqueeze(exp_inf * beta2, 0), ops.unsqueeze(grad.abs().add(eps), 0)], 0)
-
+ops.assign(exp_inf, ops.amax(norm_buf, 0))
 
-
+ops.assign(param, param - clr * exp_avg / exp_inf)
 return True
 
 
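Spelled out, the three restored `ops.assign` calls in `_tensor_run_opt` are the usual Adamax (infinity-norm) step. With `clr` supplied by the caller as `lr / bias_correction` (see the `implementation` hunk below), the update is, roughly:

```latex
\begin{aligned}
m_t &= \beta_1\, m_{t-1} + (1-\beta_1)\, g_t && \text{(exp\_avg)} \\
u_t &= \max\!\bigl(\beta_2\, u_{t-1},\; |g_t| + \epsilon\bigr) && \text{(exp\_inf, via the stacked norm\_buf)} \\
\theta_t &= \theta_{t-1} - clr \cdot \frac{m_t}{u_t}, \qquad clr = \frac{lr}{1-\beta_1^{\,t}}
\end{aligned}
```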
@@ -68,6 +57,9 @@ class Adamax(Optimizer):
 This module must be used with lr scheduler module in `LRScheduler Class
 <https://www.mindspore.cn/docs/en/master/api_python/mindspore.experimental.html#lrscheduler-class>`_ .
 
+For more details about Adamax algorithm, please refer to `Adam: A Method for Stochastic Optimization
+<https://arxiv.org/abs/1412.6980>`_.
+
 Args:
 params (Union[list(Parameter), list(dict)]): list of parameters to optimize or dicts defining
 parameter groups.
@@ -135,8 +127,8 @@ class Adamax(Optimizer):
 self.exp_avg = self.parameters.clone(prefix="exp_avg", init='zeros')
 self.exp_inf = self.parameters.clone(prefix="exp_inf", init='zeros')
 self.increase_tensor = Tensor(1, mstype.int32)
-self.assignadd =
-self.op_cast =
+self.assignadd = ops.AssignAdd()
+self.op_cast = ops.Cast()
 
 @jit
 def implementation(self, group_id, lr, gradients, maximize, weight_decay, beta1, beta2, eps):
@@ -144,13 +136,13 @@ class Adamax(Optimizer):
 start_id = self.group_start_id[group_id]
 end_id = self.group_start_id[group_id + 1]
 params = self.parameters[start_id: end_id]
-grads = tuple([grad if not maximize else
+grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
 grads = self._decay_weight(weight_decay, params, grads)
 exp_avg = self.exp_avg[start_id: end_id]
 exp_inf = self.exp_inf[start_id: end_id]
 bias_correction = 1 - beta1 ** self.step_t
 clr = lr / bias_correction
-self.hyper_map(
+self.hyper_map(ops.partial(_adamax_opt, beta1, beta2, eps, clr),
 params, grads, exp_avg, exp_inf)
 return True
 
mindspore/experimental/optim/adamw.py

@@ -1,21 +1,10 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adamw.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adamw"""
 from __future__ import absolute_import
 
-from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
@@ -25,14 +14,14 @@ from mindspore.ops import auto_generate as gen
 from mindspore import ops
 from mindspore import jit
 
-_adamw_opt =
-_speed_adamw_opt =
+_adamw_opt = ops.MultitypeFuncGraph("adamw_opt")
+_speed_adamw_opt = ops.MultitypeFuncGraph("speed_adamw_opt")
 
-op_mul =
-op_pow =
-op_sqrt =
-op_maximum =
-hyper_map =
+op_mul = ops.Mul()
+op_pow = ops.Pow()
+op_sqrt = ops.Sqrt()
+op_maximum = ops.Maximum()
+hyper_map = ops.HyperMap()
 
 
 @_speed_adamw_opt.register("Function", "Float", "Float", "Tensor", "Float", "Float", "Bool", "Bool", "Tensor", "Tensor",
@@ -76,18 +65,18 @@ def _run_adamw_opt(weight_decay_new, step_size, amsgrad, eps, bias_correction2_s
 """Apply adamw optimizer to the weight parameter."""
 success = True
 next_param = op_mul(param, weight_decay_new)
-
-
+ops.assign(exp_avg, op_mul(exp_avg, beta1) + op_mul(grad, 1 - beta1))
+ops.assign(exp_avg_sq, ops.addcmul(op_mul(exp_avg_sq, beta2), grad, grad, 1 - beta2))
 
 if amsgrad:
 next_max_exp_avg = op_maximum(max_exp_avg_sq, exp_avg_sq)
 denom = op_sqrt(next_max_exp_avg) / bias_correction2_sqrt + eps
-
+ops.assign(max_exp_avg_sq, next_max_exp_avg)
 else:
 denom = op_sqrt(exp_avg_sq) / bias_correction2_sqrt + eps
 
 return_param = next_param - op_mul(exp_avg / denom, step_size)
-
+ops.assign(param, return_param)
 return success
 
 
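Taken together, the restored assignments in `_run_adamw_opt` perform the decoupled weight-decay update. With `weight_decay_new`, `step_size` and `bias_correction2_sqrt` precomputed by `prepare_func`, the non-amsgrad branch is, roughly:

```latex
\begin{aligned}
\theta &\leftarrow \theta \cdot \mathrm{weight\_decay\_new} \\
m &\leftarrow \beta_1\, m + (1-\beta_1)\, g \\
v &\leftarrow \beta_2\, v + (1-\beta_2)\, g^2 \\
\theta &\leftarrow \theta - \mathrm{step\_size}\cdot
        \frac{m}{\sqrt{v}\,/\,\mathrm{bias\_correction2\_sqrt} + \epsilon}
\end{aligned}
```

When `amsgrad` is set, `max_exp_avg_sq` takes the place of v in the denominator and is updated to the element-wise maximum, exactly as the `if amsgrad:` branch shows.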
@@ -129,6 +118,10 @@ class AdamW(Optimizer):
 &\rule{180mm}{0.4pt} \\[-1.ex]
 \end{aligned}
 
+More details of the AdamW algorithm can be found in the paper `Decoupled Weight Decay Regularization
+<https://arxiv.org/abs/1711.05101>`_ and `On the Convergence of Adam and Beyond
+<https://openreview.net/forum?id=ryQu7f-RZ>`_.
+
 .. warning::
 This is an experimental optimizer API that is subject to change.
 This module must be used with lr scheduler module in `LRScheduler Class
@@ -205,16 +198,16 @@ class AdamW(Optimizer):
 self.max_exp_avg_sq = self.parameters.clone(prefix="max_exp_avg_sq", init='zeros')
 self.state_step = Parameter(Tensor(0, mstype.int32), "state_step")
 self.increase_tensor = Tensor(1, mstype.int32)
-self.assignadd =
-self.op_cast =
+self.assignadd = ops.AssignAdd()
+self.op_cast = ops.Cast()
 
 @jit
 def implementation(self, lr, weight_decay, beta1, beta2, amsgrad, eps, grads, start_id, end_id):
 """Extract the common computing part for acceleration"""
 weight_decay_new, step_size, bias_correction2_sqrt = prepare_func(lr, weight_decay,
 self.state_step, beta1, beta2)
-self.hyper_map(
-
+self.hyper_map(ops.partial(_adamw_opt, weight_decay_new, step_size, amsgrad,
+eps, bias_correction2_sqrt, beta1, beta2),
 self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
 self.exp_avg_sq[start_id: end_id], self.max_exp_avg_sq[start_id: end_id])
 return True
@@ -228,7 +221,8 @@ class AdamW(Optimizer):
 lr = self.lrs[group_id]
 if isinstance(group.get("lr"), float):
 lr = self.op_cast(group.get("lr"), mstype.float32)
-grads = tuple([grad if not group.get("maximize") else
+grads = tuple([grad if not group.get("maximize") else ops.neg(grad) \
+for grad in gradients[start_id:end_id]])
 
 self.implementation(lr, group.get("weight_decay"), beta1, beta2, group.get("amsgrad"), group.get("eps"),
 grads, start_id, end_id)
@@ -265,7 +259,7 @@ class SpeedAdamW(Optimizer):
 self.exp_avg_sq = self.parameters.clone(prefix="exp_avg_sq", init='zeros')
 self.state_step = Parameter(Tensor([0], mstype.float32), "state_step")
 self.increase_tensor = Tensor(1, mstype.float32)
-self.assignadd =
+self.assignadd = ops.AssignAdd()
 self.adamw_opt = gen.ApplyAdamW()
 
 def construct(self, gradients):
@@ -285,9 +279,9 @@ class SpeedAdamW(Optimizer):
 if group.get("amsgrad"):
 raise ValueError("For SpeedAdamW, the value of amsgrad can only be False.")
 
-self.hyper_map(
-
-
+self.hyper_map(ops.partial(_speed_adamw_opt, self.adamw_opt, beta1, beta2, lr,
+group.get("eps"), group.get("weight_decay"),
+group.get("amsgrad"), maximize, bias_correction1, bias_correction2),
 self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
 self.exp_avg_sq[start_id: end_id])
 
mindspore/experimental/optim/asgd.py

@@ -1,33 +1,23 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/asgd.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """asgd"""
 from __future__ import absolute_import
 
-from mindspore
+from mindspore import ops
 from mindspore.common import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_than, check_not_less_than_without_equal
 from mindspore.common.api import jit
 
-_asgd_opt =
+_asgd_opt = ops.MultitypeFuncGraph("asgd_opt")
 
-op_cast =
-op_pow =
-op_maximum =
-op_assign =
-op_assignadd =
+op_cast = ops.Cast()
+op_pow = ops.Pow()
+op_maximum = ops.Maximum()
+op_assign = ops.Assign()
+op_assignadd = ops.AssignAdd()
 
 
 @_asgd_opt.register("Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor",
@@ -37,7 +27,7 @@ def _run_asgd_opt(lambd, alpha, t0, step, lr, param, grad, eta, mu, ax):
 if step == 1:
 op_assign(eta, lr)
 next_param = op_cast(param * (1. - lambd * eta) - eta * grad, param.dtype)
-
+ops.assign(param, next_param)
 
 if mu != 1:
 op_assignadd(ax, op_cast((next_param - ax) * mu, ax.dtype))
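In symbols, the fragment of `_run_asgd_opt` shown here writes the parameter step and the running average in place (eta is the per-parameter step size kept by the optimizer, mu the averaging coefficient, and the second line applies only while mu != 1):

```latex
\begin{aligned}
\theta &\leftarrow (1 - \lambda\,\eta)\,\theta - \eta\, g \\
ax &\leftarrow ax + \mu\,(\theta - ax)
\end{aligned}
```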
@@ -121,8 +111,8 @@ class ASGD(Optimizer):
 self.ax = self.parameters.clone(prefix="ax", init='zeros')
 self.step_t = Parameter(Tensor(0, mstype.int32), "step_t")
 self.increase_tensor = Tensor(1, mstype.int32)
-self.assignadd =
-self.op_cast =
+self.assignadd = ops.AssignAdd()
+self.op_cast = ops.Cast()
 
 @jit(backend="ms_backend")
 def implementation(self, lambd, alpha, t0, lr, group_id, maximize, gradients, weight_decay):
@@ -130,13 +120,13 @@ class ASGD(Optimizer):
 start_id = self.group_start_id[group_id]
 end_id = self.group_start_id[group_id + 1]
 params = self.parameters[start_id: end_id]
-grads = tuple([grad if not maximize else
+grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
 grads = self._decay_weight(weight_decay, params, grads)
 
 ax = self.ax[start_id: end_id]
 eta = self.eta[start_id: end_id]
 mu = self.mu[start_id: end_id]
-self.hyper_map(
+self.hyper_map(ops.partial(_asgd_opt, lambd, alpha, t0, self.step_t, lr),
 params, grads, eta, mu, ax)
 return True
 
mindspore/experimental/optim/lr_scheduler.py

@@ -1,16 +1,6 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/lr_scheduler.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """LRScheduler."""
 from collections import Counter
@@ -20,8 +10,6 @@ from mindspore import ops, Tensor, Parameter
 from mindspore.experimental.optim.optimizer import Optimizer
 from mindspore.common.api import jit_class
 import mindspore.common.dtype as mstype
-from mindspore.ops import functional as F
-from mindspore.ops import operations as P
 from mindspore import _checkparam as Validator
 
 __all__ = ['StepLR', 'LinearLR', 'LRScheduler', 'ExponentialLR', 'PolynomialLR',
@@ -143,9 +131,12 @@ class LRScheduler:
 
 @jit_class
 class StepLR(LRScheduler):
-"""
-
-
+"""
+During training, when calling `StepLR.step()` , if the current epoch number is an integer multiple of `step_size` ,
+the learning rate will be decayed by multiplying it with `gamma` . The adjustment of the learning rate and
+the parameter update of the optimizer are synergistically performed. The optimizer executes parameter optimization
+operations based on the currently adjusted learning rate. The learning rate decay of StepLR may occur simultaneously
+with external changes to the learning rate.
 
 .. warning::
 This is an experimental lr scheduler module that is subject to change.
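The expanded `StepLR` docstring above describes decay-by-`gamma` every `step_size` epochs. A minimal usage sketch against the experimental API — the `nn.Dense` layer, the `optim.SGD` optimizer and the hyper-parameter values are illustrative, not part of the diff:

```python
from mindspore import nn
from mindspore.experimental import optim

net = nn.Dense(4, 2)
optimizer = optim.SGD(net.trainable_params(), lr=0.1)
# Each time the epoch counter reaches a multiple of step_size,
# StepLR multiplies the current learning rate by gamma.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

for epoch in range(6):
    # ... run the training steps for this epoch with `optimizer` ...
    scheduler.step()
    print(scheduler.get_last_lr())
```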
@@ -431,8 +422,8 @@ class PolynomialLR(LRScheduler):
 raise TypeError(f"For 'PolynomialLR', the type of total_iters must be int, but got {type(total_iters)}.")
 self.total_iters = total_iters
 self.power = power
-self.min =
-self.cast =
+self.min = ops.Minimum()
+self.cast = ops.Cast()
 super(PolynomialLR, self).__init__(optimizer, last_epoch)
 
 def get_lr(self):
@@ -804,7 +795,7 @@ class SequentialLR:
 
 @jit_class
 class ReduceLROnPlateau:
-"""
+r"""
 Reduce learning rate when a metric has stopped improving.
 Models often benefit from reducing the learning rate by a factor
 of 2-10 once learning stagnates. The scheduler reads the metrics `metrics` during execution
@@ -886,7 +877,7 @@ class ReduceLROnPlateau:
 [Tensor(shape=[], dtype=Float32, value= 0.001)]
 [Tensor(shape=[], dtype=Float32, value= 0.001)]
 [Tensor(shape=[], dtype=Float32, value= 0.0001)]
-
+"""
 
 def __init__(self, optimizer, mode='min', factor=0.1, patience=10,
 threshold=1e-4, threshold_mode='rel', cooldown=0,
@@ -915,8 +906,8 @@ class ReduceLROnPlateau:
 self.cooldown_counter = 0
 self.eps = eps
 self.mode_worse = None
-self.assign =
-self.cast =
+self.assign = ops.Assign()
+self.cast = ops.Cast()
 self.last_epoch = Parameter(Tensor(0, dtype=mstype.int32),
 name='last_epoch_' + self.__class__.__name__)
 
@@ -1079,17 +1070,8 @@ class CyclicLR(LRScheduler):
 [Tensor(shape=[], dtype=Float32, value= 0.010225)]
 """
 
-def __init__(self,
-
-base_lr,
-max_lr,
-step_size_up=2000,
-step_size_down=None,
-mode='triangular',
-gamma=1.,
-scale_fn=None,
-scale_mode='cycle',
-last_epoch=-1):
+def __init__(self, optimizer, base_lr, max_lr, step_size_up=2000, step_size_down=None, mode='triangular',
+gamma=1.0, scale_fn=None, scale_mode='cycle', last_epoch=-1):
 
 base_lrs = self._preprocess_input_param(optimizer, base_lr, 'base_lr')
 
@@ -1117,7 +1099,7 @@ class CyclicLR(LRScheduler):
 self._scale_fn_custom = scale_fn
 self.scale_mode = scale_mode
 self._init_scale_fn()
-self.floor =
+self.floor = ops.Floor()
 
 super(CyclicLR, self).__init__(optimizer, last_epoch)
 self.base_lrs = [Tensor(lr) for lr in base_lrs]
@@ -1252,12 +1234,12 @@ class CosineAnnealingWarmRestarts(LRScheduler):
 self.zero_tensor = Tensor(0, mstype.int32)
 
 self.math_pi = math.pi
-self.cos =
-self.cast =
-self.log =
-self.cast =
-self.assign =
-self.floor =
+self.cos = ops.Cos()
+self.cast = ops.Cast()
+self.log = ops.Log()
+self.cast = ops.Cast()
+self.assign = ops.Assign()
+self.floor = ops.Floor()
 self._last_lr = [group["lr"] for group in optimizer.param_groups]
 super(CosineAnnealingWarmRestarts, self).__init__(optimizer, last_epoch)
 
@@ -1306,7 +1288,7 @@ class CosineAnnealingWarmRestarts(LRScheduler):
 
 for i, data in enumerate(zip(self.optimizer.param_groups, self.get_lr())):
 _, lr = data
-
+ops.assign(self.optimizer.param_groups[i]["lr"], lr)
 
 
 @jit_class
@@ -1371,8 +1353,8 @@ class CosineAnnealingLR(LRScheduler):
 self.T_max = T_max
 self.eta_min = eta_min
 self.math_pi = math.pi
-self.cos =
-self.cast =
+self.cos = ops.Cos()
+self.cast = ops.Cast()
 super(CosineAnnealingLR, self).__init__(optimizer, last_epoch)
 
 def get_lr(self):
mindspore/experimental/optim/nadam.py

@@ -1,30 +1,20 @@
-#
-#
-#
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/nadam.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """nadam"""
 from __future__ import absolute_import
 
-from mindspore
+from mindspore import ops
 from mindspore.common import Parameter, Tensor
 import mindspore.common.dtype as mstype
 from mindspore import _checkparam as validator
 from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_than, check_not_less_than_without_equal
 from mindspore import jit
 
-_nadam_opt =
+_nadam_opt = ops.MultitypeFuncGraph("nadam_opt")
 
-op_sqrt =
+op_sqrt = ops.Sqrt()
 
 
 @_nadam_opt.register("Number", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor",
@@ -34,15 +24,15 @@ def _tensor_run_opt(beta1, beta2, momentum_decay, eps, step_t, lr, param, grad,
 bias_correction2 = 1 - beta2 ** step_t
 mu = beta1 * (1. - 0.5 * (0.96 ** (step_t * momentum_decay)))
 mu_next = beta1 * (1. - 0.5 * (0.96 ** ((step_t + 1) * momentum_decay)))
-
-
-
+ops.assign(mu_product, mu_product * mu)
+ops.assign(exp_avg, exp_avg * beta1 + grad * (1 - beta1))
+ops.assign(exp_avg_sq, exp_avg_sq * beta2 + grad * grad * (1 - beta2))
 
 denom = op_sqrt(exp_avg_sq / bias_correction2) + eps
 
 mu_product_next = mu_product * mu_next
-
-
+ops.assign(param, param - lr * (1. - mu) / (1. - mu_product) * grad / denom)
+ops.assign(param, param - (lr * mu_next) / (1. - mu_product_next) * exp_avg / denom)
 
 return True
 
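Restored in full, the NAdam `_tensor_run_opt` applies the Nesterov correction through a running product of the momentum coefficients. With psi = `momentum_decay` and Pi_t the freshly updated `mu_product`, the step reads, roughly:

```latex
\begin{aligned}
\mu_t &= \beta_1\bigl(1 - 0.5\cdot 0.96^{\,t\psi}\bigr),\qquad
\mu_{t+1} = \beta_1\bigl(1 - 0.5\cdot 0.96^{\,(t+1)\psi}\bigr),\qquad
\Pi_t = \Pi_{t-1}\,\mu_t \\
m &\leftarrow \beta_1\, m + (1-\beta_1)\, g,\qquad
v \leftarrow \beta_2\, v + (1-\beta_2)\, g^2,\qquad
d = \sqrt{v/(1-\beta_2^{\,t})} + \epsilon \\
\theta &\leftarrow \theta
   - lr\,\frac{1-\mu_t}{1-\Pi_t}\cdot\frac{g}{d}
   - lr\,\frac{\mu_{t+1}}{1-\Pi_t\,\mu_{t+1}}\cdot\frac{m}{d}
\end{aligned}
```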
@@ -122,8 +112,8 @@ class NAdam(Optimizer):
 self.mu_product = [Parameter(Tensor(1.), "mu_product_" + param.name) for param in self.parameters]
 
 self.increase_tensor = Tensor(1, mstype.int32)
-self.assignadd =
-self.op_cast =
+self.assignadd = ops.AssignAdd()
+self.op_cast = ops.Cast()
 
 @jit
 def implementation(self, lr, beta1, beta2, weight_decay, momentum_decay, eps, start_id, end_id, gradients):
@@ -135,7 +125,7 @@ class NAdam(Optimizer):
 exp_avg_sq = self.exp_avg_sq[start_id: end_id]
 mu_product = self.mu_product[start_id: end_id]
 
-self.hyper_map(
+self.hyper_map(ops.partial(_nadam_opt, beta1, beta2, momentum_decay, eps, self.step_t, lr),
 params, grads, exp_avg, exp_avg_sq, mu_product)
 return True
 