mindspore 2.1.0__cp37-cp37m-win_amd64.whl → 2.2.11__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +4 -1
- mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +23 -29
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +4 -11
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +13 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +67 -72
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +86 -106
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +25 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/amp.py +47 -11
- mindspore/atlprov.dll +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +29 -0
- mindspore/common/api.py +174 -259
- mindspore/common/auto_dynamic_shape.py +494 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +243 -165
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +14 -3
- mindspore/context.py +152 -61
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +33 -7
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +16 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +17 -14
- mindspore/include/api/status.h +8 -3
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +313 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +22 -30
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +323 -204
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +61 -95
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +16 -25
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +27 -22
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +87 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/numpy/utils_const.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +6 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +174 -193
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -9
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +568 -260
- mindspore/ops/function/random_func.py +88 -57
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +244 -25
- mindspore/ops/operations/__init__.py +31 -19
- mindspore/ops/operations/_grad_ops.py +71 -7
- mindspore/ops/operations/_inner_ops.py +350 -17
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +68 -282
- mindspore/ops/operations/comm_ops.py +107 -59
- mindspore/ops/operations/custom_ops.py +94 -70
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +192 -144
- mindspore/ops/operations/nn_ops.py +857 -489
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +12 -5
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +18 -13
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +19 -12
- mindspore/parallel/shard.py +21 -14
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +4 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
- mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
- mindspore/profiler/parser/ascend_op_generator.py +6 -6
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
- mindspore/profiler/parser/base_timeline_generator.py +10 -8
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +38 -22
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +21 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +179 -89
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +42 -21
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +523 -578
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +6 -4
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +541 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +15 -8
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +84 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +187 -47
- mindspore/train/serialization.py +487 -161
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +37 -17
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +7 -4
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +429 -486
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/nn/optim/ada_grad.py
CHANGED
@@ -166,7 +166,7 @@ class Adagrad(Optimizer):
  >>> import mindspore.nn as nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Adagrad(params=net.trainable_params())
@@ -205,6 +205,7 @@ class Adagrad(Optimizer):
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
      success = self.map_reverse(F.partial(_ada_grad_opt, self.opt), lr, params, accum,
                                 grads)
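The same two changes repeat across the optimizer files that follow: the truncated docs link in the example docstrings is updated to point at the r2.2 LeNet5 sample, and construct() now advances global_step explicitly right after get_lr(). Below is a minimal sketch of what the second change means for step counting, assuming a MindSpore 2.2.x install running in PyNative mode; the zero gradients and the printed values are illustrative, not taken from the diff.

import numpy as np
import mindspore as ms
from mindspore import nn, Tensor

ms.set_context(mode=ms.PYNATIVE_MODE)

net = nn.Dense(2, 3)
opt = nn.Adagrad(params=net.trainable_params(), learning_rate=0.1)

# Zero gradients with the same shapes as the trainable parameters.
grads = tuple(Tensor(np.zeros(p.shape, np.float32)) for p in net.trainable_params())

print(int(opt.global_step))  # 0 before the first update
opt(grads)
print(int(opt.global_step))  # 1 on 2.2.11; on 2.1.0 a static lr left this at 0,
                             # since only get_lr() with a dynamic lr/weight decay advanced it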
mindspore/nn/optim/adadelta.py
CHANGED
@@ -194,6 +194,7 @@ class Adadelta(Optimizer):
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
      success = self.map_reverse(F.partial(_adadelta_opt, self.opt, self.rho, self.epsilon), lr, params,
                                 self.accum, self.accum_update, grads)
mindspore/nn/optim/adafactor.py
CHANGED
@@ -264,7 +264,7 @@ class AdaFactor(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) Parameters use the default learning rate with None and weight decay with 0.
  >>> optim = nn.AdaFactor(params=net.trainable_params())
@@ -410,6 +410,7 @@ class AdaFactor(Optimizer):
  def construct(self, gradients):
      gradients = self.flatten_gradients(gradients)
      lr = self.get_lr()
+     self.assignadd(self.global_step, self.global_step_increase_tensor)
      step = F.assign_add(self.step, 1)
      if self.scale_lr and self.relative_step:
          if self.warmup_init:
mindspore/nn/optim/adam.py
CHANGED
@@ -719,7 +719,7 @@ class Adam(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Adam(params=net.trainable_params())
@@ -918,6 +918,7 @@ class Adam(Optimizer):
  gradients = self.scale_grad(gradients)
  gradients = self._grad_sparse_indices_deduplicate(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  beta1_power = self.beta1_power * self.beta1
  self.beta1_power = beta1_power
@@ -985,7 +986,7 @@ class AdamWeightDecay(Optimizer):
  There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
  As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means, refer
- document `LossScale <https://www.mindspore.cn/tutorials/en/r2.
+ document `LossScale <https://www.mindspore.cn/tutorials/en/r2.2/advanced/mixed_precision.html>`_ to
  process `loss_scale` correctly.

  If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
@@ -1069,7 +1070,7 @@ class AdamWeightDecay(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdamWeightDecay(params=net.trainable_params())
@@ -1109,6 +1110,7 @@ class AdamWeightDecay(Optimizer):
  gradients = self.flatten_gradients(gradients)
  weight_decay = self.get_weight_decay()
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  if self.use_fused_opt:
      if self.is_group:
@@ -1282,7 +1284,7 @@ class AdamOffload(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdamOffload(params=net.trainable_params())
@@ -1330,6 +1332,7 @@ class AdamOffload(Optimizer):
  gradients = self.decay_weight(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  beta1_power = self.beta1_power * self.beta1
  self.beta1_power = beta1_power
mindspore/nn/optim/adamax.py
CHANGED
@@ -66,7 +66,7 @@ class AdaMax(Optimizer):
  :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector,
  :math:`g` represents `gradients`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
  :math:`t` represents the current step, :math:`beta_1^t` represent `beta1_power`,
- :math
+ :math:`l` represents `learning_rate`, :math:`w` represents `params`,
  :math:`\epsilon` represents `eps`.

  Note:
@@ -161,7 +161,7 @@ class AdaMax(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdaMax(params=net.trainable_params())
@@ -204,6 +204,7 @@ class AdaMax(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  self.beta1_power *= self.beta1

mindspore/nn/optim/adasum.py
CHANGED
@@ -445,7 +445,7 @@ class AdaSumByGradWrapCell(Cell):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> optim = nn.AdaSumByGradWrapCell(nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9))
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
@@ -514,7 +514,7 @@ class AdaSumByDeltaWeightWrapCell(Cell):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> optim = nn.AdaSumByDeltaWeightWrapCell(nn.Momentum(params=net.trainable_params(),
  ...                                        learning_rate=0.1, momentum=0.9))
mindspore/nn/optim/asgd.py
CHANGED
@@ -128,7 +128,7 @@ class ASGD(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.ASGD(params=net.trainable_params())
@@ -185,8 +185,7 @@ class ASGD(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lrs = self.get_lr()
-
- self.assignadd(self.global_step, self.global_step_increase_tensor)
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  success = True
  params = self._parameters
  for index, (grad, param, mu, eta, ax) in enumerate(zip(gradients, params, self.mu, self.eta, self.ax)):
mindspore/nn/optim/ftrl.py
CHANGED
@@ -296,7 +296,7 @@ class FTRL(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.FTRL(params=net.trainable_params())
@@ -359,6 +359,7 @@ class FTRL(Optimizer):
  grads = self.scale_grad(grads)
  grads = self._grad_sparse_indices_deduplicate(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  if self.use_dist_optimizer:
      success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self._ps_push, self._ps_pull,
@@ -379,12 +380,12 @@ class FTRL(Optimizer):
  optimizer operation.
  """
  if not isinstance(value, str):
-     raise TypeError("For 'FTRL', the property 'target' must be string type, "
-                     "but got type {
+     raise TypeError(f"For 'FTRL', the property 'target' must be string type, "
+                     f"but got type {type(value)}.")

  if value not in ('CPU', 'Ascend', 'GPU'):
-     raise ValueError("For 'FTRL', the property 'target' must be 'CPU', 'Ascend' or 'GPU', "
-                      "but got {}"
+     raise ValueError(f"For 'FTRL', the property 'target' must be 'CPU', 'Ascend' or 'GPU', "
+                      f"but got {value}.")

  if value == 'CPU':
      self.sparse_opt = P.FusedSparseFtrl(self.lr, self.l1, self.l2, self.lr_power, self.use_locking)
mindspore/nn/optim/lamb.py
CHANGED
@@ -132,7 +132,7 @@ class Lamb(Optimizer):
  There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
  As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means. Refer
- document `LossScale <https://www.mindspore.cn/tutorials/en/r2.
+ document `LossScale <https://www.mindspore.cn/tutorials/en/r2.2/advanced/mixed_precision.html>`_ to
  process `loss_scale` correctly.

  If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
@@ -140,6 +140,10 @@ class Lamb(Optimizer):
  parameters are grouped, each group can set `weight_decay`. If not, the `weight_decay` in optimizer will be
  applied.

+ .. warning::
+     The update process of the Lamb optimizer is not completely elementwise, and the sharding of weights in
+     distributed parallel may affect the update result.
+
  Args:
      params (Union[list[Parameter], list[dict]]): Must be list of `Parameter` or list of `dict`. When the
          `params` is a list of `dict`, the string "params", "lr", "weight_decay", "grad_centralization" and
@@ -220,7 +224,7 @@ class Lamb(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Lamb(params=net.trainable_params(), learning_rate=0.1)
@@ -263,8 +267,7 @@ class Lamb(Optimizer):
  def construct(self, gradients):
      weight_decay = self.get_weight_decay()
      lr = self.get_lr()
-
-     self.assignadd(self.global_step, self.global_step_increase_tensor)
+     self.assignadd(self.global_step, self.global_step_increase_tensor)
      lamb_opt = _lamb_opt
      gradients = self.flatten_gradients(gradients)
      gradients = self.gradients_centralization(gradients)
mindspore/nn/optim/lars.py
CHANGED
@@ -109,7 +109,7 @@ class LARS(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
  >>> opt = nn.Momentum(net.trainable_params(), 0.1, 0.9)
mindspore/nn/optim/lazyadam.py
CHANGED
@@ -321,7 +321,7 @@ class LazyAdam(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable,
+ learning_rate (Union[float, int, Tensor, Iterable, :class:`~.train.LearningRateScheduler`]): Default: ``1e-3`` .

      - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -370,7 +370,8 @@ class LazyAdam(Optimizer):
  Tensor[bool], the value is ``True`` .

  Raises:
-     TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable,
+     TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable,
+         :class:`~.train.LearningRateScheduler`.
      TypeError: If element of `parameters` is neither Parameter nor dict.
      TypeError: If `beta1`, `beta2`, `eps` or `loss_scale` is not a float.
      TypeError: If `weight_decay` is neither float nor int.
@@ -387,7 +388,7 @@ class LazyAdam(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.LazyAdam(params=net.trainable_params())
@@ -445,6 +446,7 @@ class LazyAdam(Optimizer):
  gradients = self.scale_grad(gradients)
  gradients = self._grad_sparse_indices_deduplicate(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  beta1_power = self.beta1_power * self.beta1
  self.beta1_power = beta1_power
mindspore/nn/optim/momentum.py
CHANGED
@@ -173,7 +173,7 @@ class Momentum(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
@@ -220,6 +220,7 @@ class Momentum(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.use_dist_optimizer:
      if self.is_group_lr:
          success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum),
mindspore/nn/optim/optimizer.py
CHANGED
@@ -140,6 +140,57 @@ class Optimizer(Cell):

  Supported Platforms:
      ``Ascend`` ``GPU`` ``CPU``
+
+ Examples:
+     >>> import mindspore as ms
+     >>> from mindspore import nn
+     >>> import numpy as np
+     >>> import mindspore
+     >>> from mindspore import nn, ops, Tensor
+     >>>
+     >>> class MyMomentum(nn.Optimizer):
+     ...     def __init__(self, params, learning_rate, momentum=0.9):
+     ...         super(MyMomentum, self).__init__(learning_rate, params)
+     ...         self.moments = self.parameters.clone(prefix="moments", init="zeros")
+     ...         self.momentum = momentum
+     ...         self.opt = ops.ApplyMomentum()
+     ...
+     ...     def construct(self, gradients):
+     ...         params = self.parameters
+     ...         lr = self.get_lr()
+     ...         gradients = self.flatten_gradients(gradients)
+     ...         gradients = self.decay_weight(gradients)
+     ...         gradients = self.gradients_centralization(gradients)
+     ...         gradients = self.scale_grad(gradients)
+     ...
+     ...         success = None
+     ...         for param, mom, grad in zip(params, self.moments, gradients):
+     ...             success = self.opt(param, mom, lr, grad, self.momentum)
+     ...         return success
+     >>>
+     >>> net = nn.Dense(2, 3)
+     >>> loss_fn = nn.MAELoss()
+     >>> opt = MyMomentum(net.trainable_params(), 0.01)
+     >>>
+     >>> device_target = opt.target
+     >>> opt_unique = opt.unique
+     >>> weight_decay_value = opt.get_weight_decay()
+     >>>
+     >>> def forward_fn(data, label):
+     ...     logits = net(data)
+     ...     loss = loss_fn(logits, label)
+     ...     return loss, logits
+     >>>
+     >>> grad_fn = mindspore.value_and_grad(forward_fn, None, opt.parameters, has_aux=True)
+     >>>
+     >>> def train_step(data, label):
+     ...     (loss, _), grads = grad_fn(data, label)
+     ...     opt(grads)
+     ...     return loss
+     >>>
+     >>> data = Tensor(np.random.rand(4, 10, 2), mindspore.dtype.float32)
+     >>> label = Tensor(np.random.rand(4, 10, 3), mindspore.dtype.float32)
+     >>> train_step(data, label)
  """
  _support_parallel_optimizer = False

@@ -233,7 +284,7 @@ class Optimizer(Cell):
  self.cache_enable = tuple(cache_filter(x) for x in self._parameters)
  self.reciprocal_scale = Tensor(1.0 / self.loss_scale, mstype.float32)
  self.need_scale = self.loss_scale != 1.0
- self.global_step_increase_tensor = Tensor(1, mstype.int32)
+ self.global_step_increase_tensor = Tensor([1], mstype.int32)
  self.param_length = len(self._parameters)
  self.map_ = C.Map()
  self.map_reverse = C.Map(None, True)
@@ -702,8 +753,6 @@ class Optimizer(Cell):
          lr += (current_dynamic_lr,)
      else:
          lr = self.learning_rate(self.global_step).reshape(())
-     if self._is_dynamic_lr_or_weight_decay():
-         self.assignadd(self.global_step, self.global_step_increase_tensor)
      return lr

  def get_lr_parameter(self, param):
@@ -722,7 +771,7 @@ class Optimizer(Cell):
  Examples:
      >>> from mindspore import nn
      >>> # Define the network structure of LeNet5. Refer to
-     >>> # https://gitee.com/mindspore/docs/blob/r2.
+     >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
      >>> net = LeNet5()
      >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
      >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
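Because get_lr() no longer advances global_step (the two lines removed in the @@ -702 hunk above), every built-in optimizer in this release does the increment itself in construct(). A hedged sketch of how a user-defined optimizer, written in the style of the new docstring example, could follow the same convention; MyOptimizer and its body are illustrative and not part of the package:

from mindspore import nn, ops

class MyOptimizer(nn.Optimizer):
    def __init__(self, params, learning_rate=0.01):
        super(MyOptimizer, self).__init__(learning_rate, params)
        self.assign_add = ops.AssignAdd()

    def construct(self, gradients):
        gradients = self.flatten_gradients(gradients)
        lr = self.get_lr()
        # Mirror the 2.2.11 built-ins: bump the step counter explicitly,
        # since get_lr() no longer does it, even for dynamic learning rates.
        self.assign_add(self.global_step, self.global_step_increase_tensor)
        # ... apply `lr` and `gradients` to self.parameters here ...
        return lr

opt = MyOptimizer(nn.Dense(2, 3).trainable_params())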
mindspore/nn/optim/proximal_ada_grad.py
CHANGED
@@ -55,9 +55,7 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):

  class ProximalAdagrad(Optimizer):
      r"""
-     Implements the ProximalAdagrad algorithm.
-
-     ProximalAdagrad is an online Learning and Stochastic Optimization.
+     Implements the ProximalAdagrad algorithm that is an online Learning and Stochastic Optimization.
      Refer to paper `Efficient Learning using Forward-Backward Splitting
      <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_.

@@ -165,7 +163,7 @@ class ProximalAdagrad(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.ProximalAdagrad(params=net.trainable_params())
@@ -209,6 +207,7 @@ class ProximalAdagrad(Optimizer):
  grads = self.scale_grad(grads)
  grads = self._grad_sparse_indices_deduplicate(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
      success = self.map_reverse(F.partial(_proximal_ada_grad_opt, self.opt, self.sparse_opt, self.l1, self.l2),
                                 lr, grads, params, accum)
mindspore/nn/optim/rmsprop.py
CHANGED
@@ -47,8 +47,8 @@ class RMSProp(Optimizer):
  Implements Root Mean Squared Propagation (RMSProp) algorithm.

  Update `params` according to the RMSProp algorithm.
- The 29th of the original presentation slide
-
+ The 29th of the original `presentation slide
+ <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_ proposes RMSProp.
  The equation is as follows:

  .. math::
@@ -180,7 +180,7 @@ class RMSProp(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.RMSProp(params=net.trainable_params(), learning_rate=0.1)
@@ -236,6 +236,7 @@ class RMSProp(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.centered:
      if self.is_group_lr:
          success = self.hyper_map_reverse(F.partial(_centered_rmsprop_opt, self.opt, self.decay, self.epsilon,
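For reference, the update the RMSProp docstring describes (its .. math:: block is not shown in this diff) follows the standard mean-square form. A plain-NumPy paraphrase of the textbook non-centered rule, added only as an illustration and not code from rmsprop.py:

import numpy as np

def rmsprop_step(w, g, mean_square, lr=0.01, decay=0.9, eps=1e-10):
    # Accumulate a decayed mean of squared gradients, then scale the step by its root.
    mean_square = decay * mean_square + (1.0 - decay) * g * g
    w = w - lr * g / np.sqrt(mean_square + eps)
    return w, mean_square

w, ms_acc = np.array([1.0, -2.0]), np.zeros(2)
w, ms_acc = rmsprop_step(w, np.array([0.5, -0.5]), ms_acc)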
mindspore/nn/optim/rprop.py
CHANGED
@@ -135,7 +135,7 @@ class Rprop(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Rprop(params=net.trainable_params())
@@ -189,8 +189,8 @@ class Rprop(Optimizer):
  self.prev = self._parameters.clone(prefix="prev", init='zeros')
  self.step_size = self._parameters.clone(prefix="step_size", init='zeros')

- self.fill = P.Fill()
  self.sign = P.Sign()
+ self.fill = P.FillV2()
  self.assign = P.Assign()
  self.assignadd = P.AssignAdd()
  self.cast = P.Cast()
@@ -204,8 +204,7 @@ class Rprop(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lrs = self.get_lr()
-
- self.assignadd(self.global_step, self.global_step_increase_tensor)
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  success = True

  for index, (grad, param, prev, step_size) in enumerate(zip(gradients, self._parameters,
@@ -221,14 +220,26 @@ class Rprop(Optimizer):
  param_fp32 = self.cast(param, mstype.float32)

  sign = self.sign(gradient_fp32 * prev)
- sign = self.select(
-
-
-
-
-
-
-
+ sign = self.select(
+     sign > 0,
+     self.fill(sign.shape, self.cast(self.etaplus, mstype.float32)),
+     sign)
+ sign = self.select(
+     sign < 0,
+     self.fill(sign.shape, self.cast(self.etaminus,
+                                     mstype.float32)), sign)
+ sign = self.select(
+     sign == 0, self.fill(sign.shape,
+                          self.cast(1., mstype.float32)), sign)
+
+ step_size_fp32 = ops.clip_by_value(step_size_fp32 * sign,
+                                    self.step_size_min,
+                                    self.step_size_max)
+
+ gradient_update = self.select(
+     sign == self.etaminus,
+     self.fill(sign.shape, self.cast(0., mstype.float32)),
+     gradient_fp32)
  next_param = param_fp32 - self.sign(gradient_update) * step_size_fp32

  self.assign(param, self.cast(next_param, param.dtype))
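The rewritten block above is the classic Rprop sign rule: grow the per-element step size by etaplus while the gradient keeps its sign, shrink it by etaminus when the sign flips (and skip that element's update), and clip the result to the configured bounds. A plain-NumPy paraphrase for illustration; the default values here are typical Rprop settings and are not read from rprop.py:

import numpy as np

def rprop_step(param, grad, prev_grad, step_size,
               etaplus=1.2, etaminus=0.5, step_min=1e-6, step_max=50.0):
    sign = np.sign(grad * prev_grad)
    factor = np.where(sign > 0, etaplus, np.where(sign < 0, etaminus, 1.0))
    step_size = np.clip(step_size * factor, step_min, step_max)
    # Where the sign flipped, drop this element's update entirely.
    grad_update = np.where(factor == etaminus, 0.0, grad)
    param = param - np.sign(grad_update) * step_size
    return param, grad_update, step_size  # grad_update becomes the next step's prev_grad

p, g_prev, s = np.zeros(3), np.zeros(3), np.full(3, 0.1)
p, g_prev, s = rprop_step(p, np.array([0.3, -0.2, 0.0]), g_prev, s)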
mindspore/nn/optim/sgd.py
CHANGED
@@ -132,7 +132,7 @@ class SGD(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.SGD(params=net.trainable_params())
@@ -163,29 +163,29 @@ class SGD(Optimizer):
  if isinstance(momentum, int):
      momentum = float(momentum)
  if not isinstance(momentum, float):
-     raise TypeError("For 'SGD', the argument 'momentum' must be float type, "
-                     "but got {
+     raise TypeError(f"For 'SGD', the argument 'momentum' must be float type, "
+                     f"but got {type(momentum)}.")

  if isinstance(momentum, float) and momentum < 0.0:
-     raise ValueError("For 'SGD', the argument 'momentum' must be at least 0.0, "
-                      "but got {}."
+     raise ValueError(f"For 'SGD', the argument 'momentum' must be at least 0.0, "
+                      f"but got {momentum}.")

  if isinstance(dampening, int):
      dampening = float(dampening)
  if not isinstance(dampening, float):
-     raise TypeError("For 'SGD', the argument 'dampening' must be float type, "
-                     "but got {
+     raise TypeError(f"For 'SGD', the argument 'dampening' must be float type, "
+                     f"but got {type(dampening)}.")

  if dampening < 0.0:
-     raise ValueError("For 'SGD', the argument 'dampening' must be at least 0.0, "
-                      "but got 'dampening' {}"
+     raise ValueError(f"For 'SGD', the argument 'dampening' must be at least 0.0, "
+                      f"but got 'dampening' {dampening}")
  self.dampening = dampening

  validator.check_value_type("nesterov", nesterov, [bool], self.cls_name)

  if nesterov and (momentum <= 0.0 or dampening != 0.0):
-     raise ValueError("For 'SGD', if 'nesterov' is true, 'momentum' must be > 0.0 and 'dampening' must "
-                      "equal to 0.0, but got 'momentum' {}, 'dampening' {}"
+     raise ValueError(f"For 'SGD', if 'nesterov' is true, 'momentum' must be > 0.0 and 'dampening' must "
+                      f"equal to 0.0, but got 'momentum' {momentum}, 'dampening' {dampening}.")
  self.nesterov = nesterov

  if self.dynamic_weight_decay:
@@ -198,9 +198,23 @@ class SGD(Optimizer):
  self.opt = tuple([P.SGD(dampening, float(weight_decay), nesterov)] * len(self._parameters))

  self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
+
+ if not momentum > 0.0:
+     enable_cache_param_list = []
+     for param in self._parameters:
+         if param.cache_enable:
+             enable_cache_param_list.append(param)
+             param.cache_enable = False
+
  self.accum = self._parameters.clone(prefix="accum", init='zeros')
  self.stat = self._parameters.clone(prefix="stat", init='ones')

+
+ if not momentum > 0.0:
+     for param in enable_cache_param_list:
+         param.cache_enable = True
+
+
  @jit
  def construct(self, gradients):
      params = self._parameters
@@ -210,6 +224,7 @@ class SGD(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
      success = self.hyper_map_reverse(F.partial(_sgd_opt, self.momentum),
                                       lr, gradients, params, accum, stat, self.opt)
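The other notable change in SGD.__init__ is the cache_enable handling: when momentum is 0, parameters that have cache_enable set are switched off while the accum and stat copies are cloned, then switched back. A self-contained sketch of that save/disable/restore pattern with a hypothetical Param stand-in, not the MindSpore implementation itself:

class Param:
    def __init__(self, name, cache_enable=False):
        self.name, self.cache_enable = name, cache_enable

params = [Param("w", cache_enable=True), Param("b")]

saved = [p for p in params if p.cache_enable]
for p in saved:
    p.cache_enable = False                      # clones made below see the flag disabled

clones = [Param("accum." + p.name, p.cache_enable) for p in params]

for p in saved:
    p.cache_enable = True                       # restore the original flags afterwards

print([(c.name, c.cache_enable) for c in clones])  # [('accum.w', False), ('accum.b', False)]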