mindspore 2.7.0-cp310-cp310-win_amd64.whl → 2.7.1-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_extends/parse/compile_config.py +24 -1
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -2
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +8 -1
- mindspore/_extends/parse/trope.py +2 -1
- mindspore/_extends/pijit/pijit_func_white_list.py +7 -22
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/base.py +29 -2
- mindspore/common/_decorator.py +3 -2
- mindspore/common/_grad_function.py +3 -1
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +275 -64
- mindspore/common/_utils.py +0 -44
- mindspore/common/api.py +285 -35
- mindspore/common/dump.py +7 -108
- mindspore/common/dynamic_shape/auto_dynamic_shape.py +1 -3
- mindspore/common/hook_handle.py +60 -0
- mindspore/common/jit_config.py +5 -1
- mindspore/common/jit_trace.py +27 -12
- mindspore/common/lazy_inline.py +5 -3
- mindspore/common/parameter.py +13 -107
- mindspore/common/recompute.py +4 -11
- mindspore/common/tensor.py +16 -169
- mindspore/communication/_comm_helper.py +11 -1
- mindspore/communication/comm_func.py +138 -4
- mindspore/communication/management.py +85 -1
- mindspore/config/op_info.config +0 -15
- mindspore/context.py +5 -85
- mindspore/dataset/engine/datasets.py +8 -4
- mindspore/dataset/engine/datasets_vision.py +1 -1
- mindspore/dataset/engine/validators.py +1 -15
- mindspore/dnnl.dll +0 -0
- mindspore/{experimental/llm_boost/ascend_native → graph}/__init__.py +7 -7
- mindspore/graph/custom_pass.py +55 -0
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/__init__.py +3 -3
- mindspore/mindrecord/common/exceptions.py +1 -0
- mindspore/mindrecord/config.py +1 -1
- mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
- mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
- mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
- mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
- mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
- mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
- mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
- mindspore/mindrecord/filereader.py +4 -4
- mindspore/mindrecord/filewriter.py +5 -5
- mindspore/mindrecord/mindpage.py +2 -2
- mindspore/mindrecord/tools/cifar10.py +1 -1
- mindspore/mindrecord/tools/cifar100.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
- mindspore/mindrecord/tools/cifar10_to_mr.py +1 -1
- mindspore/mindrecord/tools/csv_to_mr.py +1 -1
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_cluster.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_hardware_abstract.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mindspore_runtime_utils.dll +0 -0
- mindspore/mindspore_tools.dll +0 -0
- mindspore/mint/__init__.py +15 -10
- mindspore/mint/distributed/distributed.py +182 -62
- mindspore/mint/nn/__init__.py +2 -16
- mindspore/mint/nn/functional.py +4 -110
- mindspore/mint/nn/layer/__init__.py +0 -2
- mindspore/mint/nn/layer/activation.py +0 -6
- mindspore/mint/nn/layer/basic.py +0 -47
- mindspore/mint/nn/layer/conv.py +4 -4
- mindspore/mint/nn/layer/normalization.py +8 -13
- mindspore/mint/nn/layer/pooling.py +0 -4
- mindspore/nn/__init__.py +1 -3
- mindspore/nn/cell.py +16 -66
- mindspore/nn/layer/basic.py +49 -1
- mindspore/nn/layer/container.py +16 -0
- mindspore/nn/layer/embedding.py +4 -169
- mindspore/nn/layer/normalization.py +2 -1
- mindspore/nn/layer/thor_layer.py +4 -85
- mindspore/nn/optim/ada_grad.py +0 -1
- mindspore/nn/optim/adafactor.py +0 -1
- mindspore/nn/optim/adam.py +31 -124
- mindspore/nn/optim/adamax.py +0 -1
- mindspore/nn/optim/asgd.py +0 -1
- mindspore/nn/optim/ftrl.py +8 -102
- mindspore/nn/optim/lamb.py +0 -1
- mindspore/nn/optim/lars.py +0 -3
- mindspore/nn/optim/lazyadam.py +25 -218
- mindspore/nn/optim/momentum.py +5 -43
- mindspore/nn/optim/optimizer.py +6 -55
- mindspore/nn/optim/proximal_ada_grad.py +0 -1
- mindspore/nn/optim/rmsprop.py +0 -1
- mindspore/nn/optim/rprop.py +0 -1
- mindspore/nn/optim/sgd.py +0 -1
- mindspore/nn/optim/tft_wrapper.py +0 -1
- mindspore/nn/optim/thor.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -8
- mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
- mindspore/nn/probability/bijector/power_transform.py +20 -21
- mindspore/nn/probability/bijector/scalar_affine.py +5 -5
- mindspore/nn/probability/bijector/softplus.py +13 -14
- mindspore/nn/wrap/grad_reducer.py +4 -74
- mindspore/numpy/array_creations.py +2 -2
- mindspore/numpy/fft.py +9 -9
- mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
- mindspore/onnx/onnx_export.py +137 -0
- mindspore/opencv_core4110.dll +0 -0
- mindspore/opencv_imgcodecs4110.dll +0 -0
- mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
- mindspore/ops/__init__.py +2 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
- mindspore/ops/_op_impl/cpu/__init__.py +0 -5
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +16 -22
- mindspore/ops/auto_generate/gen_extend_func.py +2 -7
- mindspore/ops/auto_generate/gen_ops_def.py +98 -141
- mindspore/ops/auto_generate/gen_ops_prim.py +12708 -12686
- mindspore/ops/communication.py +97 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +15 -1
- mindspore/ops/composite/multitype_ops/__init__.py +3 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
- mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
- mindspore/ops/function/__init__.py +1 -0
- mindspore/ops/function/array_func.py +14 -12
- mindspore/ops/function/comm_func.py +3883 -0
- mindspore/ops/function/debug_func.py +3 -4
- mindspore/ops/function/math_func.py +45 -54
- mindspore/ops/function/nn_func.py +75 -294
- mindspore/ops/function/random_func.py +9 -18
- mindspore/ops/functional.py +2 -0
- mindspore/ops/functional_overload.py +354 -18
- mindspore/ops/operations/__init__.py +2 -5
- mindspore/ops/operations/_custom_ops_utils.py +7 -9
- mindspore/ops/operations/_inner_ops.py +1 -38
- mindspore/ops/operations/_rl_inner_ops.py +0 -933
- mindspore/ops/operations/array_ops.py +1 -0
- mindspore/ops/operations/comm_ops.py +94 -2
- mindspore/ops/operations/custom_ops.py +228 -19
- mindspore/ops/operations/debug_ops.py +27 -29
- mindspore/ops/operations/manually_defined/ops_def.py +27 -306
- mindspore/ops/operations/nn_ops.py +2 -2
- mindspore/ops/operations/sparse_ops.py +0 -83
- mindspore/ops/primitive.py +1 -17
- mindspore/ops/tensor_method.py +72 -3
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
- mindspore/ops_generate/api/functions_cc_generator.py +53 -4
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
- mindspore/ops_generate/common/gen_constants.py +11 -10
- mindspore/ops_generate/common/op_proto.py +18 -1
- mindspore/ops_generate/common/template.py +102 -245
- mindspore/ops_generate/common/template_utils.py +212 -0
- mindspore/ops_generate/gen_custom_ops.py +69 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
- mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
- mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
- mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
- mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
- mindspore/ops_generate/resources/yaml_loader.py +13 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
- mindspore/parallel/_cell_wrapper.py +1 -1
- mindspore/parallel/_parallel_serialization.py +1 -4
- mindspore/parallel/_utils.py +29 -6
- mindspore/parallel/checkpoint_transform.py +18 -2
- mindspore/parallel/cluster/process_entity/_api.py +24 -32
- mindspore/parallel/cluster/process_entity/_utils.py +9 -5
- mindspore/{experimental/llm_boost/atb → parallel/distributed}/__init__.py +21 -23
- mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
- mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
- mindspore/parallel/strategy.py +336 -0
- mindspore/parallel/transform_safetensors.py +117 -16
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +3 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
- mindspore/profiler/common/constant.py +5 -0
- mindspore/profiler/common/file_manager.py +9 -0
- mindspore/profiler/common/msprof_cmd_tool.py +38 -2
- mindspore/profiler/common/path_manager.py +56 -24
- mindspore/profiler/common/profiler_context.py +2 -12
- mindspore/profiler/common/profiler_info.py +3 -3
- mindspore/profiler/common/profiler_path_manager.py +13 -0
- mindspore/profiler/common/util.py +30 -3
- mindspore/profiler/experimental_config.py +2 -1
- mindspore/profiler/platform/npu_profiler.py +33 -6
- mindspore/run_check/_check_version.py +108 -24
- mindspore/runtime/__init__.py +3 -2
- mindspore/runtime/executor.py +11 -3
- mindspore/runtime/memory.py +112 -0
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
- mindspore/tools/data_dump.py +130 -0
- mindspore/tools/sdc_detect.py +91 -0
- mindspore/tools/stress_detect.py +63 -0
- mindspore/train/__init__.py +6 -6
- mindspore/train/_utils.py +5 -18
- mindspore/train/amp.py +6 -4
- mindspore/train/callback/_checkpoint.py +0 -9
- mindspore/train/callback/_train_fault_tolerance.py +69 -18
- mindspore/train/data_sink.py +1 -5
- mindspore/train/model.py +38 -211
- mindspore/train/serialization.py +126 -387
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dlpack.py +92 -0
- mindspore/utils/dryrun.py +1 -1
- mindspore/utils/runtime_execution_order_check.py +10 -0
- mindspore/utils/sdc_detect.py +14 -12
- mindspore/utils/stress_detect.py +43 -0
- mindspore/utils/utils.py +144 -8
- mindspore/version.py +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/RECORD +254 -267
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -210
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
- mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
- mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
- mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
- mindspore/experimental/llm_boost/register.py +0 -130
- mindspore/experimental/llm_boost/utils.py +0 -31
- mindspore/include/OWNERS +0 -7
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
- mindspore/nn/reinforcement/_batch_read_write.py +0 -142
- mindspore/nn/reinforcement/_tensors_queue.py +0 -152
- mindspore/nn/reinforcement/tensor_array.py +0 -145
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
- mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
- mindspore/ops/_op_impl/cpu/buffer_append.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
- mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
- mindspore/ops/operations/_tensor_array.py +0 -359
- mindspore/ops/operations/rl_ops.py +0 -288
- mindspore/parallel/_offload_context.py +0 -275
- mindspore/parallel/_recovery_context.py +0 -115
- mindspore/parallel/_transformer/__init__.py +0 -35
- mindspore/parallel/_transformer/layers.py +0 -765
- mindspore/parallel/_transformer/loss.py +0 -251
- mindspore/parallel/_transformer/moe.py +0 -693
- mindspore/parallel/_transformer/op_parallel_config.py +0 -222
- mindspore/parallel/_transformer/transformer.py +0 -3124
- mindspore/parallel/mpi/_mpi_config.py +0 -116
- mindspore/train/memory_profiling_pb2.py +0 -298
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
mindspore/mint/nn/functional.py
CHANGED
@@ -15,10 +15,7 @@
 """mint nn functional."""
 from __future__ import absolute_import
 import mindspore.ops as ops
-import mindspore.mint as mint
-from mindspore import log as logger
 from mindspore import _checkparam as validator
-from mindspore.ops.primitive import constexpr
 from mindspore.ops.function.nn_func import max_pool2d_ext as max_pool2d
 from mindspore.ops.functional import (
     conv_transpose2d,
@@ -55,8 +52,9 @@ from mindspore.ops.function.nn_func import relu_
 
 # 14
 from mindspore.ops.function.nn_func import dropout_ext as dropout
+from mindspore.ops.function.nn_func import dropout2d_ext as dropout2d
 # 15
-from mindspore.ops.
+from mindspore.ops.functional_overload import conv1d
 from mindspore.ops.function.nn_func import conv2d_ext as conv2d
 # 16
 from mindspore.ops.function.nn_func import log_softmax_ext as log_softmax
@@ -126,6 +124,7 @@ from mindspore.ops.auto_generate import inplace_silu
 
 # 49
 from mindspore.ops.functional import sigmoid
+from mindspore.ops.functional import inplace_sigmoid as sigmoid_
 # 50
 
 # 51
@@ -502,9 +501,6 @@ def relu6(input, inplace=False):
     .. image:: ../images/ReLU6.png
         :align: center
 
-    .. warning::
-        This is an experimental optimizer API that is subject to change.
-
     Args:
         input (Tensor): input Tensor. Dtype is in int8, int16, int32, int64, uint8, float16, float32, bfloat16.
         inplace (bool, optional): Whether to apply erasing inplace. Default: ``False``.
@@ -770,9 +766,6 @@ def smooth_l1_loss(input, target, reduction='mean', beta=1.0):
     Here :math:`\text{beta}` controls the point where the loss function changes from quadratic to linear.
     :math:`\text{beta} \geq 0` , its default value is ``1.0`` . :math:`N` is the batch size.
 
-    .. warning::
-        This is an experimental optimizer API that is subject to change.
-
     Note:
         - Arg `input` and `target` comply with the implicit type conversion rules to make the data types consistent.
          If they have different data types, the lower precision data type will be converted to relatively the
@@ -837,104 +830,6 @@ def smooth_l1_loss(input, target, reduction='mean', beta=1.0):
     return ops.function.smooth_l1_loss(input, target, beta, reduction)
 
 
-@constexpr
-def log_warning(msg):
-    """Adds warning to logger."""
-    logger.warning(msg)
-
-
-def dropout2d(input, p=0.5, training=True):
-    r"""
-    During training, randomly zeroes some channels of the input tensor with probability `p`
-    from a Bernoulli distribution (For a 4-dimensional tensor with a shape of :math:`(N, C, H, W)`,
-    the channel feature map refers to a 2-dimensional feature map with the shape of :math:`(H, W)`).
-
-    For example, the :math:`j\_th` channel of the :math:`i\_th` sample in the batched input is a to-be-processed
-    `2D` tensor input[i,j].
-    Each channel will be zeroed out independently on every forward call which based on Bernoulli distribution
-    probability `p`.
-    The parper `Dropout: A Simple Way to Prevent Neural Networks from Overfitting
-    <http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf>`_ mentioned this technology, and it is proved that
-    it can effectively reduce over fitting and prevent neuronal coadaptation.
-    For more details, refer to `Improving neural networks by preventing co-adaptation of feature detectors
-    <https://arxiv.org/pdf/1207.0580.pdf>`_ .
-
-    `dropout2d` can improve the independence between channel feature maps.
-
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
-    Args:
-        input (Tensor): A `4D` tensor with shape :math:`(N, C, H, W)`, where `N` is the batch size, `C` is the number
-            of channels, `H` is the feature height, and `W` is the feature width.
-        p (float, optional): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
-            which means dropping out 80% of channels. Default: ``0.5`` .
-        training(bool, optional): If `training` is True, applying dropout, otherwise, not applying. Default: ``True`` .
-
-    Returns:
-        Tensor, output, with the same shape and data type as `input`.
-
-    Raises:
-        TypeError: If `input` is not a Tensor.
-        TypeError: If the data type of `p` is not float.
-        ValueError: If `p` is out of the range `[0.0, 1.0]`.
-
-    Supported Platforms:
-        ``Ascend``
-
-    Examples:
-        >>> import mindspore
-        >>> import numpy as np
-        >>> from mindspore import Tensor, mint
-        >>> input = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32)
-        >>> output = mint.nn.functional.dropout2d(input, 0.5)
-        >>> print(output.shape)
-        (2, 1, 2, 3)
-    """
-    def dropout2d_impl_(input, p, training):
-        if p == 0 or not training or input.numel() == 0:
-            return input
-
-        if p == 1:
-            return mint.mul(input, mint.zeros((), dtype=input.dtype))
-
-        if input.ndim < 2:
-            raise ValueError(f'For dropout2d, input size after unsqueeze must be greater or equal to 2')
-
-        if ops.is_sequence_shape_unknown(input.shape):
-            input_tensor_shape = ops.TensorShape()(input)
-            nosie_tensor_shape = mint.ones_like(input_tensor_shape)
-            nosie_tensor_shape[0] = input_tensor_shape[0]
-            nosie_tensor_shape[1] = input_tensor_shape[1]
-            nosie_shape = ops.TensorToTuple()(nosie_tensor_shape)
-        else:
-            nosie_shape = input.shape[:2] + tuple(1 for _ in range(len(input.shape) - 2))
-        nosie = mint.full(nosie_shape, 1 - p, dtype=input.dtype)
-        nosie = mint.bernoulli(nosie)
-        nosie = mint.div(nosie, 1 - p)
-
-        return mint.mul(input, nosie)
-
-    validator.check_float_range(p, 0.0, 1.0, validator.INC_BOTH, "p", "dropout2d")
-    validator.check_bool(training, "training", "dropout2d")
-
-    if input.ndim not in (3, 4):
-        log_warning(f"dropout2d receviced a {input.ndim}-D input which is not recommended. Please use dropout instead.")
-
-    is_batched = input.ndim == 4
-    if not is_batched:
-        input_shape = input.shape
-        if ops.is_sequence_shape_unknown(input.shape):
-            input_shape = ops.TensorToTuple()(ops.TensorShape()(input))
-        input = input.reshape((1, *input_shape))
-        result = dropout2d_impl_(input, p, training)
-        result = result.reshape(input_shape)
-    else:
-        result = dropout2d_impl_(input, p, training)
-
-    return result
-
-
 def normalize(input, p=2.0, dim=1, eps=1e-12):
     r"""
     Perform normalization of inputs over specified dimension
@@ -1060,7 +955,6 @@ def adaptive_avg_pool3d(input, output_size):
 
     .. warning::
         For Ascend, it is only supported on Atlas A2 Training Series Products.
-        This is an experimental optimizer API that is subject to change or deletion.
 
     Args:
         input (Tensor): The input of adaptive_avg_pool3d, which is a 4D or 5D Tensor.
@@ -1265,6 +1159,7 @@ __all__ = [
 
     # 49
     'sigmoid',
+    'sigmoid_',
     # 50
 
     # 51
@@ -1402,7 +1297,6 @@ __all__ = [
     'adaptive_avg_pool2d',
 
     # 350
-    'conv1d',
 
     # 393
     'dropout2d',
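Taken together, the `mint.nn.functional` changes swap the hand-written `dropout2d` for the generated `dropout2d_ext` binding, source `conv1d` from `functional_overload`, and export an in-place `sigmoid_`. A minimal usage sketch (an illustration, not part of the diff; it assumes the public signatures are unchanged and a backend where the extension ops are available, e.g. Ascend):

    import numpy as np
    import mindspore
    from mindspore import Tensor, mint

    x = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32)

    # dropout2d now binds to the generated dropout2d_ext op rather than the removed
    # pure-Python implementation; the call site itself does not change.
    out = mint.nn.functional.dropout2d(x, p=0.5)
    print(out.shape)  # (2, 1, 2, 3)

    # sigmoid_ is the newly exported alias of inplace_sigmoid.
    y = Tensor(np.array([0.5, -1.0, 2.0]), mindspore.float32)
    mint.nn.functional.sigmoid_(y)  # assumed to update y in place, like sigmoid without a copy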
mindspore/mint/nn/__init__.py
CHANGED
@@ -31,7 +31,6 @@ from mindspore.mint.nn.layer.normalization import SyncBatchNorm
 from mindspore.mint.nn.layer.activation import LogSigmoid
 from mindspore.mint.nn.layer.activation import SiLU
 from mindspore.mint.nn.layer.activation import Threshold
-from mindspore.mint.nn.layer.basic import Dropout2d
 from mindspore.mint.nn.layer.pooling import AdaptiveMaxPool1d
 from mindspore.mint.nn.layer.pooling import AdaptiveAvgPool1d
 from mindspore.mint.nn.layer.pooling import AdaptiveAvgPool2d
@@ -46,7 +45,6 @@ __all__ = [
     'LayerNorm',
     'LogSigmoid',
     'SiLU',
-    'Dropout2d',
     'AdaptiveMaxPool1d',
     'AdaptiveAvgPool1d',
     'AdaptiveAvgPool2d',
mindspore/mint/nn/layer/activation.py
CHANGED
@@ -147,9 +147,6 @@ class LogSigmoid(Cell):
     .. image:: ../images/LogSigmoid.png
         :align: center
 
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
     Inputs:
         - **input** (Tensor) - The input of LogSigmoid with data type of bfloat16, float16 or float32.
           The shape is :math:`(*)` where :math:`*` means, any number of additional dimensions.
@@ -313,9 +310,6 @@ class Tanh(Cell):
     .. image:: ../images/Tanh.png
         :align: center
 
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
     Inputs:
         - **input** (Tensor) - Tensor of any dimension, input with data type of float16 or float32.
 
mindspore/mint/nn/layer/basic.py
CHANGED
@@ -18,52 +18,6 @@ from __future__ import division
 
 from mindspore import mint
 from mindspore.nn.cell import Cell
-from mindspore import _checkparam as validator
-
-
-class Dropout2d(Cell):
-    r"""
-    During training, randomly zeroes some channels of the input tensor with probability `p`
-    from a Bernoulli distribution (For a 4-dimensional tensor with a shape of :math:`NCHW`,
-    the channel feature map refers to a 2-dimensional feature map with the shape of :math:`HW`).
-
-    For example, the :math:`j\_th` channel of the :math:`i\_th` sample in the batched input is a to-be-processed
-    `2D` tensor input[i,j].
-    Each channel will be zeroed out independently on every forward call with probability `p` using samples
-    from a Bernoulli distribution.
-
-    `Dropout2d` can improve the independence between channel feature maps.
-
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
-    Refer to :func:`mindspore.mint.nn.functional.dropout2d` for more details.
-
-    Supported Platforms:
-        ``Ascend``
-
-    Examples:
-        >>> import mindspore
-        >>> from mindspore import Tensor, mint
-        >>> import numpy as np
-        >>> dropout = mint.nn.Dropout2d(p=0.5)
-        >>> x = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32)
-        >>> output = dropout(x)
-        >>> print(output.shape)
-        (2, 1, 2, 3)
-    """
-
-    def __init__(self, p=0.5):
-        """Initialize Dropout2d."""
-        super(Dropout2d, self).__init__()
-        validator.check_float_range(p, 0.0, 1.0, validator.INC_BOTH, "p", self.cls_name)
-        self.p = p
-
-    def construct(self, x):
-        if not self.training or self.p == 0:
-            return x
-
-        return mint.nn.functional.dropout2d(x, self.p)
 
 
 class Flatten(Cell):
@@ -118,6 +72,5 @@ class Flatten(Cell):
 
 
 __all__ = [
-    'Dropout2d',
     'Flatten',
 ]
mindspore/mint/nn/layer/conv.py
CHANGED
@@ -222,10 +222,10 @@ class Conv1d(_Conv):
         dtype (:class:`mindspore.dtype`, optional): Dtype of Parameters. Default: ``None``, using ``mstype.float32``.
 
     Variables:
-        - **weight** (Tensor) - The weight of the convolution layer, with shape
-
-        - **bias** (Tensor) - The bias of the convolution layer, with shape
-
+        - **weight** (Tensor) - The weight of the convolution layer, with shape
+          :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]})`.
+        - **bias** (Tensor) - The bias of the convolution layer, with shape
+          :math:`(C_{out})`. If bias is False, this will be None.
 
     Inputs:
         - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, L_{in})` \
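The Conv1d docstring now states the parameter shapes explicitly. A quick sketch of what those shapes look like in practice (illustrative only; assumes the usual `Conv1d(in_channels, out_channels, kernel_size, ...)` signature):

    from mindspore import mint

    # in_channels=4, out_channels=8, kernel_size=3, groups=2
    conv = mint.nn.Conv1d(4, 8, 3, groups=2, bias=True)

    print(conv.weight.shape)  # documented as (C_out, C_in / groups, kernel_size[0]) -> (8, 2, 3)
    print(conv.bias.shape)    # documented as (C_out,) -> (8,)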
mindspore/mint/nn/layer/normalization.py
CHANGED
@@ -37,7 +37,6 @@ class _NormBase(Cell):
                  dtype=None
     ) -> None:
         super(_NormBase, self).__init__()
-        self.set_train()
         self.shape = ops.Shape()
         self.num_features = num_features
         self.eps = eps
@@ -110,7 +109,6 @@ class _BatchNorm(_NormBase):
                  dtype=None) -> None:
         super(_BatchNorm, self).__init__(num_features, eps, momentum, affine, track_running_stats,
                                          dtype)
-        self.training = True
 
 
     def _check_input_dim(self, input):
@@ -170,7 +168,6 @@ class BatchNorm1d(_BatchNorm):
 
     .. warning::
         This API does not support Dynamic Rank.
-        This is an experimental API that is subject to change or deletion.
 
     Args:
         num_features (int): `C` from an expected input of shape :math:`(N, C, L)`.
@@ -209,8 +206,8 @@ class BatchNorm1d(_BatchNorm):
         >>> net = mint.nn.BatchNorm1d(4)
         >>> output = net(input_x)
         >>> print(output)
-        [[
-        [
+        [[0.6999965  0.4999975  0.4999975  0.59999704]
+         [0.4999975  0.399998   0.59999704 0.89999545]]
     """
 
     def _check_input_dim(self, input):
@@ -239,7 +236,6 @@ class BatchNorm2d(_BatchNorm):
 
     .. warning::
         - This API does not support Dynamic Rank.
-        - This is an experimental API that is subject to change or deletion.
 
     Args:
         num_features (int): `C` from an expected input of shape :math:`(N, C, H, W)`.
@@ -277,10 +273,10 @@ class BatchNorm2d(_BatchNorm):
         >>> net = mint.nn.BatchNorm2d(2)
         >>> output = net(input_x)
         >>> print(output)
-        [[[[
-        [[0.
-
-        [[
+        [[[[0.29999852]]
+          [[0.399998  ]]]
+         [[[0.4999975 ]]
+          [[0.29999852]]]]
     """
 
     def _check_input_dim(self, input):
@@ -309,7 +305,6 @@ class BatchNorm3d(_BatchNorm):
 
     .. warning::
         This API does not support Dynamic Rank.
-        This is an experimental API that is subject to change or deletion.
 
     Args:
         num_features (int): `C` from an expected input of shape :math:`(N, C, D, H, W)`.
@@ -347,8 +342,8 @@ class BatchNorm3d(_BatchNorm):
         >>> net = mint.nn.BatchNorm3d(2)
        >>> output = net(input_x)
         >>> print(output)
-        [[[[[
-        [[[
+        [[[[[0.0999995  0.89999545]]]
+          [[[1.1999941  2.2999885 ]]]]]
     """
 
     def _check_input_dim(self, input):
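With `self.set_train()` dropped from `_NormBase.__init__` and `self.training = True` dropped from `_BatchNorm.__init__`, the mint batch-norm layers apparently no longer force training mode at construction. A sketch of the explicit pattern this implies (an assumption drawn from the removed lines, not stated elsewhere in the diff):

    import numpy as np
    import mindspore
    from mindspore import Tensor, mint

    net = mint.nn.BatchNorm2d(2)
    x = Tensor(np.arange(8).reshape(2, 2, 1, 2), mindspore.float32)

    net.set_train(True)    # training: update running_mean / running_var from batch statistics
    _ = net(x)

    net.set_train(False)   # inference: normalize with the accumulated running statistics
    y = net(x)
    print(y.shape)         # (2, 2, 1, 2)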
mindspore/mint/nn/layer/pooling.py
CHANGED
@@ -73,9 +73,6 @@ class AdaptiveAvgPool2d(_AdaptiveAvgPoolNd):
     The output is of size :math:`H x W` , for any input size.
     The number of output features is equal to the number of input planes.
 
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
     Args:
         output_size (Union(int, tuple[int])): the target output size of the image of the form :math:`H x W` .
             Can be a tuple :math:`(H, W)` or a single :math:`H` for square image :math:`H x H` .
@@ -129,7 +126,6 @@ class AdaptiveAvgPool3d(Cell):
 
     .. warning::
         For Ascend, it is only supported on Atlas A2 Training Series Products.
-        This is an experimental optimizer API that is subject to change or deletion.
 
     Args:
         output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(D, H, W)`,
mindspore/nn/__init__.py
CHANGED
@@ -21,7 +21,7 @@ from __future__ import absolute_import
 
 __all__ = ["Cell", "GraphCell", "PipelineGradReducer", "PipelineCell", "MicroBatchInterleaved"]
 
-from mindspore.nn import layer, loss, optim, wrap, grad, metrics, probability, sparse, dynamic_lr
+from mindspore.nn import layer, loss, optim, wrap, grad, metrics, probability, sparse, dynamic_lr
 from mindspore.parallel.nn.parallel_grad_reducer import PipelineGradReducer
 from mindspore.parallel.nn.parallel_cell_wrapper import PipelineCell, MicroBatchInterleaved
 from mindspore.nn.learning_rate_schedule import *
@@ -34,7 +34,6 @@ from mindspore.nn.metrics import *
 from mindspore.nn.wrap import *
 from mindspore.nn.grad import Jvp, Vjp
 from mindspore.nn.sparse import *
-from mindspore.nn.reinforcement import *
 from mindspore.nn.utils import *
 
 __all__.extend(layer.__all__)
@@ -46,7 +45,6 @@ __all__.extend(grad.__all__)
 __all__.extend(sparse.__all__)
 __all__.extend(learning_rate_schedule.__all__)
 __all__.extend(dynamic_lr.__all__)
-__all__.extend(reinforcement.__all__)
 __all__.extend(utils.__all__)
 
 __all__.sort()
mindspore/nn/cell.py
CHANGED
@@ -192,6 +192,7 @@ class Cell(Cell_):
         super().__setattr__("_auto_prefix", auto_prefix)
         super().__setattr__("_scope", None)
         super().__setattr__("_phase", 'train')
+        super().__setattr__("_compile_phase", None)
         super().__setattr__("_parameter_layout_dict", None)
         super().__setattr__("_parallel_parameter_name_list", None)
         super().__setattr__("_parallel_parameter_merge_net_dict", None)
@@ -493,6 +494,19 @@ class Cell(Cell_):
             raise TypeError(f"For 'Cell', the property 'phase' must be string type, but got type {type(value)}.")
         self._phase = value
 
+    @property
+    def compile_phase(self):
+        return self._compile_phase
+
+    @compile_phase.setter
+    def compile_phase(self, value):
+        if not isinstance(value, str):
+            raise TypeError(f"For 'Cell', 'compile_phase' must be string type, but got type {type(value)}.")
+        self._compile_phase = value
+        for cell in self._cells.values():
+            if cell is not None:
+                cell.compile_phase = value
+
     @property
     def parameter_layout_dict(self):
         """
@@ -1705,6 +1719,7 @@ class Cell(Cell_):
         _init_auto_parallel_context(self)
         compile_args = self._get_compile_args(args)
         self._has_mutable_args_list = _get_mutable_flags(compile_args)
+        _cell_graph_executor.set_real_args(args, kwargs)
         _cell_graph_executor.compile(self, *compile_args, phase=self.phase,
                                      jit_config_dict=self._jit_config_dict, **kwargs)
         _clear_auto_parallel_context(self)
@@ -2587,23 +2602,6 @@ class Cell(Cell_):
         else:
             self._jit_config_dict = jit_config.jit_config_dict
 
-    def flatten_weights(self, fusion_size=0):
-        """
-        Reset data for weight parameters so that they are using contiguous memory chunks grouped by data type.
-
-        Note:
-            By default, parameters with same data type will using a single contiguous memory chunk. but for
-            some models with huge number of parameters, splitting a large memory chunk into several smaller
-            memory chunks has the potential for performance gains, if this is the case, we can use 'fusion_size'
-            to limit the maximum memory chunk size.
-
-        Args:
-            fusion_size (int): Maximum memory chunk size in bytes, ``0`` for unlimited. Default: ``0`` .
-        """
-        if fusion_size < 0:
-            raise ValueError(f"Negative 'fusion_size' {fusion_size} is invalid.")
-        Tensor._flatten_tensors(self.trainable_params(), fusion_size)  # pylint: disable=W0212
-
     @jit_forbidden_register
     def register_forward_pre_hook(self, hook_fn, with_kwargs=False):
         """
@@ -3507,24 +3505,6 @@ class Cell(Cell_):
         _update_hook_version()
         return handle
 
-    def set_param_ps(self, recurse=True, init_in_server=False):
-        """
-        Set whether the trainable parameters are updated by parameter server and whether the
-        trainable parameters are initialized on server.
-
-        Note:
-            It only works when a running task is in the parameter server mode.
-            It is only supported in graph mode.
-
-        Args:
-            recurse (bool): Whether sets the trainable parameters of subcells. Default: ``True`` .
-            init_in_server (bool): Whether trainable parameters updated by parameter server are
-                initialized on server. Default: ``False`` .
-        """
-        params = self.trainable_params(recurse)
-        for param in params:
-            param.set_param_ps(init_in_server)
-
     def set_comm_fusion(self, fusion_type, recurse=True):
         """
         Set `comm_fusion` for all the parameters in this cell. Please refer to the description of
@@ -3628,8 +3608,7 @@ class Cell(Cell_):
             introduced by optimizer shard are recomputed in auto parallel or semi auto parallel mode.
             Default: ``False`` .
         """
-
-        self._recompute_cell = recompute_registry.get()(self.construct)
+        self._recompute_cell = recompute_registry.get()(self.construct)
         self._recompute()
         if 'mp_comm_recompute' in kwargs.keys():
             self._mp_comm_recompute(kwargs.get('mp_comm_recompute', False))
@@ -3646,35 +3625,6 @@ class Cell(Cell_):
                             "the key kwargs must be 'mp_comm_recompute', "
                             "'parallel_optimizer_comm_recompute', 'recompute_slice_activation'" % key)
 
-    def place(self, role, rank_id):
-        """
-        Set the label for all operators in this cell.
-        This label tells MindSpore compiler on which process this cell should be launched.
-        And each process's identical label consists of input `role` and `rank_id`.
-        So by setting different cells with different labels, which will be launched on different processes,
-        users can launch a distributed training or predicting job.
-
-        Note:
-            - This method is effective only after
-              `mindspore.communication.init()` is called for dynamic cluster building.
-
-        Args:
-            role (str): The role of the process on which this cell will be launched.
-                Only 'MS_WORKER' is supported for now.
-            rank_id (int): The rank id of the process on which this cell will be launched.
-                The rank is unique in processes with the same role.
-
-        Examples:
-            >>> from mindspore import context
-            >>> import mindspore.nn as nn
-            >>> context.set_context(mode=context.GRAPH_MODE)
-            >>> fc = nn.Dense(2, 3)
-            >>> fc.place('MS_WORKER', 0)
-        """
-        all_ops = self._get_prims_recursively()
-        for op in all_ops:
-            op.place(role, rank_id)
-
     def _get_attr_from_cell(self, network):
         if not isinstance(network, Cell):
             return
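The new `compile_phase` property validates its value and then pushes the same string into every child cell, so setting it once on the outermost network tags the whole cell tree. A small sketch of that propagation (illustrative; the diff does not document which phase strings are expected):

    import mindspore.nn as nn

    class Net(nn.Cell):
        def __init__(self):
            super().__init__()
            self.block = nn.SequentialCell([nn.Dense(4, 8), nn.ReLU()])
            self.head = nn.Dense(8, 2)

        def construct(self, x):
            return self.head(self.block(x))

    net = Net()
    net.compile_phase = "train"   # setter walks self._cells recursively
    print(net.block.compile_phase, net.head.compile_phase)  # train train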
mindspore/nn/layer/basic.py
CHANGED
@@ -38,7 +38,7 @@ from mindspore.common.generator import default_generator
 
 __all__ = ['Dropout', 'Flatten', 'Dense', 'Linear', 'ClipByNorm', 'Norm', 'OneHot', 'Pad', 'Unfold', 'Tril', 'Triu',
            'MatrixDiag', 'MatrixDiagPart', 'MatrixSetDiag', 'L1Regularizer', 'Dropout1d',
-           'Dropout2d', 'Dropout3d', 'Upsample', 'Roll', 'Identity', 'Unflatten', 'DropoutExt']
+           'Dropout2d', 'Dropout3d', 'Upsample', 'Roll', 'Identity', 'Unflatten', 'DropoutExt', 'Dropout2dExt']
 
 
 class L1Regularizer(Cell):
@@ -382,6 +382,54 @@ class Dropout2d(Cell):
         return f"p={self.keep_prob}"
 
 
+class Dropout2dExt(Cell):
+    r"""
+    During training, randomly zeroes some channels of the input tensor with probability `p`
+    from a Bernoulli distribution (For a 4-dimensional tensor with a shape of :math:`NCHW`,
+    the channel feature map refers to a 2-dimensional feature map with the shape of :math:`HW`).
+
+    For example, the :math:`j\_th` channel of the :math:`i\_th` sample in the batched input is a to-be-processed
+    `2D` tensor input[i,j].
+    Each channel will be zeroed out independently on every forward call with probability `p` using samples
+    from a Bernoulli distribution.
+
+    `Dropout2d` can improve the independence between channel feature maps.
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Refer to :func:`mindspore.mint.nn.functional.dropout2d` for more details.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> from mindspore import Tensor, mint
+        >>> import numpy as np
+        >>> dropout = mint.nn.Dropout2d(p=0.5)
+        >>> x = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32)
+        >>> output = dropout(x)
+        >>> print(output.shape)
+        (2, 1, 2, 3)
+    """
+
+    def __init__(self, p=0.5, inplace=False):
+        """Initialize Dropout2d."""
+        super(Dropout2dExt, self).__init__()
+        self.p = p
+        self.inplace = inplace
+        self.generator_step = Tensor(12, mstype.int64)
+
+    def construct(self, input):
+        if not self.training or self.p == 0:
+            return input
+
+        seed, offset = default_generator._step(self.generator_step)  # pylint: disable=protected-access
+        return ops.auto_generate.dropout2d_ext_op(input, self.p, self.training, self.inplace, seed, offset)
+
+
 class Dropout3d(Cell):
     r"""
     During training, randomly zeroes some channels of the input tensor
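`Dropout2dExt` is a thin wrapper over the generated `dropout2d_ext_op`, drawing `seed`/`offset` from the default generator on each call. A minimal usage sketch (assuming it is re-exported from `mindspore.nn` like the other names added to `__all__`):

    import numpy as np
    import mindspore
    from mindspore import Tensor, nn

    dropout = nn.Dropout2dExt(p=0.5)
    dropout.set_train(True)   # construct() returns the input unchanged when not training or p == 0

    x = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32)
    out = dropout(x)
    print(out.shape)  # (2, 1, 2, 3)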
mindspore/nn/layer/container.py
CHANGED
@@ -247,6 +247,14 @@ class SequentialCell(Cell):
         self._cells = temp_dict
         self.cell_list = list(self._cells.values())
 
+    def __setattr__(self, name, value):
+        from mindspore.common.recompute import _RecomputeCell
+        if isinstance(value, _RecomputeCell):
+            # RecomputeCell should not be executed
+            object.__setattr__(self, name, value)
+        else:
+            super().__setattr__(name, value)
+
     def __bool__(self):
         return len(self._cells) != 0
 
@@ -594,6 +602,14 @@ class CellDict(_CellDictBase, Cell):
     def __delitem__(self, key):
         del self._cells[key]
 
+    def __setattr__(self, name, value):
+        from mindspore.common.recompute import _RecomputeCell
+        if isinstance(value, _RecomputeCell):
+            # RecomputeCell should not be executed
+            object.__setattr__(self, name, value)
+        else:
+            super().__setattr__(name, value)
+
     def __len__(self):
         return len(self._cells)
 
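Both containers gain the same guard: an attribute whose value is a `_RecomputeCell` is stored with plain `object.__setattr__`, bypassing `Cell.__setattr__`, so it is never registered in `_cells` and never executed as a child of the container. The pattern in isolation, as a standalone illustration rather than MindSpore code:

    class Helper:
        """Stands in for _RecomputeCell: attached to the container but not registered as a child."""


    class Container:
        def __init__(self):
            object.__setattr__(self, "_cells", {})  # registered children live here

        def __setattr__(self, name, value):
            if isinstance(value, Helper):
                # store the helper directly on the instance; skip child registration
                object.__setattr__(self, name, value)
            else:
                self._cells[name] = value             # normal attributes are registered
                object.__setattr__(self, name, value)


    c = Container()
    c.helper = Helper()
    c.child = "an ordinary child"
    print(list(c._cells))  # ['child'] -- the helper was not registered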