mindspore 2.3.0-cp310-cp310-win_amd64.whl → 2.4.1-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore has been flagged as potentially problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +3 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +50 -9
- mindspore/_extends/parse/compile_config.py +41 -0
- mindspore/_extends/parse/parser.py +9 -7
- mindspore/_extends/parse/standard_method.py +52 -14
- mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
- mindspore/amp.py +24 -10
- mindspore/common/__init__.py +6 -4
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_tensor.py +2 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/api.py +102 -87
- mindspore/common/dump.py +5 -6
- mindspore/common/generator.py +1 -7
- mindspore/common/hook_handle.py +14 -26
- mindspore/common/initializer.py +51 -15
- mindspore/common/mindir_util.py +2 -2
- mindspore/common/parameter.py +62 -15
- mindspore/common/recompute.py +39 -9
- mindspore/common/sparse_tensor.py +7 -3
- mindspore/common/tensor.py +183 -37
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +38 -3
- mindspore/communication/comm_func.py +315 -60
- mindspore/communication/management.py +14 -14
- mindspore/context.py +132 -22
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/__init__.py +1 -1
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +72 -44
- mindspore/dataset/engine/datasets_audio.py +7 -7
- mindspore/dataset/engine/datasets_standard_format.py +53 -3
- mindspore/dataset/engine/datasets_text.py +20 -20
- mindspore/dataset/engine/datasets_user_defined.py +174 -104
- mindspore/dataset/engine/datasets_vision.py +33 -33
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/queue.py +114 -60
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +34 -14
- mindspore/dataset/text/__init__.py +1 -4
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +1 -4
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/dataset/vision/validators.py +2 -1
- mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/adamw.py +85 -0
- mindspore/experimental/optim/optimizer.py +3 -0
- mindspore/hal/__init__.py +3 -3
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/stream.py +18 -0
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/types.h +10 -10
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +2 -2
- mindspore/include/dataset/vision.h +4 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filewriter.py +68 -51
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +983 -46
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/mint/nn/__init__.py +268 -23
- mindspore/mint/nn/functional.py +125 -19
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/adamw.py +26 -13
- mindspore/mint/special/__init__.py +63 -0
- mindspore/multiprocessing/__init__.py +2 -1
- mindspore/nn/__init__.py +0 -1
- mindspore/nn/cell.py +276 -96
- mindspore/nn/layer/activation.py +211 -44
- mindspore/nn/layer/basic.py +137 -10
- mindspore/nn/layer/embedding.py +137 -2
- mindspore/nn/layer/normalization.py +101 -5
- mindspore/nn/layer/padding.py +34 -48
- mindspore/nn/layer/pooling.py +161 -7
- mindspore/nn/layer/transformer.py +3 -3
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +84 -6
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +124 -0
- mindspore/nn/wrap/cell_wrapper.py +12 -23
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/nn/wrap/loss_scale.py +17 -3
- mindspore/numpy/__init__.py +1 -1
- mindspore/numpy/array_creations.py +65 -68
- mindspore/numpy/array_ops.py +64 -60
- mindspore/numpy/fft.py +610 -75
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +85 -84
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -4
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
- mindspore/ops/_grad_experimental/grad_comm_ops.py +67 -4
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
- mindspore/ops/_vmap/vmap_array_ops.py +2 -4
- mindspore/ops/_vmap/vmap_math_ops.py +17 -1
- mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +91 -7
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
- mindspore/ops/auto_generate/gen_extend_func.py +767 -13
- mindspore/ops/auto_generate/gen_ops_def.py +2452 -364
- mindspore/ops/auto_generate/gen_ops_prim.py +5442 -1756
- mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
- mindspore/ops/composite/base.py +85 -48
- mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
- mindspore/ops/function/__init__.py +22 -0
- mindspore/ops/function/array_func.py +492 -153
- mindspore/ops/function/debug_func.py +113 -1
- mindspore/ops/function/fft_func.py +15 -2
- mindspore/ops/function/grad/grad_func.py +3 -2
- mindspore/ops/function/math_func.py +564 -207
- mindspore/ops/function/nn_func.py +817 -383
- mindspore/ops/function/other_func.py +3 -2
- mindspore/ops/function/random_func.py +402 -12
- mindspore/ops/function/reshard_func.py +13 -11
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +3 -2
- mindspore/ops/functional.py +24 -14
- mindspore/ops/op_info_register.py +3 -3
- mindspore/ops/operations/__init__.py +7 -2
- mindspore/ops/operations/_grad_ops.py +2 -76
- mindspore/ops/operations/_infer_ops.py +1 -1
- mindspore/ops/operations/_inner_ops.py +71 -94
- mindspore/ops/operations/array_ops.py +14 -146
- mindspore/ops/operations/comm_ops.py +63 -53
- mindspore/ops/operations/custom_ops.py +83 -19
- mindspore/ops/operations/debug_ops.py +42 -10
- mindspore/ops/operations/manually_defined/_inner.py +12 -0
- mindspore/ops/operations/manually_defined/ops_def.py +273 -20
- mindspore/ops/operations/math_ops.py +12 -223
- mindspore/ops/operations/nn_ops.py +20 -114
- mindspore/ops/operations/other_ops.py +7 -4
- mindspore/ops/operations/random_ops.py +46 -1
- mindspore/ops/primitive.py +18 -6
- mindspore/ops_generate/arg_dtype_cast.py +2 -0
- mindspore/ops_generate/gen_aclnn_implement.py +11 -11
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +67 -52
- mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
- mindspore/ops_generate/gen_pyboost_func.py +131 -47
- mindspore/ops_generate/op_proto.py +10 -3
- mindspore/ops_generate/pyboost_utils.py +14 -1
- mindspore/ops_generate/template.py +43 -21
- mindspore/parallel/__init__.py +3 -1
- mindspore/parallel/_auto_parallel_context.py +31 -9
- mindspore/parallel/_cell_wrapper.py +85 -0
- mindspore/parallel/_parallel_serialization.py +47 -19
- mindspore/parallel/_tensor.py +127 -13
- mindspore/parallel/_utils.py +53 -22
- mindspore/parallel/algo_parameter_config.py +5 -5
- mindspore/parallel/checkpoint_transform.py +46 -39
- mindspore/parallel/cluster/process_entity/__init__.py +1 -1
- mindspore/parallel/cluster/process_entity/_api.py +31 -23
- mindspore/parallel/cluster/process_entity/_utils.py +2 -27
- mindspore/parallel/parameter_broadcast.py +3 -4
- mindspore/parallel/shard.py +162 -31
- mindspore/parallel/transform_safetensors.py +1146 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/util.py +28 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +17 -19
- mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
- mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
- mindspore/profiler/parser/base_timeline_generator.py +19 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +1 -391
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/profiler_info.py +78 -6
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +285 -413
- mindspore/rewrite/__init__.py +1 -2
- mindspore/rewrite/common/namespace.py +4 -4
- mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
- mindspore/run_check/_check_version.py +39 -104
- mindspore/safeguard/rewrite_obfuscation.py +591 -247
- mindspore/train/__init__.py +4 -3
- mindspore/train/_utils.py +105 -19
- mindspore/train/amp.py +171 -53
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +97 -31
- mindspore/train/callback/_cluster_monitor.py +1 -1
- mindspore/train/callback/_flops_collector.py +1 -0
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +145 -31
- mindspore/train/callback/_summary_collector.py +5 -5
- mindspore/train/callback/_tft_register.py +375 -0
- mindspore/train/dataset_helper.py +15 -3
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/mind_ir_pb2.py +44 -39
- mindspore/train/model.py +154 -58
- mindspore/train/serialization.py +342 -128
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +13 -7
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +248 -242
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -283
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/extend/basic.py +0 -140
- mindspore/nn/extend/embedding.py +0 -143
- mindspore/nn/extend/layer/normalization.py +0 -109
- mindspore/nn/extend/pooling.py +0 -117
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/extend/__init__.py +0 -53
- mindspore/ops/extend/array_func.py +0 -218
- mindspore/ops/extend/math_func.py +0 -76
- mindspore/ops/extend/nn_func.py +0 -308
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/nn/layer/embedding.py
CHANGED
@@ -16,13 +16,14 @@
 from __future__ import absolute_import
 
 import mindspore.common.dtype as mstype
+import mindspore.ops as ops
 from mindspore import log as logger
 from mindspore.common.tensor import Tensor
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.common.parameter import Parameter
 from mindspore.common.parameter import _get_unique_parameter_key
-from mindspore.common.initializer import initializer
+from mindspore.common.initializer import initializer, Normal
 from mindspore.communication.management import get_group_size, get_rank
 from mindspore.context import ParallelMode
 from mindspore.parallel._utils import _get_parallel_mode, _get_full_batch
@@ -34,7 +35,7 @@ from mindspore.ops.primitive import constexpr, _primexpr
 from mindspore.nn.layer.basic import ClipByNorm
 from mindspore.nn.cell import Cell
 
-__all__ = ['Embedding', 'EmbeddingLookup', 'MultiFieldEmbeddingLookup']
+__all__ = ['Embedding', 'EmbeddingExt', 'EmbeddingLookup', 'MultiFieldEmbeddingLookup']
 
 
 @_primexpr
@@ -161,6 +162,139 @@ class Embedding(Cell):
             f'embedding_table={self.embedding_table}, dtype={self.dtype}, padding_idx={self.padding_idx}'
 
 
+class EmbeddingExt(Cell):
+    r"""
+    The value in `input` is used as the index, and the corresponding embedding vector is queried from `weight` .
+
+    .. warning::
+        - This is an experimental API that is subject to change or deletion.
+        - On Ascend, the behavior is unpredictable when the value of `input` is invalid.
+
+    Args:
+        num_embeddings (int): Size of the dictionary of embeddings.
+        embedding_dim (int): The size of each embedding vector.
+        padding_idx (int, optional): If the value is not None, the corresponding row of embedding vector
+            will not be updated in training. The value of embedding vector at `padding_idx` will default
+            to zeros when the Embedding layer is newly constructed. The value should be in range
+            `[-num_embeddings, num_embeddings)` if it's not ``None``. Default ``None``.
+        max_norm (float, optional): If the value is not None, firstly get the p-norm result of the embedding
+            vector specified by `input` where p is specified by `norm_type`; if the result is larger then `max_norm`,
+            update the embedding vector` with :math:`\frac{max\_norm}{result+1e^{-7}}`. Default ``None``.
+        norm_type (float, optional): Indicated the value of p in p-norm. Default ``2.0``.
+        scale_grad_by_freq (bool, optional): If ``True`` the gradients will be scaled by the inverse of frequency
+            of the index in `input`. Default ``False``.
+        sparse (bool, optional): If ``True``, gradient w.r.t. `weight` matrix will be a sparse tensor which
+            has not been supported. Default: ``False``.
+        _weight (Tensor, optional): Used to initialize the `weight` of Embedding. If ``None``, the weight will be
+            initialized from normal distribution :math:`{N}(\text{sigma=1.0}, \text{mean=0.0})`. Default ``None``.
+        _freeze(bool, optional): If `weight` , the learnable weights of this module, should be freezed.
+            Default: ``False``.
+        dtype (mindspore.dtype, optional) : Dtype of Embedding's `weight` . It is meaningless when `_weight` is
+            not None. Default: ``None``.
+
+    Variables:
+        weight (Parameter): The learnable weights of this module of shape (num_embeddings, embedding_dim), which
+            initialized from :math:`{N}(\text{sigma=1.0}, \text{mean=0.0})` or `_weight` .
+
+    Inputs:
+        - **input** (Tensor) - The indices used to lookup in the embedding vector. The data type must be
+          int32 or int64, and the value should be in range `[0, num_embeddings)`.
+
+    Outputs:
+        Tensor, has the same data type as weight, the shape is :math:`(*input.shape, embedding\_dim)`.
+
+    Raises:
+        TypeError: If `num_embeddings` is not an int.
+        TypeError: If `embedding_dim` is not an int.
+        ValueError: If `padding_idx` is out of valid range.
+        TypeError: If `max_norm` is not a float.
+        TypeError: If `norm_type` is not a float.
+        TypeError: If `scale_grad_by_freq` is not a bool.
+        ValueError: If `weight.shape` is invalid.
+        TypeError: If `dtype` is not one of mindspore.dtype.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, nn
+        >>> input = Tensor([[1, 0, 1, 1], [0, 0, 1, 0]])
+        >>> embedding = nn.EmbeddingExt(num_embeddings=10, embedding_dim=3)
+        >>> output = embedding(input)
+        >>> print(output)
+        [[[-0.0024154  -0.01203444  0.00811537]
+          [ 0.00233847 -0.00596091  0.00536799]
+          [-0.0024154  -0.01203444  0.00811537]
+          [-0.0024154  -0.01203444  0.00811537]]
+         [[ 0.00233847 -0.00596091  0.00536799]
+          [ 0.00233847 -0.00596091  0.00536799]
+          [-0.0024154  -0.01203444  0.00811537]
+          [ 0.00233847 -0.00596091  0.00536799]]]
+    """
+
+    def __init__(self, num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0,
+                 scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, dtype=None):
+        """Initialize Embedding."""
+        super().__init__()
+        self.sparse = Validator.check_value_type('sparse', sparse, [bool], self.cls_name)
+        if self.sparse:
+            raise ValueError("For Embedding, the scenerio, where `sparse` is True, has not be supported.")
+        self.num_embeddings = Validator.check_value_type(
+            'num_embeddings', num_embeddings, [int], self.cls_name)
+        self.embedding_dim = Validator.check_value_type(
+            'embedding_dim', embedding_dim, [int], self.cls_name)
+        self.dtype = dtype if dtype is not None else mstype.float32
+        Validator.check_subclass(
+            "dtype", self.dtype, mstype.number_type, self.cls_name)
+        self.padding_idx = padding_idx
+        if _weight is None:
+            init_tensor = Tensor(shape=[num_embeddings, embedding_dim], dtype=self.dtype, init=Normal(1, 0))
+            init_tensor = self._zero_weight_by_index(init_tensor)
+            self.weight = Parameter(init_tensor, name='weight', requires_grad=not _freeze)
+        else:
+            if _weight.shape != (num_embeddings, embedding_dim):
+                raise ValueError(f"For Embedding, shape of weight should be match with num_embeddings "
+                                 f"and embedding_dim, but got weight.shape: {_weight.shape}, "
+                                 f"and (num_embeddings, embedding_dim): ({num_embeddings}, {embedding_dim})")
+            self.weight = Parameter(_weight, name='weight', requires_grad=not _freeze)
+
+        self.max_norm = max_norm
+        if max_norm is not None:
+            self.max_norm = Validator.check_value_type('max_norm', max_norm, [float], self.cls_name)
+
+        self.norm_type = norm_type
+        if norm_type is not None:
+            self.norm_type = Validator.check_value_type('norm_type', norm_type,
+                                                        [float], self.cls_name)
+
+        self.scale_grad_by_freq = scale_grad_by_freq
+        if scale_grad_by_freq is not None:
+            self.scale_grad_by_freq = Validator.check_value_type('scale_grad_by_freq',
+                                                                 scale_grad_by_freq,
+                                                                 [bool], self.cls_name)
+
+    def _zero_weight_by_index(self, init_tensor):
+        if self.padding_idx is not None:
+            self.padding_idx = Validator.check_int_range(self.padding_idx, -self.num_embeddings, self.num_embeddings,
+                                                         Validator.INC_LEFT, "padding_idx", self.cls_name)
+            if isinstance(init_tensor, Tensor) and init_tensor.init is not None:
+                init_tensor = init_tensor.init_data()
+            init_tensor[self.padding_idx] = 0
+
+        return init_tensor
+
+    def construct(self, input):
+        return ops.embedding(input, self.weight, self.padding_idx, self.max_norm,
+                             self.norm_type, self.scale_grad_by_freq)
+
+    def extend_repr(self):
+        return f'num_embeddings={self.num_embeddings}, embedding_dim={self.embedding_dim}, ' \
+               f'padding_idx={self.padding_idx}, max_norm={self.max_norm}, norm_type={self.norm_type}, ' \
+               f'scale_grad_by_freq={self.scale_grad_by_freq}, dtype={self.dtype}'
+
+
 @_primexpr
 def _make_axis_range(start, end):
     axis = tuple(range(start, end))
@@ -182,6 +316,7 @@ class EmbeddingLookup(Cell):
         specified 'axis = 0' to lookup table.
         In field slice mode, the manual_shapes must be given. It is a tuple ,where
         the element is vocab[i], vocab[i] is the row numbers for i-th part.
+        This module does not support the PyNative mode.
 
     Args:
         vocab_size (int): Size of the dictionary of embeddings.
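A minimal usage sketch for the new nn.EmbeddingExt layer added above. The values are hypothetical; it assumes MindSpore 2.4.1 on Ascend (the only platform listed in the docstring), and restates the padding_idx behavior described there:

    import mindspore as ms
    from mindspore import Tensor, nn

    # Indices must be int32/int64 and lie in [0, num_embeddings).
    indices = Tensor([[1, 0, 2], [4, 3, 0]], ms.int32)
    # padding_idx=0: row 0 of the weight is zero-initialized and not updated during training.
    embedding = nn.EmbeddingExt(num_embeddings=10, embedding_dim=4, padding_idx=0)
    out = embedding(indices)
    print(out.shape)  # (2, 3, 4), i.e. (*indices.shape, embedding_dim)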
mindspore/nn/layer/normalization.py
CHANGED
@@ -19,7 +19,8 @@ from __future__ import division
 import itertools
 import numbers
 import hashlib
-
+import numpy as np
+import mindspore.ops as ops
 from mindspore.ops import operations as P
 from mindspore.ops.operations import _inner_ops as inner
 from mindspore.common.parameter import Parameter
@@ -37,7 +38,7 @@ from mindspore.nn.cell import Cell
 from mindspore import log as logger
 from mindspore.ops import group_norm
 
-__all__ = ['BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'LayerNorm', 'GroupNorm',
+__all__ = ['BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'LayerNorm', 'LayerNormExt', 'GroupNorm',
            'SyncBatchNorm', 'InstanceNorm1d', 'InstanceNorm2d', 'InstanceNorm3d']
 
 
@@ -558,14 +559,14 @@ class SyncBatchNorm(_BatchNorm):
 
         For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
         Please see the `Ascend tutorial
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/en/master/model_train/parallel/rank_table.html>`_
        for more details.
 
         For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/en/master/model_train/parallel/mpirun.html>`_ .
 
         For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
-        Startup <https://www.mindspore.cn/
+        Startup <https://www.mindspore.cn/docs/en/master/model_train/parallel/dynamic_cluster.html>`_ .
 
         This example should be run with multiple devices.
 
@@ -774,6 +775,101 @@ class LayerNorm(Cell):
             self.normalized_shape, self.begin_norm_axis, self.begin_params_axis, self.gamma, self.beta)
 
 
+class LayerNormExt(Cell):
+    r"""
+    Applies Layer Normalization over a mini-batch of inputs.
+
+    Layer Normalization is widely used in recurrent neural networks. It applies
+    normalization on a mini-batch of inputs for each single training case as described
+    in the paper `Layer Normalization <https://arxiv.org/pdf/1607.06450.pdf>`_.
+
+    Unlike Batch Normalization, Layer Normalization performs exactly the same computation at training and
+    testing time. It is applied across all channels and pixel but only one batch size.
+    :math:`\gamma` is the scale value learned through training and :math:`\beta` is the shift value.
+    It can be described using the following formula:
+
+    .. math::
+        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Args:
+        normalized_shape (Union(tuple[int], list[int], int)): The normalized shape of `x` for LayerNorm
+        eps (float): A value added to the denominator for numerical stability(:math:`\epsilon`). Default: ``1e-5`` .
+        elementwise_affine (bool): Whether affine transformation is required. When this parameter is set to ``True``,
+            the weight parameter is initialized to 1 and the offset is initialized to 0. Default: ``True``.
+        bias (bool): If set to ``False``, the layer will not learn an additive bias (only relevant if
+            `elementwise_affine` is ``True``). Default: ``True``.
+        dtype (:class:`mindspore.dtype`): Dtype of Parameters. Default: ``None`` .
+
+    Inputs:
+        - **x** (Tensor) - The shape is :math:`(N, *)`, where :math:`*` is equal to normalized_shape.
+
+    Outputs:
+        Tensor, the normalized and scaled offset tensor, has the same shape and data type as the `x`.
+
+    Raises:
+        TypeError: If `eps` is not a float.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore as ms
+        >>> import numpy as np
+        >>> x = ms.Tensor(np.ones([20, 5, 10, 10]), ms.float32)
+        >>> shape1 = x.shape[1:]
+        >>> m = ms.nn.LayerNormExt(shape1)
+        >>> output = m(x).shape
+        >>> print(output)
+        (20, 5, 10, 10)
+    """
+
+    def __init__(self,
+                 normalized_shape,
+                 eps=1e-5,
+                 elementwise_affine=True,
+                 bias=True,
+                 dtype=None
+                 ):
+        """Initialize LayerNormExt."""
+        super(LayerNormExt, self).__init__()
+        if isinstance(normalized_shape, numbers.Integral):
+            # mypy error: incompatible types in assignment
+            normalized_shape = (normalized_shape,)  # type: ignore[assignment]
+        if not isinstance(normalized_shape, (tuple, list)):
+            raise TypeError(f"For '{self.cls_name}', the type of 'normalized_shape' must be tuple[int] or list[int], "
+                            f"but got {normalized_shape} and the type is {type(normalized_shape)}.")
+        if not normalized_shape:
+            raise ValueError(
+                f"Expected normalized_shape to be at least 1-dimensional, i.e., containing at "
+                f"least one element, but got normalized_shape = {normalized_shape}"
+            )
+        self.normalized_shape = tuple(normalized_shape)
+        self.eps = eps
+        self.elementwise_affine = elementwise_affine
+        ms_dtype = mstype.float32 if dtype is None else dtype
+        if self.elementwise_affine:
+            self.weight = Parameter(Tensor(np.ones(normalized_shape), ms_dtype), name="weight")
+            if bias:
+                self.bias = Parameter(Tensor(np.zeros(normalized_shape), ms_dtype), name="bias")
+            else:
+                self.bias = None
+        else:
+            self.weight = None
+            self.bias = None
+
+    def construct(self, input):
+        y = ops.layer_norm(input, self.normalized_shape, self.weight,
+                           self.bias, self.eps)
+        return y
+
+    def extend_repr(self):
+        return 'normalized_shape={}, eps={}, elementwise_affine={}'.format(
+            self.normalized_shape, self.eps, self.elementwise_affine)
+
+
 class _InstanceNorm(Cell):
     """Instance Normalization base class."""
     @cell_attr_register
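A brief usage sketch for the new nn.LayerNormExt layer added above. It assumes MindSpore 2.4.1 on Ascend; the bias=False variant is an illustration of the constructor arguments shown in the diff, not an official example:

    import numpy as np
    import mindspore as ms

    x = ms.Tensor(np.ones((8, 16, 32)), ms.float32)
    # Normalize over the last dimension only; keep the learnable scale, drop the additive bias.
    ln = ms.nn.LayerNormExt(normalized_shape=32, eps=1e-5, bias=False)
    y = ln(x)
    print(y.shape)  # (8, 16, 32), same shape and dtype as x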
mindspore/nn/layer/padding.py
CHANGED
@@ -279,20 +279,6 @@ class ConstantPad1d(_ConstantPadNd):
            [0.5 1. 1. 1. 1. 0.5]]]]
         >>> print(out.shape)
         (1, 2, 3, 6)
-        >>> # padding is negative
-        >>> padding = (-1, 0)
-        >>> value = 0.5
-        >>> pad1d = ms.nn.ConstantPad1d(padding, value)
-        >>> out = pad1d(x)
-        >>> print(out)
-        [[[[1. 1. 1.]
-           [1. 1. 1.]
-           [1. 1. 1.]]
-          [[1. 1. 1.]
-           [1. 1. 1.]
-           [1. 1. 1.]]]]
-        >>> print(out.shape)
-        (1, 2, 3, 3)
     """
 
     def __init__(self, padding, value):
@@ -336,21 +322,21 @@ class ConstantPad2d(_ConstantPadNd):
         >>> import mindspore as ms
         >>> x = np.ones(shape=(1, 2, 3, 4)).astype(np.float32)
         >>> x = ms.Tensor(x)
-        >>> padding = (
+        >>> padding = (1, 1, 0, 1)
         >>> value = 0.5
         >>> pad2d = ms.nn.ConstantPad2d(padding, value)
         >>> out = pad2d(x)
         >>> print(out)
-        [[[[1. 1. 1. 0.5]
-           [1. 1. 1. 0.5]
-           [1. 1. 1. 0.5]
-           [0.5 0.5 0.5 0.5]]
-          [[1. 1. 1. 0.5]
-           [1. 1. 1. 0.5]
-           [1. 1. 1. 0.5]
-           [0.5 0.5 0.5 0.5]]]]
+        [[[[0.5 1. 1. 1. 1. 0.5]
+           [0.5 1. 1. 1. 1. 0.5]
+           [0.5 1. 1. 1. 1. 0.5]
+           [0.5 0.5 0.5 0.5 0.5 0.5]]
+          [[0.5 1. 1. 1. 1. 0.5]
+           [0.5 1. 1. 1. 1. 0.5]
+           [0.5 1. 1. 1. 1. 0.5]
+           [0.5 0.5 0.5 0.5 0.5 0.5]]]]
         >>> print(out.shape)
-        (1, 2, 4,
+        (1, 2, 4, 6)
     """
 
     def __init__(self, padding, value):
@@ -396,25 +382,25 @@ class ConstantPad3d(_ConstantPadNd):
         >>> import mindspore as ms
         >>> x = np.ones(shape=(1, 2, 3, 4)).astype(np.float32)
         >>> x = ms.Tensor(x)
-        >>> padding = (
+        >>> padding = (1, 1, 0, 1, 1, 0)
        >>> value = 0.5
         >>> pad3d = ms.nn.ConstantPad3d(padding, value)
         >>> out = pad3d(x)
         >>> print(out)
-        [[[[0.5 0.5 0.5 0.5]
-           [0.5 0.5 0.5 0.5]
-           [0.5 0.5 0.5 0.5]
-           [0.5 0.5 0.5 0.5]]
-          [[1. 1. 1. 0.5]
-           [1. 1. 1. 0.5]
-           [1. 1. 1. 0.5]
-           [0.5 0.5 0.5 0.5]]
-          [[1. 1. 1. 0.5]
-           [1. 1. 1. 0.5]
-           [1. 1. 1. 0.5]
-           [0.5 0.5 0.5 0.5]]]]
+        [[[[0.5 0.5 0.5 0.5 0.5 0.5]
+           [0.5 0.5 0.5 0.5 0.5 0.5]
+           [0.5 0.5 0.5 0.5 0.5 0.5]
+           [0.5 0.5 0.5 0.5 0.5 0.5]]
+          [[0.5 1. 1. 1. 1. 0.5]
+           [0.5 1. 1. 1. 1. 0.5]
+           [0.5 1. 1. 1. 1. 0.5]
+           [0.5 0.5 0.5 0.5 0.5 0.5]]
+          [[0.5 1. 1. 1. 1. 0.5]
+           [0.5 1. 1. 1. 1. 0.5]
+           [0.5 1. 1. 1. 1. 0.5]
+           [0.5 0.5 0.5 0.5 0.5 0.5]]]]
         >>> print(out.shape)
-        (1, 3, 4,
+        (1, 3, 4, 6)
     """
 
     def __init__(self, padding, value):
@@ -652,20 +638,20 @@ class ZeroPad2d(_ConstantPadNd):
         >>> import mindspore as ms
         >>> x = np.ones(shape=(1, 2, 3, 4)).astype(np.float32)
         >>> x = ms.Tensor(x)
-        >>> padding = (
+        >>> padding = (1, 1, 0, 1)
         >>> pad = ms.nn.ZeroPad2d(padding)
         >>> out = pad(x)
         >>> print(out)
-        [[[[1. 1. 1. 0.]
-           [1. 1. 1. 0.]
-           [1. 1. 1. 0.]
-           [0. 0. 0. 0.]]
-          [[1. 1. 1. 0.]
-           [1. 1. 1. 0.]
-           [1. 1. 1. 0.]
-           [0. 0. 0. 0.]]]]
+        [[[[0. 1. 1. 1. 1. 0.]
+           [0. 1. 1. 1. 1. 0.]
+           [0. 1. 1. 1. 1. 0.]
+           [0. 0. 0. 0. 0. 0.]]
+          [[0. 1. 1. 1. 1. 0.]
+           [0. 1. 1. 1. 1. 0.]
+           [0. 1. 1. 1. 1. 0.]
+           [0. 0. 0. 0. 0. 0.]]]]
         >>> print(out.shape)
-        (1, 2, 4,
+        (1, 2, 4, 6)
     """
 
     def __init__(self, padding):