mindspore 2.4.0__cp311-cp311-win_amd64.whl → 2.4.10__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/api.py +1 -4
- mindspore/common/file_system.py +2 -0
- mindspore/common/initializer.py +51 -15
- mindspore/common/parameter.py +6 -5
- mindspore/common/tensor.py +15 -49
- mindspore/communication/_comm_helper.py +5 -0
- mindspore/communication/comm_func.py +7 -7
- mindspore/context.py +16 -2
- mindspore/dataset/engine/datasets_standard_format.py +17 -0
- mindspore/dataset/engine/datasets_user_defined.py +27 -1
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/__init__.py +2 -2
- mindspore/experimental/llm_boost/atb/boost_base.py +240 -64
- mindspore/experimental/llm_boost/atb/llama_boost.py +46 -29
- mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
- mindspore/include/api/context.h +1 -1
- mindspore/include/dataset/constants.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +490 -2
- mindspore/mint/nn/__init__.py +2 -2
- mindspore/mint/optim/adamw.py +6 -14
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +16 -4
- mindspore/nn/layer/basic.py +24 -7
- mindspore/nn/layer/conv.py +3 -0
- mindspore/nn/layer/embedding.py +31 -14
- mindspore/nn/layer/pooling.py +8 -10
- mindspore/nn/optim/tft_wrapper.py +12 -15
- mindspore/nn/utils/__init__.py +22 -0
- mindspore/nn/utils/init.py +71 -0
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
- mindspore/ops/_grad_experimental/grad_comm_ops.py +45 -8
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +6 -0
- mindspore/ops/auto_generate/gen_extend_func.py +33 -0
- mindspore/ops/auto_generate/gen_ops_def.py +52 -3
- mindspore/ops/auto_generate/gen_ops_prim.py +158 -8
- mindspore/ops/function/array_func.py +2 -0
- mindspore/ops/function/math_func.py +12 -5
- mindspore/ops/function/random_func.py +221 -7
- mindspore/ops/operations/__init__.py +1 -1
- mindspore/ops/operations/array_ops.py +3 -1
- mindspore/ops/operations/comm_ops.py +25 -1
- mindspore/ops/operations/custom_ops.py +6 -4
- mindspore/ops/operations/manually_defined/ops_def.py +8 -10
- mindspore/ops/operations/nn_ops.py +7 -2
- mindspore/parallel/_auto_parallel_context.py +26 -5
- mindspore/parallel/_cell_wrapper.py +24 -3
- mindspore/parallel/_tensor.py +46 -2
- mindspore/parallel/_utils.py +39 -21
- mindspore/parallel/transform_safetensors.py +196 -43
- mindspore/profiler/profiling.py +5 -1
- mindspore/run_check/_check_version.py +20 -9
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +92 -32
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_on_request_exit.py +12 -1
- mindspore/train/callback/_tft_register.py +33 -9
- mindspore/train/dataset_helper.py +10 -2
- mindspore/train/model.py +21 -0
- mindspore/train/serialization.py +12 -19
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/METADATA +9 -7
- {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/RECORD +87 -85
- {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/WHEEL +0 -0
- {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/entry_points.txt +0 -0
- {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/top_level.txt +0 -0
mindspore/nn/__init__.py
CHANGED
|
@@ -31,6 +31,7 @@ from mindspore.nn.wrap import *
|
|
|
31
31
|
from mindspore.nn.grad import Jvp, Vjp
|
|
32
32
|
from mindspore.nn.sparse import *
|
|
33
33
|
from mindspore.nn.reinforcement import *
|
|
34
|
+
from mindspore.nn.utils import *
|
|
34
35
|
|
|
35
36
|
__all__ = ["Cell", "GraphCell"]
|
|
36
37
|
__all__.extend(layer.__all__)
|
|
@@ -43,5 +44,6 @@ __all__.extend(sparse.__all__)
|
|
|
43
44
|
__all__.extend(learning_rate_schedule.__all__)
|
|
44
45
|
__all__.extend(dynamic_lr.__all__)
|
|
45
46
|
__all__.extend(reinforcement.__all__)
|
|
47
|
+
__all__.extend(utils.__all__)
|
|
46
48
|
|
|
47
49
|
__all__.sort()
|
mindspore/nn/cell.py
CHANGED
|
@@ -32,7 +32,8 @@ from mindspore import context
|
|
|
32
32
|
from mindspore._c_expression import init_pipeline, update_func_graph_hyper_params, Cell_, FuncGraph, MixedPrecisionType
|
|
33
33
|
from mindspore import _checkparam as Validator
|
|
34
34
|
from mindspore.common import dtype as mstype
|
|
35
|
-
from mindspore.common.api import _cell_graph_executor, _pynative_executor, _get_args_for_run, cells_compile_cache,
|
|
35
|
+
from mindspore.common.api import _cell_graph_executor, _pynative_executor, _get_args_for_run, cells_compile_cache, \
|
|
36
|
+
_no_grad
|
|
36
37
|
from mindspore.common.api import _generate_branch_control_input, _convert_python_data, _get_args_for_run_predict
|
|
37
38
|
from mindspore.common.api import _process_dyn_args, _generate_dyn_compile_args
|
|
38
39
|
from mindspore.common.parameter import Parameter, ParameterTuple
|
|
@@ -45,6 +46,7 @@ from mindspore._check_jit_forbidden_api import jit_forbidden_register
|
|
|
45
46
|
from mindspore.common._decorator import deprecated
|
|
46
47
|
from mindspore.common._register_for_recompute import recompute_registry
|
|
47
48
|
|
|
49
|
+
|
|
48
50
|
class Cell(Cell_):
|
|
49
51
|
"""
|
|
50
52
|
The basic building block of neural networks in MindSpore. The model or neural network layer should inherit this
|
|
@@ -1820,9 +1822,6 @@ class Cell(Cell_):
|
|
|
1820
1822
|
if not hasattr(self, "_func_graph_flags"):
|
|
1821
1823
|
self._func_graph_flags = {}
|
|
1822
1824
|
self._func_graph_flags.update({**flags})
|
|
1823
|
-
if context._get_mode() == context.PYNATIVE_MODE and self._func_graph_flags.get("output_no_recompute"):
|
|
1824
|
-
raise TypeError("Recompute is not supported in PyNative mode currently, you can use "
|
|
1825
|
-
"'context.set_context(mode=context.GRAPH_MODE)' or @jit to set graph mode.")
|
|
1826
1825
|
self.__dict__.update({**flags})
|
|
1827
1826
|
self._add_mixed_precision_flag(**flags)
|
|
1828
1827
|
return self
|
|
@@ -2585,6 +2584,7 @@ class Cell(Cell_):
|
|
|
2585
2584
|
"""
|
|
2586
2585
|
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
2587
2586
|
self._recompute_cell = recompute_registry.get()(self.construct)
|
|
2587
|
+
self._add_recompute_flag()
|
|
2588
2588
|
return
|
|
2589
2589
|
self._recompute()
|
|
2590
2590
|
if 'mp_comm_recompute' in kwargs.keys():
|
|
@@ -2687,6 +2687,18 @@ class Cell(Cell_):
|
|
|
2687
2687
|
if hasattr(network, "_amp_level"):
|
|
2688
2688
|
self._amp_level = getattr(network, "_amp_level")
|
|
2689
2689
|
|
|
2690
|
+
def _add_recompute_flag(self):
|
|
2691
|
+
"""
|
|
2692
|
+
Set pynative cell recomputed.
|
|
2693
|
+
"""
|
|
2694
|
+
if not self._has_config_recompute:
|
|
2695
|
+
self._has_config_recompute = True
|
|
2696
|
+
else:
|
|
2697
|
+
logger.info("The recompute interface can be configured only once."
|
|
2698
|
+
" If the parent cell is configured, the child cell should not be configured")
|
|
2699
|
+
for cell in self.cells():
|
|
2700
|
+
cell._add_recompute_flag()
|
|
2701
|
+
|
|
2690
2702
|
|
|
2691
2703
|
class GraphCell(Cell):
|
|
2692
2704
|
"""
|
mindspore/nn/layer/basic.py
CHANGED
|
@@ -579,11 +579,15 @@ class Identity(Cell):
|
|
|
579
579
|
r"""
|
|
580
580
|
A placeholder identity operator that returns the same as input.
|
|
581
581
|
|
|
582
|
+
Args:
|
|
583
|
+
args (Any): Any argument.
|
|
584
|
+
kwargs (Any): Any keyword argument.
|
|
585
|
+
|
|
582
586
|
Inputs:
|
|
583
|
-
- **
|
|
587
|
+
- **input** (Any) - The input of Identity.
|
|
584
588
|
|
|
585
589
|
Outputs:
|
|
586
|
-
The same as `
|
|
590
|
+
The same as `input`.
|
|
587
591
|
|
|
588
592
|
Supported Platforms:
|
|
589
593
|
``Ascend`` ``GPU`` ``CPU``
|
|
@@ -592,19 +596,19 @@ class Identity(Cell):
|
|
|
592
596
|
>>> import mindspore
|
|
593
597
|
>>> from mindspore import Tensor, nn
|
|
594
598
|
>>> import numpy as np
|
|
595
|
-
>>>
|
|
599
|
+
>>> input = Tensor(np.array([1, 2, 3, 4]), mindspore.int64)
|
|
596
600
|
>>> net = nn.Identity()
|
|
597
|
-
>>> output = net(
|
|
601
|
+
>>> output = net(input)
|
|
598
602
|
>>> print(output)
|
|
599
603
|
[1 2 3 4]
|
|
600
604
|
"""
|
|
601
605
|
|
|
602
|
-
def __init__(self):
|
|
606
|
+
def __init__(self, *args, **kwargs):
|
|
603
607
|
"""Initialize Identity."""
|
|
604
608
|
super(Identity, self).__init__()
|
|
605
609
|
|
|
606
|
-
def construct(self,
|
|
607
|
-
return
|
|
610
|
+
def construct(self, input):
|
|
611
|
+
return input
|
|
608
612
|
|
|
609
613
|
|
|
610
614
|
class Dense(Cell):
|
|
@@ -621,6 +625,9 @@ class Dense(Cell):
|
|
|
621
625
|
data type as the :math:`X` created by the layer, and :math:`\text{bias}` is a bias vector
|
|
622
626
|
with the same data type as the :math:`X` created by the layer (only if has_bias is True).
|
|
623
627
|
|
|
628
|
+
.. warning::
|
|
629
|
+
In PYNATIVE mode, if `bias` is ``False`` , the `x` cannot be greater than 6D.
|
|
630
|
+
|
|
624
631
|
Args:
|
|
625
632
|
in_channels (int): The number of channels in the input space.
|
|
626
633
|
out_channels (int): The number of channels in the output space.
|
|
@@ -635,6 +642,8 @@ class Dense(Cell):
|
|
|
635
642
|
layer. Both activation name, e.g. 'relu', and mindspore activation function, e.g. mindspore.ops.ReLU(),
|
|
636
643
|
are supported. Default: ``None`` .
|
|
637
644
|
dtype (:class:`mindspore.dtype`): Data type of Parameter. Default: ``mstype.float32`` .
|
|
645
|
+
When `weight_init` is Tensor, Parameter has the same data type as `weight_init` ,
|
|
646
|
+
in other cases, Parameter has the same data type as `dtype`, the same goes for `bias_init`.
|
|
638
647
|
|
|
639
648
|
Inputs:
|
|
640
649
|
- **x** (Tensor) - Tensor of shape :math:`(*, in\_channels)`. The `in_channels` in `Args` should be equal
|
|
@@ -651,6 +660,7 @@ class Dense(Cell):
|
|
|
651
660
|
is not equal to `out_channels` or shape[1] of `weight_init` is not equal to `in_channels`.
|
|
652
661
|
ValueError: If length of shape of `bias_init` is not equal to 1
|
|
653
662
|
or shape[0] of `bias_init` is not equal to `out_channels`.
|
|
663
|
+
RuntimeError: If `bias` is ``False`` and `x` is greater than 6D in PYNATIVE mode.
|
|
654
664
|
|
|
655
665
|
Supported Platforms:
|
|
656
666
|
``Ascend`` ``GPU`` ``CPU``
|
|
@@ -752,6 +762,9 @@ class Linear(Cell):
|
|
|
752
762
|
.. math::
|
|
753
763
|
\text{outputs} = X * kernel + bias
|
|
754
764
|
|
|
765
|
+
.. warning::
|
|
766
|
+
In PYNATIVE mode, if `bias` is ``False`` , the `x` cannot be greater than 6D.
|
|
767
|
+
|
|
755
768
|
where :math:`X` is the input tensors, :math:`\text{kernel}` is a weight matrix with the same
|
|
756
769
|
data type as the :math:`X` created by the layer, and :math:`\text{bias}` is a bias vector
|
|
757
770
|
with the same data type as the :math:`X` created by the layer (only if has_bias is True).
|
|
@@ -767,6 +780,9 @@ class Linear(Cell):
|
|
|
767
780
|
same as `x`. The values of str refer to the function `initializer`. Default: ``None`` ,
|
|
768
781
|
bias will be initialized using Uniform.
|
|
769
782
|
dtype (:class:`mindspore.dtype`): Data type of Parameter. Default: ``None`` .
|
|
783
|
+
If `dtype` is ``None`` , `dtype` is set to ``mstype.float32`` when initializing the method.
|
|
784
|
+
When `weight_init` is Tensor, Parameter has the same data type as `weight_init` ,
|
|
785
|
+
in other cases, Parameter has the same data type as `dtype`, the same goes for `bias_init`.
|
|
770
786
|
|
|
771
787
|
Inputs:
|
|
772
788
|
- **x** (Tensor) - Tensor of shape :math:`(*, in\_features)`. The `in_features` in `Args` should be equal
|
|
@@ -782,6 +798,7 @@ class Linear(Cell):
|
|
|
782
798
|
is not equal to `out_features` or shape[1] of `weight_init` is not equal to `in_features`.
|
|
783
799
|
ValueError: If length of shape of `bias_init` is not equal to 1
|
|
784
800
|
or shape[0] of `bias_init` is not equal to `out_features`.
|
|
801
|
+
RuntimeError: If `bias` is ``False`` and `x` is greater than 6D in PYNATIVE mode.
|
|
785
802
|
|
|
786
803
|
Supported Platforms:
|
|
787
804
|
``Ascend`` ``GPU`` ``CPU``
|
mindspore/nn/layer/conv.py
CHANGED
|
@@ -862,6 +862,9 @@ class Conv3dTranspose(_Conv):
|
|
|
862
862
|
However, when `stride` > 1, Conv2d maps multiple input shapes to the same output shape. Deconvolutional network
|
|
863
863
|
can refer to `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_.
|
|
864
864
|
|
|
865
|
+
Note:
|
|
866
|
+
For Atlas A2 training series products, `output_padding` is currently not supported.
|
|
867
|
+
|
|
865
868
|
Args:
|
|
866
869
|
in_channels (int): The channel number of the input tensor of the Conv3dTranspose layer.
|
|
867
870
|
out_channels (int): The channel number of the output tensor of the Conv3dTranspose layer.
|
mindspore/nn/layer/embedding.py
CHANGED
|
@@ -164,11 +164,11 @@ class Embedding(Cell):
|
|
|
164
164
|
|
|
165
165
|
class EmbeddingExt(Cell):
|
|
166
166
|
r"""
|
|
167
|
-
|
|
168
|
-
Retrieve the word embeddings in weight stored in the layer using indices specified in `input`.
|
|
167
|
+
The value in `input` is used as the index, and the corresponding embedding vector is queried from `weight` .
|
|
169
168
|
|
|
170
169
|
.. warning::
|
|
171
|
-
|
|
170
|
+
- This is an experimental API that is subject to change or deletion.
|
|
171
|
+
- On Ascend, the behavior is unpredictable when the value of `input` is invalid.
|
|
172
172
|
|
|
173
173
|
Args:
|
|
174
174
|
num_embeddings (int): Size of the dictionary of embeddings.
|
|
@@ -183,14 +183,22 @@ class EmbeddingExt(Cell):
|
|
|
183
183
|
norm_type (float, optional): Indicated the value of p in p-norm. Default ``2.0``.
|
|
184
184
|
scale_grad_by_freq (bool, optional): If ``True`` the gradients will be scaled by the inverse of frequency
|
|
185
185
|
of the index in `input`. Default ``False``.
|
|
186
|
-
|
|
186
|
+
sparse (bool, optional): If ``True``, gradient w.r.t. `weight` matrix will be a sparse tensor which
|
|
187
|
+
has not been supported. Default: ``False``.
|
|
188
|
+
_weight (Tensor, optional): Used to initialize the `weight` of Embedding. If ``None``, the weight will be
|
|
187
189
|
initialized from normal distribution :math:`{N}(\text{sigma=1.0}, \text{mean=0.0})`. Default ``None``.
|
|
188
|
-
|
|
189
|
-
Default: ``
|
|
190
|
+
_freeze(bool, optional): If `weight` , the learnable weights of this module, should be freezed.
|
|
191
|
+
Default: ``False``.
|
|
192
|
+
dtype (mindspore.dtype, optional) : Dtype of Embedding's `weight` . It is meaningless when `_weight` is
|
|
193
|
+
not None. Default: ``None``.
|
|
194
|
+
|
|
195
|
+
Variables:
|
|
196
|
+
weight (Parameter): The learnable weights of this module of shape (num_embeddings, embedding_dim), which
|
|
197
|
+
initialized from :math:`{N}(\text{sigma=1.0}, \text{mean=0.0})` or `_weight` .
|
|
190
198
|
|
|
191
199
|
Inputs:
|
|
192
200
|
- **input** (Tensor) - The indices used to lookup in the embedding vector. The data type must be
|
|
193
|
-
|
|
201
|
+
int32 or int64, and the value should be in range `[0, num_embeddings)`.
|
|
194
202
|
|
|
195
203
|
Outputs:
|
|
196
204
|
Tensor, has the same data type as weight, the shape is :math:`(*input.shape, embedding\_dim)`.
|
|
@@ -202,6 +210,7 @@ class EmbeddingExt(Cell):
|
|
|
202
210
|
TypeError: If `max_norm` is not a float.
|
|
203
211
|
TypeError: If `norm_type` is not a float.
|
|
204
212
|
TypeError: If `scale_grad_by_freq` is not a bool.
|
|
213
|
+
ValueError: If `weight.shape` is invalid.
|
|
205
214
|
TypeError: If `dtype` is not one of mindspore.dtype.
|
|
206
215
|
|
|
207
216
|
Supported Platforms:
|
|
@@ -212,7 +221,7 @@ class EmbeddingExt(Cell):
|
|
|
212
221
|
>>> import numpy as np
|
|
213
222
|
>>> from mindspore import Tensor, nn
|
|
214
223
|
>>> input = Tensor([[1, 0, 1, 1], [0, 0, 1, 0]])
|
|
215
|
-
>>> embedding = nn.
|
|
224
|
+
>>> embedding = nn.EmbeddingExt(num_embeddings=10, embedding_dim=3)
|
|
216
225
|
>>> output = embedding(input)
|
|
217
226
|
>>> print(output)
|
|
218
227
|
[[[-0.0024154 -0.01203444 0.00811537]
|
|
@@ -226,23 +235,30 @@ class EmbeddingExt(Cell):
|
|
|
226
235
|
"""
|
|
227
236
|
|
|
228
237
|
def __init__(self, num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0,
|
|
229
|
-
scale_grad_by_freq=False, _weight=None, dtype=
|
|
238
|
+
scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, dtype=None):
|
|
230
239
|
"""Initialize Embedding."""
|
|
231
240
|
super().__init__()
|
|
241
|
+
self.sparse = Validator.check_value_type('sparse', sparse, [bool], self.cls_name)
|
|
242
|
+
if self.sparse:
|
|
243
|
+
raise ValueError("For Embedding, the scenerio, where `sparse` is True, has not be supported.")
|
|
232
244
|
self.num_embeddings = Validator.check_value_type(
|
|
233
245
|
'num_embeddings', num_embeddings, [int], self.cls_name)
|
|
234
246
|
self.embedding_dim = Validator.check_value_type(
|
|
235
247
|
'embedding_dim', embedding_dim, [int], self.cls_name)
|
|
248
|
+
self.dtype = dtype if dtype is not None else mstype.float32
|
|
236
249
|
Validator.check_subclass(
|
|
237
|
-
"dtype", dtype, mstype.number_type, self.cls_name)
|
|
238
|
-
self.dtype = dtype
|
|
250
|
+
"dtype", self.dtype, mstype.number_type, self.cls_name)
|
|
239
251
|
self.padding_idx = padding_idx
|
|
240
252
|
if _weight is None:
|
|
241
|
-
init_tensor = Tensor(shape=[num_embeddings, embedding_dim], dtype=dtype, init=Normal(1, 0))
|
|
253
|
+
init_tensor = Tensor(shape=[num_embeddings, embedding_dim], dtype=self.dtype, init=Normal(1, 0))
|
|
242
254
|
init_tensor = self._zero_weight_by_index(init_tensor)
|
|
243
|
-
self.weight = Parameter(init_tensor, name='weight')
|
|
255
|
+
self.weight = Parameter(init_tensor, name='weight', requires_grad=not _freeze)
|
|
244
256
|
else:
|
|
245
|
-
|
|
257
|
+
if _weight.shape != (num_embeddings, embedding_dim):
|
|
258
|
+
raise ValueError(f"For Embedding, shape of weight should be match with num_embeddings "
|
|
259
|
+
f"and embedding_dim, but got weight.shape: {_weight.shape}, "
|
|
260
|
+
f"and (num_embeddings, embedding_dim): ({num_embeddings}, {embedding_dim})")
|
|
261
|
+
self.weight = Parameter(_weight, name='weight', requires_grad=not _freeze)
|
|
246
262
|
|
|
247
263
|
self.max_norm = max_norm
|
|
248
264
|
if max_norm is not None:
|
|
@@ -300,6 +316,7 @@ class EmbeddingLookup(Cell):
|
|
|
300
316
|
specified 'axis = 0' to lookup table.
|
|
301
317
|
In field slice mode, the manual_shapes must be given. It is a tuple ,where
|
|
302
318
|
the element is vocab[i], vocab[i] is the row numbers for i-th part.
|
|
319
|
+
This module does not support the PyNative mode.
|
|
303
320
|
|
|
304
321
|
Args:
|
|
305
322
|
vocab_size (int): Size of the dictionary of embeddings.
|
mindspore/nn/layer/pooling.py
CHANGED
|
@@ -297,6 +297,9 @@ class MaxPool3d(_PoolNd):
|
|
|
297
297
|
\max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
|
|
298
298
|
\text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
|
|
299
299
|
|
|
300
|
+
.. note::
|
|
301
|
+
For Atlas training series products, this interface is not supported.
|
|
302
|
+
|
|
300
303
|
Args:
|
|
301
304
|
kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
|
|
302
305
|
is an int number or a single element tuple that represents depth, height and width of the kernel, or a tuple
|
|
@@ -1032,16 +1035,11 @@ class AvgPool2dExt(Cell):
|
|
|
1032
1035
|
>>> import numpy as np
|
|
1033
1036
|
>>> from mindspore import Tensor, nn
|
|
1034
1037
|
>>> from mindspore import dtype as mstype
|
|
1035
|
-
>>>
|
|
1036
|
-
>>>
|
|
1037
|
-
>>> output =
|
|
1038
|
-
>>> print(output)
|
|
1039
|
-
|
|
1040
|
-
[ 6.5 7.5 8.5]]
|
|
1041
|
-
[[14.5 15.5 16.5]
|
|
1042
|
-
[18.5 19.5 20.5]]
|
|
1043
|
-
[[26.5 27.5 28.5]
|
|
1044
|
-
[30.5 31.5 32.5]]]]
|
|
1038
|
+
>>> input = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mstype.float32)
|
|
1039
|
+
>>> net = nn.AvgPool2dExt(kernel_size=2, stride=1)
|
|
1040
|
+
>>> output = net(input)
|
|
1041
|
+
>>> print(output.shape)
|
|
1042
|
+
(1, 3, 2, 3)
|
|
1045
1043
|
"""
|
|
1046
1044
|
def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
|
|
1047
1045
|
count_include_pad=True, divisor_override=None):
|
|
@@ -20,6 +20,8 @@ from mindspore.common.tensor import Tensor
|
|
|
20
20
|
from mindspore.nn.optim.optimizer import Optimizer
|
|
21
21
|
from mindspore.ops.operations.manually_defined._inner import TensorReport
|
|
22
22
|
from mindspore import ops, context
|
|
23
|
+
from mindspore.common.parameter import Parameter
|
|
24
|
+
import mindspore.common.dtype as mstype
|
|
23
25
|
|
|
24
26
|
class OptTFTWrapper(Optimizer):
|
|
25
27
|
r"""
|
|
@@ -61,9 +63,9 @@ class OptTFTWrapper(Optimizer):
|
|
|
61
63
|
"""
|
|
62
64
|
|
|
63
65
|
def __init__(self, opt, **kwargs):
|
|
64
|
-
super(OptTFTWrapper, self).__init__(opt.learning_rate, opt._parameters) # pylint: disable=W0212
|
|
65
66
|
if not isinstance(opt, Optimizer):
|
|
66
67
|
raise TypeError(f"For 'OptTFTWrapper', the argument 'opt' must be Optimizer type, " f"but got {type(opt)}.")
|
|
68
|
+
super(OptTFTWrapper, self).__init__(opt.learning_rate, opt._parameters) # pylint: disable=W0212
|
|
67
69
|
tft_env = os.getenv("MS_ENABLE_TFT", "")
|
|
68
70
|
if ("TTP:1" not in tft_env) and ("UCE:1" not in tft_env):
|
|
69
71
|
raise ValueError("MindIO TFT regitster need custom switch on[MS_ENABLE_TFT='{TTP:1,UCE:1}']!")
|
|
@@ -74,13 +76,9 @@ class OptTFTWrapper(Optimizer):
|
|
|
74
76
|
self.opt = opt
|
|
75
77
|
self.report = TensorReport()
|
|
76
78
|
self.depend = ops.Depend()
|
|
77
|
-
self.
|
|
78
|
-
|
|
79
|
-
self.
|
|
80
|
-
|
|
81
|
-
if self.use_allreduce:
|
|
82
|
-
self.allreduce_sum = ops.AllReduce()
|
|
83
|
-
self.allreduce_sum.add_prim_attr("tft_report_before", True)
|
|
79
|
+
self.allreduce_sum = ops.AllReduce()
|
|
80
|
+
self.allreduce_sum.add_prim_attr("tft_report_before", True)
|
|
81
|
+
self.tft_g_one_flag = Parameter(Tensor([1], dtype=mstype.int32))
|
|
84
82
|
|
|
85
83
|
self.param_rank = opt.param_rank
|
|
86
84
|
self.optim_filter = opt.optim_filter
|
|
@@ -118,10 +116,9 @@ class OptTFTWrapper(Optimizer):
|
|
|
118
116
|
self.enable_tuple_broaden = opt.enable_tuple_broaden
|
|
119
117
|
|
|
120
118
|
def construct(self, gradients):
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
return self.opt(gradients)
|
|
119
|
+
tft_g_one_flag = self.depend(self.tft_g_one_flag, gradients)
|
|
120
|
+
self.tft_g_one_flag = self.allreduce_sum(tft_g_one_flag)
|
|
121
|
+
|
|
122
|
+
grads = self.depend(gradients, self.report("tft_report", self.tft_g_one_flag))
|
|
123
|
+
opt_ret = self.opt(grads)
|
|
124
|
+
return opt_ret
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Copyright 2024 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""
|
|
16
|
+
nn.utils.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import absolute_import
|
|
19
|
+
|
|
20
|
+
from .init import no_init_parameters
|
|
21
|
+
|
|
22
|
+
__all__ = ["no_init_parameters"]
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Copyright 2024 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
|
|
16
|
+
"""init for nn.Cell."""
|
|
17
|
+
from __future__ import absolute_import
|
|
18
|
+
|
|
19
|
+
from contextlib import contextmanager
|
|
20
|
+
from mindspore.common.parameter import Parameter
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@contextmanager
|
|
24
|
+
def no_init_parameters():
|
|
25
|
+
r"""
|
|
26
|
+
In scenarios where a checkpoint is loaded, parameters within the network instantiation will be
|
|
27
|
+
instantiated and occupy physical memory. Loading a checkpoint will replace the parameter values.
|
|
28
|
+
Decorator can be applied during network instantiation to add an attribute `init_param` to all
|
|
29
|
+
parameters within the current Cell, setting it to `init_param=False` .
|
|
30
|
+
When `init_param=False` is detected, the initialization of the parameters is skipped,
|
|
31
|
+
and the parameters are assigned values directly from the checkpoint during loading,
|
|
32
|
+
which can optimize performance and reduce physical memory usage.
|
|
33
|
+
|
|
34
|
+
Note:
|
|
35
|
+
Initialization of parameters created with `initializer` can only be skipped.
|
|
36
|
+
Parameters created by `Tensor` or `numpy` cannot be skipped.
|
|
37
|
+
|
|
38
|
+
Examples:
|
|
39
|
+
>>> import mindspore as ms
|
|
40
|
+
>>> from mindspore import nn, ops, load_checkpoint
|
|
41
|
+
>>> from mindspore.common.initializer import initializer
|
|
42
|
+
>>> from mindspore.nn.utils import no_init_parameters
|
|
43
|
+
>>> # 1. Add a decorator to the network that requires delayed initialization
|
|
44
|
+
>>> class Net(nn.Cell):
|
|
45
|
+
... def __init__(self, in_channels, out_channels):
|
|
46
|
+
... super().__init__()
|
|
47
|
+
... self.weight = ms.Parameter(initializer("normal", [in_channels, out_channels], ms.float32))
|
|
48
|
+
... self.bias = ms.Parameter(initializer("normal", [out_channels], ms.float32))
|
|
49
|
+
... self.matmul = ops.MatMul()
|
|
50
|
+
... self.add = ops.Add()
|
|
51
|
+
...
|
|
52
|
+
... def construct(self, x):
|
|
53
|
+
... x = self.matmul(x, self.weight)
|
|
54
|
+
... x = self.add(x, self.bias)
|
|
55
|
+
... return x
|
|
56
|
+
>>> with no_init_parameters():
|
|
57
|
+
... # After instantiation, all parameters in the net are not initialized
|
|
58
|
+
... net = Net(28*28, 64)
|
|
59
|
+
>>> # 2. Load checkpoint parameters to the net
|
|
60
|
+
>>> load_checkpoint('./checkpoint/test_net.ckpt', net=net)
|
|
61
|
+
>>> # 3. After loading the checkpoint, manually call init_parameters_data() to initialize
|
|
62
|
+
>>> # the uninitialized parameters in the net if need. If the network is executed,
|
|
63
|
+
>>> # the framework will automatically call this interface.
|
|
64
|
+
>>> net.init_parameters_data()
|
|
65
|
+
"""
|
|
66
|
+
init_class = Parameter
|
|
67
|
+
setattr(init_class, "init_param", False)
|
|
68
|
+
try:
|
|
69
|
+
yield
|
|
70
|
+
finally:
|
|
71
|
+
setattr(init_class, "init_param", True)
|
mindspore/opencv_core452.dll
CHANGED
|
Binary file
|
|
Binary file
|
mindspore/opencv_imgproc452.dll
CHANGED
|
Binary file
|
|
@@ -38,7 +38,6 @@ from mindspore.ops.operations.array_ops import SegmentMean
|
|
|
38
38
|
from mindspore.ops.operations.array_ops import AffineGrid
|
|
39
39
|
from mindspore.ops.operations.array_ops import MaskedScatter
|
|
40
40
|
from mindspore.ops.operations.array_ops import MaskedSelect
|
|
41
|
-
from mindspore.ops.operations.array_ops import CountNonZero
|
|
42
41
|
from mindspore.ops.operations.random_ops import LogNormalReverse
|
|
43
42
|
from mindspore.ops.operations.random_ops import ParameterizedTruncatedNormal
|
|
44
43
|
from mindspore.ops.operations import _inner_ops as inner
|
|
@@ -125,16 +124,6 @@ def get_bprop_masked_scatter(self):
|
|
|
125
124
|
return bprop
|
|
126
125
|
|
|
127
126
|
|
|
128
|
-
@bprop_getters.register(CountNonZero)
|
|
129
|
-
def get_bprop_countnonzero(self):
|
|
130
|
-
"""Grad definition for CountNonZero"""
|
|
131
|
-
|
|
132
|
-
def bprop(x, out, dout):
|
|
133
|
-
return (zeros_like(x),)
|
|
134
|
-
|
|
135
|
-
return bprop
|
|
136
|
-
|
|
137
|
-
|
|
138
127
|
@bprop_getters.register(Mvlgamma)
|
|
139
128
|
def get_bprop_mvlgamma(self):
|
|
140
129
|
"""Grad definition for Mvlgamma"""
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"""Generate bprop for comm ops"""
|
|
17
17
|
from __future__ import division
|
|
18
18
|
from __future__ import absolute_import
|
|
19
|
-
from mindspore import Tensor
|
|
19
|
+
from mindspore import Tensor, Parameter
|
|
20
20
|
import mindspore.common.dtype as mstype
|
|
21
21
|
from mindspore.ops import functional as F
|
|
22
22
|
from mindspore.communication import get_rank, get_group_size
|
|
@@ -31,11 +31,15 @@ from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _H
|
|
|
31
31
|
_GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
|
|
32
32
|
ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
|
|
33
33
|
_VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,
|
|
34
|
-
_MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter
|
|
34
|
+
_MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter,
|
|
35
|
+
_VirtualAssignKvCache)
|
|
35
36
|
from mindspore.ops._grad_experimental.grad_base import bprop_getters
|
|
36
37
|
from mindspore.ops.operations import _grad_ops as G
|
|
37
38
|
import mindspore as ms
|
|
38
39
|
|
|
40
|
+
_device_local_norm = None
|
|
41
|
+
if ms.get_auto_parallel_context("dump_device_local_norm"):
|
|
42
|
+
_device_local_norm = Parameter(Tensor(0.0, mstype.float32), name="_device_local_norm", requires_grad=False)
|
|
39
43
|
|
|
40
44
|
@bprop_getters.register(AllReduce)
|
|
41
45
|
def get_bprop_all_reduce(self):
|
|
@@ -179,6 +183,24 @@ def get_bprop_virtual_assign_add(self):
|
|
|
179
183
|
return bprop
|
|
180
184
|
|
|
181
185
|
|
|
186
|
+
@bprop_getters.register(_VirtualAssignKvCache)
|
|
187
|
+
def get_bprop_virtual_assign_kv_cache(self):
|
|
188
|
+
"""Generate bprop for VirtualAssignAdd."""
|
|
189
|
+
assign = P.Assign()
|
|
190
|
+
cast = P.Cast()
|
|
191
|
+
dtype = P.DType()
|
|
192
|
+
out_tensor = Tensor(0.0, mstype.float16)
|
|
193
|
+
|
|
194
|
+
def bprop(x, y, seq_chunk, out, dout):
|
|
195
|
+
dout_update = dout + y
|
|
196
|
+
kv_equal = F.equal(seq_chunk, 0)
|
|
197
|
+
update_kv = F.select(kv_equal, F.broadcast_to(cast(out_tensor, dtype(y)), F.shape(y)), dout_update)
|
|
198
|
+
return F.depend((dout_update, cast(out_tensor, dtype(y)),
|
|
199
|
+
cast(out_tensor, dtype(seq_chunk))), assign(y, update_kv))
|
|
200
|
+
|
|
201
|
+
return bprop
|
|
202
|
+
|
|
203
|
+
|
|
182
204
|
@bprop_getters.register(_VirtualAccuGrad)
|
|
183
205
|
def get_bprop_virtual_accu_grad(self):
|
|
184
206
|
"""Generate bprop for VirtualAccuGrad."""
|
|
@@ -228,10 +250,15 @@ def get_bprop_mirror_micro_step_operator(self):
|
|
|
228
250
|
reduce_sum = P.ReduceSum(keep_dims=False)
|
|
229
251
|
square = P.Square()
|
|
230
252
|
dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
|
|
253
|
+
dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
|
|
231
254
|
|
|
232
255
|
def bprop(x, z, out, dout):
|
|
233
|
-
if dump_local_norm:
|
|
234
|
-
|
|
256
|
+
if dump_local_norm or dump_device_local_norm:
|
|
257
|
+
_norm = reduce_sum(square((z)))
|
|
258
|
+
if dump_local_norm:
|
|
259
|
+
z = F.depend(z, ln_print("dump local norm: ", param_name, _norm))
|
|
260
|
+
if dump_device_local_norm:
|
|
261
|
+
z = F.depend(z, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
|
|
235
262
|
real_grad = z
|
|
236
263
|
assign_out = dout
|
|
237
264
|
if issubclass_(F.typeof(dout), mstype.tensor_type):
|
|
@@ -354,6 +381,7 @@ def get_bprop_micro_step_all_gather(self):
|
|
|
354
381
|
reduce_sum = P.ReduceSum(keep_dims=False)
|
|
355
382
|
square = P.Square()
|
|
356
383
|
dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
|
|
384
|
+
dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
|
|
357
385
|
|
|
358
386
|
def bprop(x, z, out, dout):
|
|
359
387
|
if with_mirror_operator:
|
|
@@ -364,8 +392,12 @@ def get_bprop_micro_step_all_gather(self):
|
|
|
364
392
|
real_grad = F.tensor_mul(real_grad, scale)
|
|
365
393
|
return (real_grad, cast(out_tensor, dtype(z)))
|
|
366
394
|
z = F.depend(z, dout)
|
|
367
|
-
if dump_local_norm:
|
|
368
|
-
|
|
395
|
+
if dump_local_norm or dump_device_local_norm:
|
|
396
|
+
_norm = reduce_sum(square((z)))
|
|
397
|
+
if dump_local_norm:
|
|
398
|
+
z = F.depend(z, ln_print("dump local norm: ", param_name, _norm))
|
|
399
|
+
if dump_device_local_norm:
|
|
400
|
+
z = F.depend(z, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
|
|
369
401
|
if not do_mirror:
|
|
370
402
|
return (z, cast(out_tensor, dtype(z)))
|
|
371
403
|
real_grad = reduce_scatter(z)
|
|
@@ -567,6 +599,7 @@ def get_bprop_mirror_operator(self):
|
|
|
567
599
|
|
|
568
600
|
dev_num_r = 1.0
|
|
569
601
|
dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
|
|
602
|
+
dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
|
|
570
603
|
if dev_num > 1:
|
|
571
604
|
dev_num_r = 1.0 / dev_num
|
|
572
605
|
all_reduce = AllReduce(group=group)
|
|
@@ -589,8 +622,12 @@ def get_bprop_mirror_operator(self):
|
|
|
589
622
|
all_reduce.set_prim_instance_name(instance_name)
|
|
590
623
|
|
|
591
624
|
def bprop(x, out, dout):
|
|
592
|
-
if dump_local_norm:
|
|
593
|
-
|
|
625
|
+
if dump_local_norm or dump_device_local_norm:
|
|
626
|
+
_norm = reduce_sum(square((dout)))
|
|
627
|
+
if dump_local_norm:
|
|
628
|
+
dout = F.depend(dout, ln_print("dump local norm: ", param_name, _norm))
|
|
629
|
+
if dump_device_local_norm:
|
|
630
|
+
dout = F.depend(dout, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
|
|
594
631
|
|
|
595
632
|
if dev_num == 1:
|
|
596
633
|
return (dout,)
|
|
@@ -65,6 +65,7 @@ op_args_default_value = {
|
|
|
65
65
|
"ConvolutionGrad": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1, "output_mask": ()},
|
|
66
66
|
"Convolution": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1},
|
|
67
67
|
"Correlate": {"mode": 'valid'},
|
|
68
|
+
"CountNonZero": {"dim": None},
|
|
68
69
|
"Cross": {"dim": -65530},
|
|
69
70
|
"CumProd": {"exclusive": False, "reverse": False},
|
|
70
71
|
"CumSum": {"exclusive": False, "reverse": False},
|
|
@@ -185,6 +186,11 @@ op_args_default_value = {
|
|
|
185
186
|
"Qr": {"full_matrices": False},
|
|
186
187
|
"RandExt": {"dtype": None},
|
|
187
188
|
"RandLikeExt": {"dtype": None},
|
|
189
|
+
"RandIntLike": {"dtype": None},
|
|
190
|
+
"RandInt": {"dtype": None},
|
|
191
|
+
"RandnLike": {"dtype": None},
|
|
192
|
+
"Randn": {"dtype": None},
|
|
193
|
+
"RandpermExt": {"dtype": mstype.int64},
|
|
188
194
|
"RandpermV2": {"seed": 0, "offset": 0, "dtype": mstype.int64},
|
|
189
195
|
"Range": {"maxlen": 1000000},
|
|
190
196
|
"ReduceAll": {"axis": None, "keep_dims": False},
|