mindspore-2.3.0-cp39-cp39-win_amd64.whl → mindspore-2.4.1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +3 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +50 -9
- mindspore/_extends/parse/compile_config.py +41 -0
- mindspore/_extends/parse/parser.py +9 -7
- mindspore/_extends/parse/standard_method.py +52 -14
- mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
- mindspore/amp.py +24 -10
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/__init__.py +6 -4
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_tensor.py +2 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/api.py +102 -87
- mindspore/common/dump.py +5 -6
- mindspore/common/generator.py +1 -7
- mindspore/common/hook_handle.py +14 -26
- mindspore/common/initializer.py +51 -15
- mindspore/common/mindir_util.py +2 -2
- mindspore/common/parameter.py +62 -15
- mindspore/common/recompute.py +39 -9
- mindspore/common/sparse_tensor.py +7 -3
- mindspore/common/tensor.py +183 -37
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +38 -3
- mindspore/communication/comm_func.py +315 -60
- mindspore/communication/management.py +14 -14
- mindspore/context.py +132 -22
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/__init__.py +1 -1
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +72 -44
- mindspore/dataset/engine/datasets_audio.py +7 -7
- mindspore/dataset/engine/datasets_standard_format.py +53 -3
- mindspore/dataset/engine/datasets_text.py +20 -20
- mindspore/dataset/engine/datasets_user_defined.py +174 -104
- mindspore/dataset/engine/datasets_vision.py +33 -33
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/queue.py +114 -60
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +34 -14
- mindspore/dataset/text/__init__.py +1 -4
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +1 -4
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/dataset/vision/validators.py +2 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/adamw.py +85 -0
- mindspore/experimental/optim/optimizer.py +3 -0
- mindspore/hal/__init__.py +3 -3
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/stream.py +18 -0
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/types.h +10 -10
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +2 -2
- mindspore/include/dataset/vision.h +4 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filewriter.py +68 -51
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +983 -46
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/mint/nn/__init__.py +268 -23
- mindspore/mint/nn/functional.py +125 -19
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/adamw.py +26 -13
- mindspore/mint/special/__init__.py +63 -0
- mindspore/multiprocessing/__init__.py +2 -1
- mindspore/nn/__init__.py +0 -1
- mindspore/nn/cell.py +276 -96
- mindspore/nn/layer/activation.py +211 -44
- mindspore/nn/layer/basic.py +137 -10
- mindspore/nn/layer/embedding.py +137 -2
- mindspore/nn/layer/normalization.py +101 -5
- mindspore/nn/layer/padding.py +34 -48
- mindspore/nn/layer/pooling.py +161 -7
- mindspore/nn/layer/transformer.py +3 -3
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +84 -6
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +124 -0
- mindspore/nn/wrap/cell_wrapper.py +12 -23
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/nn/wrap/loss_scale.py +17 -3
- mindspore/numpy/__init__.py +1 -1
- mindspore/numpy/array_creations.py +65 -68
- mindspore/numpy/array_ops.py +64 -60
- mindspore/numpy/fft.py +610 -75
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +85 -84
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -4
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
- mindspore/ops/_grad_experimental/grad_comm_ops.py +67 -4
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
- mindspore/ops/_vmap/vmap_array_ops.py +2 -4
- mindspore/ops/_vmap/vmap_math_ops.py +17 -1
- mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +91 -7
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
- mindspore/ops/auto_generate/gen_extend_func.py +767 -13
- mindspore/ops/auto_generate/gen_ops_def.py +2452 -364
- mindspore/ops/auto_generate/gen_ops_prim.py +5442 -1756
- mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
- mindspore/ops/composite/base.py +85 -48
- mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
- mindspore/ops/function/__init__.py +22 -0
- mindspore/ops/function/array_func.py +492 -153
- mindspore/ops/function/debug_func.py +113 -1
- mindspore/ops/function/fft_func.py +15 -2
- mindspore/ops/function/grad/grad_func.py +3 -2
- mindspore/ops/function/math_func.py +564 -207
- mindspore/ops/function/nn_func.py +817 -383
- mindspore/ops/function/other_func.py +3 -2
- mindspore/ops/function/random_func.py +402 -12
- mindspore/ops/function/reshard_func.py +13 -11
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +3 -2
- mindspore/ops/functional.py +24 -14
- mindspore/ops/op_info_register.py +3 -3
- mindspore/ops/operations/__init__.py +7 -2
- mindspore/ops/operations/_grad_ops.py +2 -76
- mindspore/ops/operations/_infer_ops.py +1 -1
- mindspore/ops/operations/_inner_ops.py +71 -94
- mindspore/ops/operations/array_ops.py +14 -146
- mindspore/ops/operations/comm_ops.py +63 -53
- mindspore/ops/operations/custom_ops.py +83 -19
- mindspore/ops/operations/debug_ops.py +42 -10
- mindspore/ops/operations/manually_defined/_inner.py +12 -0
- mindspore/ops/operations/manually_defined/ops_def.py +273 -20
- mindspore/ops/operations/math_ops.py +12 -223
- mindspore/ops/operations/nn_ops.py +20 -114
- mindspore/ops/operations/other_ops.py +7 -4
- mindspore/ops/operations/random_ops.py +46 -1
- mindspore/ops/primitive.py +18 -6
- mindspore/ops_generate/arg_dtype_cast.py +2 -0
- mindspore/ops_generate/gen_aclnn_implement.py +11 -11
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +67 -52
- mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
- mindspore/ops_generate/gen_pyboost_func.py +131 -47
- mindspore/ops_generate/op_proto.py +10 -3
- mindspore/ops_generate/pyboost_utils.py +14 -1
- mindspore/ops_generate/template.py +43 -21
- mindspore/parallel/__init__.py +3 -1
- mindspore/parallel/_auto_parallel_context.py +31 -9
- mindspore/parallel/_cell_wrapper.py +85 -0
- mindspore/parallel/_parallel_serialization.py +47 -19
- mindspore/parallel/_tensor.py +127 -13
- mindspore/parallel/_utils.py +53 -22
- mindspore/parallel/algo_parameter_config.py +5 -5
- mindspore/parallel/checkpoint_transform.py +46 -39
- mindspore/parallel/cluster/process_entity/__init__.py +1 -1
- mindspore/parallel/cluster/process_entity/_api.py +31 -23
- mindspore/parallel/cluster/process_entity/_utils.py +2 -27
- mindspore/parallel/parameter_broadcast.py +3 -4
- mindspore/parallel/shard.py +162 -31
- mindspore/parallel/transform_safetensors.py +1146 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/util.py +28 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +17 -19
- mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
- mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
- mindspore/profiler/parser/base_timeline_generator.py +19 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +1 -391
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/profiler_info.py +78 -6
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +285 -413
- mindspore/rewrite/__init__.py +1 -2
- mindspore/rewrite/common/namespace.py +4 -4
- mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
- mindspore/run_check/_check_version.py +39 -104
- mindspore/safeguard/rewrite_obfuscation.py +591 -247
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +4 -3
- mindspore/train/_utils.py +105 -19
- mindspore/train/amp.py +171 -53
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +97 -31
- mindspore/train/callback/_cluster_monitor.py +1 -1
- mindspore/train/callback/_flops_collector.py +1 -0
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +145 -31
- mindspore/train/callback/_summary_collector.py +5 -5
- mindspore/train/callback/_tft_register.py +375 -0
- mindspore/train/dataset_helper.py +15 -3
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/mind_ir_pb2.py +44 -39
- mindspore/train/model.py +154 -58
- mindspore/train/serialization.py +342 -128
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +13 -7
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +260 -254
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +1 -1
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -283
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/extend/basic.py +0 -140
- mindspore/nn/extend/embedding.py +0 -143
- mindspore/nn/extend/layer/normalization.py +0 -109
- mindspore/nn/extend/pooling.py +0 -117
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/extend/__init__.py +0 -53
- mindspore/ops/extend/array_func.py +0 -218
- mindspore/ops/extend/math_func.py +0 -76
- mindspore/ops/extend/nn_func.py +0 -308
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/ops/__init__.py
CHANGED
@@ -29,13 +29,14 @@ from mindspore.ops.vm_impl_registry import get_vm_impl_fn, vm_impl_registry
 from mindspore.ops.op_info_register import op_info_register, custom_info_register, AkgGpuRegOp, AkgAscendRegOp, \
     AiCPURegOp, TBERegOp, CpuRegOp, CustomRegOp, DataType
 from mindspore.ops.primitive import constexpr
-from mindspore.ops import composite, operations, functional, function
+from mindspore.ops import composite, operations, functional, function
 from mindspore.ops import signature
+from mindspore.ops.auto_generate import cpp_create_prim_instance_helper, gen_arg_dtype_cast, gen_arg_handler, \
+    gen_extend_func, gen_ops_def, gen_ops_prim, pyboost_inner_prim
 from mindspore.ops.composite import *
 from mindspore.ops.operations import *
 from mindspore.ops.function import *
 from mindspore.ops.functional import *
-from mindspore.ops.silent_check import _silent_check
 
 __primitive__ = [
     "prim_attr_register", "prim_arg_register", "Primitive", "PrimitiveWithInfer", "PrimitiveWithCheck", "signature"
@@ -44,11 +45,12 @@ __primitive__ = [
 __all__ = ["get_vm_impl_fn", "vm_impl_registry",
            "op_info_register", "custom_info_register", "AkgGpuRegOp", "AkgAscendRegOp", "AiCPURegOp", "TBERegOp",
            "CpuRegOp", "CustomRegOp", "DataType",
-           "constexpr", "reshard"]
+           "constexpr", "reshard",
+           "cpp_create_prim_instance_helper", "gen_arg_dtype_cast", "gen_arg_handler", "gen_extend_func", "gen_ops_def",
+           "gen_ops_prim", "pyboost_inner_prim"]
 __all__.extend(__primitive__)
 __all__.extend(composite.__all__)
 __all__.extend(operations.__all__)
 __all__.extend(functional.__all__)
 __all__.extend(function.__all__)
 __all__.extend(auto_generate.__all__)
-_silent_check()
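The hunks above re-export the auto-generated operator modules from `mindspore.ops` and drop the import-time `_silent_check()` call (the whole `mindspore/ops/silent_check.py` module is deleted in this release; see the file list). A minimal sketch of what the new re-exports make reachable, assuming the 2.4.1 wheel is installed:

    import mindspore.ops as ops
    # The auto-generated modules are now first-class attributes of mindspore.ops
    # and are listed in its __all__ (per the hunks above).
    from mindspore.ops import gen_ops_prim, pyboost_inner_prim

    assert "gen_ops_prim" in ops.__all__ and "pyboost_inner_prim" in ops.__all__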
mindspore/ops/_grad_experimental/grad_array_ops.py
CHANGED
@@ -38,7 +38,6 @@ from mindspore.ops.operations.array_ops import SegmentMean
 from mindspore.ops.operations.array_ops import AffineGrid
 from mindspore.ops.operations.array_ops import MaskedScatter
 from mindspore.ops.operations.array_ops import MaskedSelect
-from mindspore.ops.operations.array_ops import CountNonZero
 from mindspore.ops.operations.random_ops import LogNormalReverse
 from mindspore.ops.operations.random_ops import ParameterizedTruncatedNormal
 from mindspore.ops.operations import _inner_ops as inner
@@ -125,16 +124,6 @@ def get_bprop_masked_scatter(self):
     return bprop
 
 
-@bprop_getters.register(CountNonZero)
-def get_bprop_countnonzero(self):
-    """Grad definition for CountNonZero"""
-
-    def bprop(x, out, dout):
-        return (zeros_like(x),)
-
-    return bprop
-
-
 @bprop_getters.register(Mvlgamma)
 def get_bprop_mvlgamma(self):
     """Grad definition for Mvlgamma"""
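The hand-written `CountNonZero` bprop is removed; its gradient was identically zero because counting nonzeros is piecewise constant in the input (the op also gains an auto-generated default, `"CountNonZero": {"dim": None}`, later in this diff). A pure-numpy illustration of why the zero gradient is correct:

    import numpy as np

    x = np.array([0.0, 1.5, -2.0, 0.0])
    # Perturb only the nonzero entries: the count never changes, so the
    # derivative of count_nonzero w.r.t. x is 0 almost everywhere.
    eps = 1e-6
    perturbation = eps * (x != 0)
    assert np.count_nonzero(x) == np.count_nonzero(x + perturbation) == 2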
mindspore/ops/_grad_experimental/grad_comm_ops.py
CHANGED
@@ -31,9 +31,11 @@ from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _H
                                                _GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
                                                ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
                                                _VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,
-                                               _MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter
+                                               _MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter,
+                                               _VirtualAssignKvCache)
 from mindspore.ops._grad_experimental.grad_base import bprop_getters
 from mindspore.ops.operations import _grad_ops as G
+import mindspore as ms
 
 
 @bprop_getters.register(AllReduce)
@@ -95,6 +97,12 @@ def get_bprop_send(self):
     dtype = self.get_attr_dict()["dtype"]
     tag = self.get_attr_dict()["sr_tag"]
     send_grad = Receive(tag, self.rank, shape, dtype, self.group_back)
+    if "dst_global_rank" in self.get_attr_dict():
+        dst_global_rank = self.get_attr_dict().get("dst_global_rank")
+        send_grad.add_prim_attr("src_global_rank", dst_global_rank)
+    if "RING_ATTENTION_INDEX" in self.get_attr_dict():
+        ringattention = self.get_attr_dict().get("RING_ATTENTION_INDEX")
+        send_grad.add_prim_attr("RING_ATTENTION_INDEX", ringattention)
     virtual_input = Tensor(0.0, dtype)
 
     def bprop(x, out, dout):
@@ -108,8 +116,16 @@ def get_bprop_send(self):
 def get_bprop_receive(self):
     """Generate bprop for Receive."""
     tag = self.get_attr_dict()["sr_tag"]
+    flash_tag = self.get_attr_dict().get("flash_tag")
     receive_grad = Send(tag, self.rank, self.group_back)
-
+    shape = self.get_attr_dict()["shape"]
+    receive_grad.add_prim_attr("shape", shape)
+    if "src_global_rank" in self.get_attr_dict():
+        src_global_rank = self.get_attr_dict().get("src_global_rank")
+        receive_grad.add_prim_attr("dst_global_rank", src_global_rank)
+    if "RING_ATTENTION_INDEX" in self.get_attr_dict():
+        ringattention = self.get_attr_dict().get("RING_ATTENTION_INDEX")
+        receive_grad.add_prim_attr("RING_ATTENTION_INDEX", ringattention)
     depend = P.Depend()
     cast = P.Cast()
     out_tensor = Tensor(0.0, mstype.float16)
@@ -117,7 +133,7 @@ def get_bprop_receive(self):
 
     def bprop(x, out, dout):
         send_out = receive_grad(dout)
-        if is_opt_shard:
+        if is_opt_shard or (flash_tag == "True"):
             dx = depend(F.zeros_like(x), send_out)
         else:
             dx = depend(cast(out_tensor, F.dtype(x)), send_out)
@@ -164,6 +180,24 @@ def get_bprop_virtual_assign_add(self):
     return bprop
 
 
+@bprop_getters.register(_VirtualAssignKvCache)
+def get_bprop_virtual_assign_kv_cache(self):
+    """Generate bprop for VirtualAssignAdd."""
+    assign = P.Assign()
+    cast = P.Cast()
+    dtype = P.DType()
+    out_tensor = Tensor(0.0, mstype.float16)
+
+    def bprop(x, y, seq_chunk, out, dout):
+        dout_update = dout + y
+        kv_equal = F.equal(seq_chunk, 0)
+        update_kv = F.select(kv_equal, F.broadcast_to(cast(out_tensor, dtype(y)), F.shape(y)), dout_update)
+        return F.depend((dout_update, cast(out_tensor, dtype(y)),
+                         cast(out_tensor, dtype(seq_chunk))), assign(y, update_kv))
+
+    return bprop
+
+
 @bprop_getters.register(_VirtualAccuGrad)
 def get_bprop_virtual_accu_grad(self):
     """Generate bprop for VirtualAccuGrad."""
@@ -186,6 +220,9 @@ def get_bprop_mirror_micro_step_operator(self):
     group = self.group
     dev_num = self.dev_num
     mean_flag = self.mean_flag
+    param_name = " "
+    if 'mirror_user_id' in self.get_attr_dict():
+        param_name = self.get_attr_dict()['mirror_user_id']
     scale = 1 / dev_num
 
     all_reduce = AllReduce(group=group)
@@ -196,7 +233,6 @@ def get_bprop_mirror_micro_step_operator(self):
     if hasattr(self, 'parameter'):
         parameter = self.parameter
         all_reduce.add_prim_attr("parameter", parameter)
-
     if self.instance_name:
         instance_name = "grad_mirror" + self.instance_name
         all_reduce.set_prim_instance_name(instance_name)
@@ -207,8 +243,14 @@ def get_bprop_mirror_micro_step_operator(self):
     assign.add_prim_attr("parameter_micro", 0)
     out_tensor = Tensor(1.0, mstype.float16)
     opt_shard = _get_enable_parallel_optimizer()
+    ln_print = P.Print()
+    reduce_sum = P.ReduceSum(keep_dims=False)
+    square = P.Square()
+    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
 
     def bprop(x, z, out, dout):
+        if dump_local_norm:
+            z = F.depend(z, ln_print("dump local norm: ", param_name, reduce_sum(square((z)))))
         real_grad = z
         assign_out = dout
         if issubclass_(F.typeof(dout), mstype.tensor_type):
@@ -309,6 +351,9 @@ def get_bprop_micro_step_all_gather(self):
     """Generate bprop for _MicroStepAllGather"""
     fusion = self.get_attr_dict()["fusion"]
     mean_flag = self.get_attr_dict()["mean_flag"]
+    param_name = " "
+    if 'mirror_user_id' in self.get_attr_dict():
+        param_name = self.get_attr_dict()['mirror_user_id']
     do_mirror = False
     if self.group != "":
         do_mirror = self.get_attr_dict()["do_mirror"]
@@ -324,6 +369,10 @@ def get_bprop_micro_step_all_gather(self):
     dtype = P.DType()
     out_tensor = Tensor(1.0, mstype.float16)
     with_mirror_operator = self.get_attr_dict()["with_mirror_operator"]
+    ln_print = P.Print()
+    reduce_sum = P.ReduceSum(keep_dims=False)
+    square = P.Square()
+    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
 
     def bprop(x, z, out, dout):
         if with_mirror_operator:
@@ -334,6 +383,8 @@ def get_bprop_micro_step_all_gather(self):
             real_grad = F.tensor_mul(real_grad, scale)
             return (real_grad, cast(out_tensor, dtype(z)))
         z = F.depend(z, dout)
+        if dump_local_norm:
+            z = F.depend(z, ln_print("dump local norm: ", param_name, reduce_sum(square((z)))))
         if not do_mirror:
             return (z, cast(out_tensor, dtype(z)))
         real_grad = reduce_scatter(z)
@@ -529,16 +580,25 @@ def get_bprop_mirror_operator(self):
     group = self.get_attr_dict()['group']
     dev_num = self.get_attr_dict()['dev_num']
     mean_flag = self.get_attr_dict()['mean_flag']
+    param_name = " "
+    if 'mirror_user_id' in self.get_attr_dict():
+        param_name = self.get_attr_dict()['mirror_user_id']
+
     dev_num_r = 1.0
+    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
     if dev_num > 1:
         dev_num_r = 1.0 / dev_num
         all_reduce = AllReduce(group=group)
         all_gather = AllGather(group=group)
         mul = P.Mul()
         cast = P.Cast()
+        ln_print = P.Print()
+        reduce_sum = P.ReduceSum(keep_dims=False)
+        square = P.Square()
 
         fusion = self.get_attr_dict()["fusion"]
         all_reduce.add_prim_attr("fusion", fusion)
+        parameter = " "
         if hasattr(self, 'parameter'):
             parameter = self.parameter
             all_reduce.add_prim_attr("parameter", parameter)
@@ -548,6 +608,9 @@ def get_bprop_mirror_operator(self):
         all_reduce.set_prim_instance_name(instance_name)
 
     def bprop(x, out, dout):
+        if dump_local_norm:
+            dout = F.depend(dout, ln_print("dump local norm: ", param_name, reduce_sum(square((dout)))))
+
         if dev_num == 1:
             return (dout,)
         if mean_flag:
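The recurring additions in this file all implement one feature: when the `dump_local_norm` auto-parallel context flag is set, each mirror/all-gather bprop prints the squared L2 norm of its local gradient slice before communication. A minimal sketch of the printed quantity, using the same public primitives (`reduce_sum(square(z))` is sum(z**2), i.e. the squared norm, with no sqrt):

    import numpy as np
    from mindspore import Tensor, ops

    z = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], np.float32))
    reduce_sum = ops.ReduceSum(keep_dims=False)
    square = ops.Square()
    print("dump local norm: ", reduce_sum(square(z)))  # 30.0 = 1 + 4 + 9 + 16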
mindspore/ops/_grad_experimental/grad_math_ops.py
CHANGED
@@ -18,12 +18,9 @@
 import numpy as np
 import mindspore.numpy as mnp
 from mindspore.common import dtype as mstype
-import mindspore.ops as ops
 from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore import Tensor
-from mindspore.ops.operations.math_ops import SilentCheck
-from mindspore.ops.operations._inner_ops import _MirrorSilentCheck
 from mindspore.ops.operations.math_ops import CumulativeLogsumexp
 from mindspore.ops.operations.math_ops import MatrixSolve
 from mindspore.ops.operations.math_ops import MatrixSolveLs
@@ -803,22 +800,3 @@ def get_bprop_tensor_add(self):
         return binop_grad_common(x, y, dout, dout)
 
     return bprop
-
-
-@bprop_getters.register(_MirrorSilentCheck)
-def get_bprop_mirror_silent_check(self):
-    """Grad definition for '_MirrorSilentCheck' op"""
-    silent_check = SilentCheck(self.min_steps, self.thresh_l1, self.coeff_l1, self.thresh_l2, self.coeff_l2)
-    out_tensor = Tensor([0.0], mstype.float32)
-
-    def bporp(x, pre_val, min_val, max_val, n_step, loss_scale, out, dout):
-        if dout.dtype == mstype.float16:
-            return (dout, out_tensor, out_tensor, out_tensor, out_tensor, out_tensor)
-        if loss_scale is not None:
-            gnorm = ops.norm(dout / loss_scale)
-        else:
-            gnorm = ops.norm(dout)
-        dx, _, _, _, _ = silent_check(gnorm, dout, pre_val, min_val, max_val, n_step)
-        return (dx, out_tensor, out_tensor, out_tensor, out_tensor, out_tensor)
-
-    return bporp
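The removed `_MirrorSilentCheck` bprop (note the `bporp` typo in the original source, preserved above) belonged to the old silent-check path; alongside it, `mindspore/ops/silent_check.py` is deleted (see the file list) and a `SilentCheckV2` primitive appears in the generated tables at the end of this diff. The loss-scale handling it performed reduces to unscaling the gradient before taking its norm, e.g.:

    import numpy as np
    from mindspore import Tensor, ops

    dout = Tensor(np.array([3.0, 4.0], np.float32))
    loss_scale = Tensor(2.0)
    # Norm of the unscaled gradient, as the removed bprop computed it.
    gnorm = ops.norm(dout / loss_scale)
    print(gnorm)  # 2.5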
mindspore/ops/_vmap/vmap_array_ops.py
CHANGED
@@ -2113,6 +2113,7 @@ def get_split_vmap_rule(prim, axis_size):
 
     return vmap_rule
 
+
 @vmap_rules_getters.register(P.SearchSorted)
 def get_searchsorted_vmap_rule(prim, axis_size):
     """VmapRule for `SearchSorted`."""
@@ -2131,10 +2132,7 @@ def get_searchsorted_vmap_rule(prim, axis_size):
     if sorter is not None and sorter_dim is not None:
         sorter = _bdim_at_front(sorter, sorter_dim, axis_size)
 
-
-    right, _ = right_bdim
-
-    outputs = prim(sequence, values, sorter, dtype, right)
+    outputs = prim(sequence, values, sorter, dtype_bdim[0], right_bdim[0])
 
     return outputs, 0
 
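The `SearchSorted` fix is easier to read with the vmap calling convention in mind: every argument arrives as a `(value, batch_dim)` pair, and non-batched scalars carry `batch_dim is None`; the new code indexes both pairs directly instead of unpacking. A toy, framework-free sketch of the convention (the rule and its names are illustrative only, not MindSpore internals):

    # (value, batch_dim) pairs, as vmap rules receive them.
    def toy_vmap_rule(x_bdim, flag_bdim):
        x, x_dim = x_bdim        # batched input plus the axis it is batched on
        flag = flag_bdim[0]      # scalar argument: keep the value, drop the axis
        out = x if flag else [-v for v in x]
        return out, x_dim

    out, out_dim = toy_vmap_rule(([1, 2, 3], 0), (True, None))
    assert out == [1, 2, 3] and out_dim == 0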
mindspore/ops/_vmap/vmap_math_ops.py
CHANGED
@@ -916,6 +916,23 @@ def get_isclose_vmap_rule(prim, axis_size):
 
     return vmap_rule
 
+
+@vmap_rules_getters.register(P.Round)
+def get_round_vmap_rule(prim, axis_size):
+    """VmapRule for round."""
+    if isinstance(prim, str):
+        prim = Primitive(prim)
+
+    def vmap_rule(x_bdim, decimal_bdim):
+        var, x_dim = x_bdim
+        decimal_var, decimal_dim = decimal_bdim
+        if decimal_dim is not None:
+            _raise_value_error("For vmap, the batch axis of decimal must be none.")
+        out = prim(var, decimal_var)
+        return out, x_dim
+
+    return vmap_rule
+
 get_assign_vmap_rule = vmap_rules_getters.register(P.AssignAdd)(get_assign_vmap_rule)
 get_assign_vmap_rule = vmap_rules_getters.register(P.AssignSub)(get_assign_vmap_rule)
 
@@ -949,7 +966,6 @@ get_unop_vmap_rule = vmap_rules_getters.register(P.Reciprocal)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Inv)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Invert)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Rint)(get_unop_vmap_rule)
-get_unop_vmap_rule = vmap_rules_getters.register(P.Round)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Rsqrt)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register("Sigmoid")(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Sqrt)(get_unop_vmap_rule)
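`Round` leaves the generic unary-op rule because it now takes a second `decimals` input (its default, `"Round": {"decimals": 0}`, is added to the table at the end of this diff); the dedicated rule requires `decimals` to be un-batched. A hedged sketch, assuming a mindspore 2.4 install where `ops.round` accepts the `decimals` keyword:

    import numpy as np
    from mindspore import Tensor, ops

    x = Tensor(np.array([[1.234, 5.678], [9.876, 5.432]], np.float32))
    # vmap over rows; `decimals` stays a plain Python scalar (batch axis None).
    batched_round = ops.vmap(lambda row: ops.round(row, decimals=1), in_axes=0)
    print(batched_round(x))  # [[1.2 5.7] [9.9 5.4]]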
mindspore/ops/_vmap/vmap_nn_ops.py
CHANGED
@@ -517,7 +517,6 @@ def get_in_top_k_vmap_rule(prim, axis_size):
 
 @vmap_rules_getters.register(G.FastGeLUGrad)
 @vmap_rules_getters.register(G.HSwishGrad)
-@vmap_rules_getters.register(G.SoftShrinkGrad)
 def get_common_activation_grad_vmap_rule(prim, axis_size):
     """VmapRule for common activation grad operation."""
     prim_name = prim.name
@@ -547,6 +546,49 @@ def get_common_activation_grad_vmap_rule(prim, axis_size):
     return vmap_rule
 
 
+@vmap_rules_getters.register("SoftShrink")
+def get_softshrink_vmap_rule(prim, axis_size):
+    """VmapRule for `SoftShrink`."""
+    def vmap_rule(x_bdim, lambd_bdim):
+        var, dim = x_bdim
+        lambd, _ = lambd_bdim
+        out = prim(var, lambd)
+        return out, dim
+
+    return vmap_rule
+
+
+@vmap_rules_getters.register("SoftShrinkGrad")
+def get_softshrink_grad_vmap_rule(prim, axis_size):
+    """VmapRule for `SoftShrinkGrad`."""
+    prim_name = prim.name
+
+    def vmap_rule(dy_bdim, x_bdim, lambd_bdim):
+        x, x_dim = x_bdim
+        lambd, _ = lambd_bdim
+        dy, dy_dim = dy_bdim
+        x_shape = F.shape(x)
+        dy_shape = F.shape(dy)
+        if x_dim == dy_dim and x_shape == dy_shape:
+            out = prim(dy, x, lambd)
+            return out, x_dim
+
+        if F.rank(x):
+            x = _bdim_at_front(x, x_dim, 1)
+        if F.rank(dy):
+            dy = _bdim_at_front(dy, dy_dim, 1)
+        x_shape = F.shape(x)
+        dy_shape = F.shape(dy)
+        if x_shape != dy_shape:
+            raise RuntimeError("For {} vmap, input x shape is supposed to be the same as input dy shape "
+                               "after batch transforming, but got x_shape {}, dy_shape {}"
+                               .format(prim_name, x_shape, dy_shape))
+        out = prim(dy, x, lambd)
+        return out, 0
+
+    return vmap_rule
+
+
 @vmap_rules_getters.register("HShrink")
 def get_hshrink_vmap_rule(prim, axis_size):
     """VmapRule for `HShrink`."""
@@ -2196,7 +2238,6 @@ get_unop_vmap_rule = vmap_rules_getters.register(P.SeLU)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.HSigmoid)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Softplus)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Softsign)(get_unop_vmap_rule)
-get_unop_vmap_rule = vmap_rules_getters.register(P.SoftShrink)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.GeLU)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.FastGeLU)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.HSwish)(get_unop_vmap_rule)
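Same pattern for `SoftShrink`: `lambd` becomes a runtime input (defaults `"SoftShrink": {"lambd": 0.5}` and `"SoftShrinkGrad": {"lambd": 0.5}` appear in the table below), so dedicated rules that forward an un-batched `lambd` replace the generic unary registrations. For reference, the function itself in plain numpy:

    import numpy as np

    def softshrink(x, lambd=0.5):
        # y = x - lambd for x > lambd; x + lambd for x < -lambd; else 0
        return np.where(x > lambd, x - lambd, np.where(x < -lambd, x + lambd, 0.0))

    print(softshrink(np.array([-1.0, -0.2, 0.3, 2.0])))  # [-0.5  0.  0.  1.5]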
mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py
CHANGED
@@ -21,6 +21,7 @@ op_args_default_value = {
     "AdamW": {"amsgrad": False, "maximize": False},
     "AddExt": {"alpha": 1},
     "AddLayerNormV2": {"epsilon": 1e-5, "additionalOut": False},
+    "ApplyAdamW": {"max_grad_norm": None, "amsgrad": False, "maximize": False},
     "ApplyCamePart2": {"sum_r": None, "global_shape": None},
     "ApplyCamePart3": {"global_shape": None, "use_first_moment": False},
     "ApplyCamePart4": {"global_shape": None},
@@ -29,6 +30,7 @@ op_args_default_value = {
     "ArgMaxExt": {"dim": None, "keepdim": False},
     "Argmax": {"axis": -1, "output_type": mstype.int32},
     "ArgMaxWithValue": {"axis": 0, "keep_dims": False},
+    "ArgMinExt": {"dim": None, "keepdim": False},
     "Argmin": {"axis": -1, "output_type": mstype.int32},
     "ArgMinWithValue": {"axis": 0, "keep_dims": False},
     "AvgPool2DGrad": {"padding": 0, "ceil_mode": False, "count_include_pad": True, "divisor_override": None},
@@ -36,8 +38,8 @@ op_args_default_value = {
     "AvgPoolGrad": {"kernel_size": 1, "strides": 1, "pad_mode": 'VALID', "data_format": 'NCHW'},
     "AvgPool": {"kernel_size": 1, "strides": 1, "pad_mode": 'VALID', "data_format": 'NCHW'},
     "BatchMatMul": {"transpose_a": False, "transpose_b": False},
-    "BatchNormExt": {"training": False, "momentum": 0.1, "epsilon": 1e-5},
-    "BatchNormGradExt": {"training": False, "eps": 1e-5},
+    "BatchNormExt": {"running_mean": None, "runnning_var": None, "training": False, "momentum": 0.1, "epsilon": 1e-5},
+    "BatchNormGradExt": {"running_mean": None, "running_var": None, "saved_mean": None, "saved_rstd": None, "training": False, "eps": 1e-5},
     "BatchNormGradGrad": {"is_training": False, "epsilon": 1e-5, "data_format": 'NCHW'},
     "BatchNormGrad": {"is_training": False, "epsilon": 1e-5, "data_format": 'NCHW'},
     "BatchNormGradWithActivation": {"is_training": False, "epsilon": 1e-5, "data_format": 'NCHW'},
@@ -63,10 +65,13 @@ op_args_default_value = {
     "ConvolutionGrad": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1, "output_mask": ()},
     "Convolution": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1},
     "Correlate": {"mode": 'valid'},
+    "CountNonZero": {"dim": None},
+    "Cross": {"dim": -65530},
     "CumProd": {"exclusive": False, "reverse": False},
     "CumSum": {"exclusive": False, "reverse": False},
     "CumsumExt": {"dtype": None},
-    "DCT": {"
+    "DCT": {"type": 2, "n": None, "axis": -1, "norm": None},
+    "DCTN": {"type": 2, "s": None, "axes": None, "norm": None},
     "Dense": {"bias": None},
     "Diagonal": {"offset": 0, "dim1": 0, "dim2": 1},
     "DivMod": {"rounding_mode": None},
@@ -75,13 +80,25 @@ op_args_default_value = {
     "EluExt": {"alpha": 1.0},
     "EluGradExt": {"alpha": 1.0},
     "Elu": {"alpha": 1.0},
+    "EmbeddingApplyAdaGrad": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplyAdam": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplyAdamW": {"ams_grad": (0,), "mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplyFtrl": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplyRmsprop": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplySgd": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
     "EmbeddingDenseBackward": {"padding_idx": None, "scale_grad_by_freq": False},
+    "EmbeddingFeatureMappingFileSize": {"only_offset_flag": True},
+    "EmbeddingFeatureMappingFind": {"num": 1},
+    "EmbeddingFeatureMappingImport": {"only_offset_flag": True, "num": 1},
     "Embedding": {"padding_idx": None, "max_norm": None, "norm_type": 2.0, "scale_grad_by_freq": False},
+    "EmbeddingTableEvict": {"steps_to_live": 0},
     "ExtractImagePatches": {"padding": 'VALID'},
     "FFNExt": {"expertTokens": None, "bias1": None, "bias2": None, "scale": None, "offset": None, "deqScale1": None, "deqScale2": None, "antiquant_scale1": None, "antiquant_scale2": None, "antiquant_offset1": None, "antiquant_offset2": None, "activation": 'fastgelu', "inner_precise": 0},
     "FFT2": {"s": None, "dim": (-2, -1), "norm": None},
     "FFT": {"n": None, "dim": -1, "norm": None},
+    "FFTOrtho": {"axes": None, "forward": True},
     "FFTWithSize": {"norm": 'backward', "onesided": True, "signal_sizes": ()},
+    "FFTFreq": {"d": 1.0, "dtype": None},
     "FFTN": {"s": None, "dim": None, "norm": None},
     "FFTShift": {"dim": None},
     "FillScalar": {"dtype": None},
@@ -90,23 +107,42 @@ op_args_default_value = {
     "FlashAttentionScore": {"real_shift": None, "drop_mask": None, "padding_mask": None, "attn_mask": None, "prefix": None, "actual_seq_qlen": None, "actual_seq_kvlen": None, "keep_prob": 1.0, "scale_value": 1.0, "pre_tokens": 2147483647, "next_tokens": 2147483647, "inner_precise": 0, "input_layout": 'BSH', "sparse_mode": 0},
     "FlattenExt": {"start_dim": 0, "end_dim": -1},
     "Gather": {"batch_dims": 0},
+    "GenerateEodMaskV2": {"start": 0, "steps": 1, "error_mode": 'cycle', "flip_mode": 'bitflip', "multiply_factor": 0.0, "bit_pos": 0, "flip_probability": 0.0},
     "GridSampler2DGrad": {"interpolation_mode": 'bilinear', "padding_mode": 'zeros', "align_corners": False},
     "GridSampler2D": {"interpolation_mode": 'bilinear', "padding_mode": 'zeros', "align_corners": False},
     "GridSampler3DGrad": {"interpolation_mode": 'bilinear', "padding_mode": 'zeros', "align_corners": False},
     "GridSampler3D": {"interpolation_mode": 'bilinear', "padding_mode": 'zeros', "align_corners": False},
     "GroupNormGrad": {"dx_is_require": True, "dgamma_is_require": True, "dbeta_is_require": True},
     "GroupNorm": {"weight": None, "bias": None, "eps": 1e-5},
+    "HFFT2": {"s": None, "dim": (-2, -1), "norm": None},
+    "HFFT": {"n": None, "dim": -1, "norm": None},
+    "HFFTN": {"s": None, "dim": None, "norm": None},
+    "HistcExt": {"bins": 100, "min": 0, "max": 0},
     "HShrinkGrad": {"lambd": 0.5},
     "HShrink": {"lambd": 0.5},
+    "IDCT": {"type": 2, "n": None, "axis": -1, "norm": None},
+    "IDCTN": {"type": 2, "s": None, "axes": None, "norm": None},
     "IFFT2": {"s": None, "dim": (-2, -1), "norm": None},
     "IFFT": {"n": None, "dim": -1, "norm": None},
     "IFFTN": {"s": None, "dim": None, "norm": None},
     "IFFTShift": {"dim": None},
+    "IHFFT2": {"s": None, "dim": (-2, -1), "norm": None},
+    "IHFFT": {"n": None, "dim": -1, "norm": None},
+    "IHFFTN": {"s": None, "dim": None, "norm": None},
     "Im2ColExt": {"dilation": 1, "padding": 0, "stride": 1},
+    "IncreFlashAttention": {"attn_mask": None, "actual_seq_lengths": None, "pse_shift": None, "dequant_scale1": None, "quant_scale1": None, "dequant_scale2": None, "quant_scale2": None, "quant_offset2": None, "antiquant_scale": None, "antiquant_offset": None, "block_table": None, "kv_padding_size": None, "num_heads": 1, "input_layout": 'BSH', "scale_value": 1.0, "num_key_value_heads": 0, "block_size": 0, "inner_precise": 1},
     "IndexAddExt": {"alpha": 1},
-    "
+    "InplaceAddExt": {"alpha": 1},
+    "InplaceAddmm": {"beta": 1, "alpha": 1},
+    "InplaceAddsExt": {"alpha": 1},
+    "InsertGemV2InBackward": {"start": 0, "steps": 1, "error_mode": 'cycle', "flip_mode": 'bitflip', "multiply_factor": 0.0, "bit_pos": 0, "flip_probability": 0.0},
+    "IRFFT2": {"s": None, "dim": (-2, -1), "norm": None},
+    "IRFFTDouble": {"dim": -1},
     "IRFFT": {"n": None, "dim": -1, "norm": None},
+    "IRFFTN": {"s": None, "dim": None, "norm": None},
     "IsClose": {"rtol": 1e-05, "atol": 1e-08, "equal_nan": True},
+    "L1LossBackwardExt": {"reduction": 'mean'},
+    "L1LossExt": {"reduction": 'mean'},
     "LayerNormExt": {"weight": None, "bias": None, "eps": 1e-5},
     "LayerNormGradGrad": {"begin_norm_axis": 1, "begin_params_axis": 1},
     "LayerNormGrad": {"begin_norm_axis": 1, "begin_params_axis": 1},
@@ -116,10 +152,13 @@ op_args_default_value = {
     "LeakyReLUExt": {"negative_slope": 0.01},
     "LeakyReLUGradExt": {"negative_slope": 0.01, "is_result": False},
     "LinSpaceExt": {"dtype": None},
+    "LogSoftmaxExt": {"dim": None, "dtype": None},
     "LogSoftmaxGrad": {"axis": -1},
     "LogSoftmax": {"axis": -1},
     "LogitGrad": {"eps": -1.0},
     "Logit": {"eps": -1.0},
+    "LpNormV2": {"p": 2.0, "dim": None, "keepdim": False, "epsilon": 1e-12},
+    "LstsqV2": {"driver": None},
     "MatMul": {"transpose_a": False, "transpose_b": False},
     "MaxPoolGradWithIndices": {"strides": None, "pads": 0, "dilation": (1, 1), "ceil_mode": False, "argmax_type": mstype.int64},
     "MaxPoolGradWithMask": {"strides": None, "pads": 0, "dilation": (1, 1), "ceil_mode": False, "argmax_type": mstype.int64},
@@ -128,20 +167,30 @@ op_args_default_value = {
     "MaximumGradGrad": {"grad_x": True, "grad_y": True},
     "MaximumGrad": {"grad_x": True, "grad_y": True},
     "MeanExt": {"axis": None, "keep_dims": False, "dtype": None},
+    "MedianDim": {"dim": -1, "keepdim": False},
     "MinimumGrad": {"grad_x": True, "grad_y": True},
+    "MSELossExt": {"reduction": 'mean'},
+    "MSELossGradExt": {"reduction": 'mean'},
     "NanToNum": {"nan": None, "posinf": None, "neginf": None},
     "NLLLossGrad": {"reduction": 'mean', "ignore_index": -100},
     "NLLLoss": {"reduction": 'mean', "ignore_index": -100},
-    "Norm": {"
+    "Norm": {"p": 2.0, "dim": None, "keepdim": False, "dtype": None},
     "OneHotExt": {"axis": -1},
     "OneHot": {"axis": -1},
     "OnesLikeExt": {"dtype": None},
     "Ones": {"dtype": None},
+    "PagedAttentionMask": {"antiquant_scale": None, "antiquant_offset": None, "alibi_mask": None, "kv_cache_quant_mode": 'DEFAULT'},
+    "PagedAttention": {"antiquant_scale": None, "antiquant_offset": None, "attn_mask": None, "q_seq_lens": None, "kv_cache_quant_mode": 'DEFAULT'},
     "ProdExt": {"axis": None, "keep_dims": False, "dtype": None},
     "PromptKVCache": {"align_mode": 'LEFT'},
     "Qr": {"full_matrices": False},
     "RandExt": {"dtype": None},
     "RandLikeExt": {"dtype": None},
+    "RandIntLike": {"dtype": None},
+    "RandInt": {"dtype": None},
+    "RandnLike": {"dtype": None},
+    "Randn": {"dtype": None},
+    "RandpermExt": {"dtype": mstype.int64},
     "RandpermV2": {"seed": 0, "offset": 0, "dtype": mstype.int64},
     "Range": {"maxlen": 1000000},
     "ReduceAll": {"axis": None, "keep_dims": False},
@@ -165,16 +214,27 @@ op_args_default_value = {
     "ResizeNearestNeighbor": {"align_corners": False, "half_pixel_centers": False},
     "ResizeNearestNeighborV2Grad": {"align_corners": False, "half_pixel_centers": False},
     "ResizeNearestNeighborV2": {"align_corners": False, "half_pixel_centers": False},
-    "
+    "RFFT2": {"s": None, "dim": (-2, -1), "norm": None},
     "RFFT": {"n": None, "dim": -1, "norm": None},
+    "RFFTFreq": {"d": 1.0, "dtype": None},
+    "RFFTN": {"s": None, "dim": None, "norm": None},
     "RmsNorm": {"epsilon": 1e-6},
+    "Roll": {"axis": None},
+    "RotaryPositionEmbeddingGrad": {"dx": None, "mode": 0},
+    "RotaryPositionEmbedding": {"mode": 0},
+    "Round": {"decimals": 0},
     "ScalarToTensor": {"dtype": None},
+    "Scatter": {"reduce": 'none'},
+    "ScatterValue": {"reduce": 'none'},
     "SearchSorted": {"sorter": None, "dtype": mstype.int64, "right": False},
     "SequenceConcat": {"axis": 0},
+    "SilentCheckV2": {"c_min_steps": 7, "c_thresh_l1": 1000000.0, "c_coeff_l1": 100000.0, "c_thresh_l2": 10000.0, "c_coeff_l2": 5000.0, "npu_asd_detect": 1},
     "SoftmaxBackward": {"dim": -1},
     "Softmax": {"axis": -1},
     "SoftplusExt": {"beta": 1, "threshold": 20},
     "SoftplusGradExt": {"beta": 1, "threshold": 20},
+    "SoftShrinkGrad": {"lambd": 0.5},
+    "SoftShrink": {"lambd": 0.5},
     "SolveTriangular": {"trans": 0, "lower": False, "unit_diagonal": False},
     "SortExt": {"dim": -1, "descending": False, "stable": False},
     "Split": {"axis": 0, "output_num": 1},
@@ -184,11 +244,20 @@ op_args_default_value = {
     "StridedSlice": {"begin_mask": 0, "end_mask": 0, "ellipsis_mask": 0, "new_axis_mask": 0, "shrink_axis_mask": 0},
     "SubExt": {"alpha": 1},
     "SumExt": {"dim": None, "keepdim": False, "dtype": None},
+    "SwigluGrad": {"dim": -1},
+    "Swiglu": {"dim": -1},
+    "TensorScatterElements": {"axis": 0, "reduce": 'none'},
     "TopkExt": {"dim": -1, "largest": True, "sorted": True},
+    "TopKRouter": {"drop_type": 0},
+    "TraceV2Grad": {"offset": 0, "axis1": 1, "axis2": 0},
+    "TraceV2": {"offset": 0, "axis1": 1, "axis2": 0, "dtype": None},
+    "TrilExt": {"diagonal": 0},
     "Triu": {"diagonal": 0},
     "TupleToTensor": {"dtype": None},
     "Unique2": {"sorted": True, "return_inverse": False, "return_counts": False},
     "UnstackExt": {"axis": 0},
+    "UpsampleBicubic2DGrad": {"output_size": None, "scales": None, "align_corners": False},
+    "UpsampleBicubic2D": {"output_size": None, "scales": None, "align_corners": False},
     "UpsampleBilinear2DGrad": {"output_size": None, "scales": None, "align_corners": False},
     "UpsampleBilinear2D": {"output_size": None, "scales": None, "align_corners": False},
     "UpsampleLinear1DGrad": {"output_size": None, "scales": None, "align_corners": False},
@@ -203,12 +272,13 @@ op_args_default_value = {
     "UpsampleTrilinear3D": {"output_size": None, "scales": None, "align_corners": False},
     "ZerosLikeExt": {"dtype": None},
     "Zeros": {"dtype": None},
+    "AddRmsNormQuantV2": {"epsilon": 1e-5},
     "DynamicQuantExt": {"smooth_scales": None},
     "FusedInferAttentionScore": {"pse_shift": None, "attn_mask": None, "actual_seq_lengths": None, "actual_seq_lengths_kv": None, "dequant_scale1": None, "quant_scale1": None, "dequant_scale2": None, "quant_scale2": None, "quant_offset2": None, "antiquant_scale": None, "antiquant_offset": None, "block_table": None, "query_padding_size": None, "kv_padding_size": None, "scale_value": 1.0, "pre_tokens": 2147483647, "next_tokens": 2147483647, "input_layout": 'BSH', "num_key_value_heads": 0, "sparse_mode": 0, "inner_precise": 1, "block_size": 0, "antiquant_mode": 0, "softmax_lse_flag": False},
     "GroupedMatmul": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "group_list": None, "split_item": 0, "group_type": -1},
     "KVCacheScatterUpdate": {"reduce": 'none'},
     "MoeFinalizeRouting": {"x2": None, "bias": None, "scales": None, "expanded_row_idx": None, "expanded_expert_idx": None},
-    "QuantBatchMatmul": {"offset": None, "bias": None, "transpose_x1": False, "transpose_x2": False, "dtype": mstype.float16},
+    "QuantBatchMatmul": {"offset": None, "bias": None, "pertokenScaleOptional": None, "transpose_x1": False, "transpose_x2": False, "dtype": mstype.float16},
     "QuantV2": {"sqrt_mode": False, "rounding_mode": 'ROUND', "dst_type": mstype.int8},
     "WeightQuantBatchMatmul": {"antiquant_offset": None, "quant_scale": None, "quant_offset": None, "bias": None, "transpose_x": False, "transpose_weight": False, "antiquant_group_size": 0},
 }
@@ -216,16 +286,30 @@ op_args_default_value = {
 op_labels = {
     "AdamWeightDecay": {"side_effect_mem": True},
     "AdamW": {"side_effect_mem": True},
+    "ApplyAdamW": {"side_effect_mem": True},
     "AssignAdd": {"side_effect_mem": True},
     "Assign": {"side_effect_mem": True},
+    "CopyExt": {"side_effect_mem": True},
     "DecoderKVCache": {"side_effect_mem": True},
     "DropoutExt": {"side_effect_hidden": True},
     "DropoutGenMaskExt": {"side_effect_hidden": True},
     "Dropout": {"side_effect_hidden": True},
+    "EmbeddingApplyAdaGrad": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplyAdam": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplyAdamW": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplyFtrl": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplyRmsprop": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplySgd": {"_process_node_engine_id": 'PS'},
     "Embedding": {"side_effect_mem": True},
+    "EmbeddingTableEvict": {"_process_node_engine_id": 'PS'},
     "Generator": {"side_effect_mem": True},
+    "InplaceAddExt": {"side_effect_mem": True},
+    "InplaceAddmm": {"side_effect_mem": True},
+    "InplaceAddsExt": {"side_effect_mem": True},
     "Log": {"cust_aicpu": 'Log', "base": -1.0, "scale": 1.0, "shift": 0.0},
     "PromptKVCache": {"side_effect_mem": True},
     "ReshapeAndCache": {"side_effect_mem": True},
     "ResizeD": {"mode": 'linear'},
+    "SilentCheckV2": {"side_effect_mem": True},
+    "KVCacheScatterUpdate": {"side_effect_mem": True},
 }
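The module name suggests these two tables feed C++/pyboost-side primitive creation: `op_args_default_value` supplies keyword defaults when a caller omits an argument, and `op_labels` attaches primitive attributes such as side-effect markers. A hedged usage sketch (the import assumes the 2.4.1 wheel is installed; entries checked are ones added in this diff):

    from mindspore.ops.auto_generate.cpp_create_prim_instance_helper import (
        op_args_default_value, op_labels)

    # Defaults and labels added in this release, per the hunks above.
    assert op_args_default_value["Round"] == {"decimals": 0}
    assert op_labels["SilentCheckV2"] == {"side_effect_mem": True}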