mindspore: 2.3.0-cp39-cp39-win_amd64.whl → 2.4.1-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic.

Files changed (287)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +3 -1
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +50 -9
  7. mindspore/_extends/parse/compile_config.py +41 -0
  8. mindspore/_extends/parse/parser.py +9 -7
  9. mindspore/_extends/parse/standard_method.py +52 -14
  10. mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
  11. mindspore/amp.py +24 -10
  12. mindspore/avcodec-59.dll +0 -0
  13. mindspore/avdevice-59.dll +0 -0
  14. mindspore/avfilter-8.dll +0 -0
  15. mindspore/avformat-59.dll +0 -0
  16. mindspore/avutil-57.dll +0 -0
  17. mindspore/common/__init__.py +6 -4
  18. mindspore/common/_pijit_context.py +190 -0
  19. mindspore/common/_register_for_tensor.py +2 -1
  20. mindspore/common/_tensor_overload.py +139 -0
  21. mindspore/common/api.py +102 -87
  22. mindspore/common/dump.py +5 -6
  23. mindspore/common/generator.py +1 -7
  24. mindspore/common/hook_handle.py +14 -26
  25. mindspore/common/initializer.py +51 -15
  26. mindspore/common/mindir_util.py +2 -2
  27. mindspore/common/parameter.py +62 -15
  28. mindspore/common/recompute.py +39 -9
  29. mindspore/common/sparse_tensor.py +7 -3
  30. mindspore/common/tensor.py +183 -37
  31. mindspore/communication/__init__.py +1 -1
  32. mindspore/communication/_comm_helper.py +38 -3
  33. mindspore/communication/comm_func.py +315 -60
  34. mindspore/communication/management.py +14 -14
  35. mindspore/context.py +132 -22
  36. mindspore/dataset/__init__.py +1 -1
  37. mindspore/dataset/audio/__init__.py +1 -1
  38. mindspore/dataset/core/config.py +7 -0
  39. mindspore/dataset/core/validator_helpers.py +7 -0
  40. mindspore/dataset/engine/cache_client.py +1 -1
  41. mindspore/dataset/engine/datasets.py +72 -44
  42. mindspore/dataset/engine/datasets_audio.py +7 -7
  43. mindspore/dataset/engine/datasets_standard_format.py +53 -3
  44. mindspore/dataset/engine/datasets_text.py +20 -20
  45. mindspore/dataset/engine/datasets_user_defined.py +174 -104
  46. mindspore/dataset/engine/datasets_vision.py +33 -33
  47. mindspore/dataset/engine/iterators.py +29 -0
  48. mindspore/dataset/engine/obs/util.py +7 -0
  49. mindspore/dataset/engine/queue.py +114 -60
  50. mindspore/dataset/engine/serializer_deserializer.py +2 -2
  51. mindspore/dataset/engine/validators.py +34 -14
  52. mindspore/dataset/text/__init__.py +1 -4
  53. mindspore/dataset/transforms/__init__.py +0 -3
  54. mindspore/dataset/utils/line_reader.py +2 -0
  55. mindspore/dataset/vision/__init__.py +1 -4
  56. mindspore/dataset/vision/utils.py +1 -1
  57. mindspore/dataset/vision/validators.py +2 -1
  58. mindspore/dnnl.dll +0 -0
  59. mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
  60. mindspore/experimental/es/embedding_service.py +883 -0
  61. mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
  62. mindspore/experimental/llm_boost/__init__.py +21 -0
  63. mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
  64. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  65. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  66. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  67. mindspore/experimental/llm_boost/register.py +129 -0
  68. mindspore/experimental/llm_boost/utils.py +31 -0
  69. mindspore/experimental/optim/adamw.py +85 -0
  70. mindspore/experimental/optim/optimizer.py +3 -0
  71. mindspore/hal/__init__.py +3 -3
  72. mindspore/hal/contiguous_tensors_handle.py +175 -0
  73. mindspore/hal/stream.py +18 -0
  74. mindspore/include/api/model_group.h +13 -1
  75. mindspore/include/api/types.h +10 -10
  76. mindspore/include/dataset/config.h +2 -2
  77. mindspore/include/dataset/constants.h +2 -2
  78. mindspore/include/dataset/execute.h +2 -2
  79. mindspore/include/dataset/vision.h +4 -0
  80. mindspore/jpeg62.dll +0 -0
  81. mindspore/log.py +1 -1
  82. mindspore/mindrecord/filewriter.py +68 -51
  83. mindspore/mindspore_backend.dll +0 -0
  84. mindspore/mindspore_common.dll +0 -0
  85. mindspore/mindspore_core.dll +0 -0
  86. mindspore/mindspore_glog.dll +0 -0
  87. mindspore/mindspore_np_dtype.dll +0 -0
  88. mindspore/mindspore_ops.dll +0 -0
  89. mindspore/mint/__init__.py +983 -46
  90. mindspore/mint/distributed/__init__.py +31 -0
  91. mindspore/mint/distributed/distributed.py +254 -0
  92. mindspore/mint/nn/__init__.py +268 -23
  93. mindspore/mint/nn/functional.py +125 -19
  94. mindspore/mint/nn/layer/__init__.py +39 -0
  95. mindspore/mint/nn/layer/activation.py +133 -0
  96. mindspore/mint/nn/layer/normalization.py +477 -0
  97. mindspore/mint/nn/layer/pooling.py +110 -0
  98. mindspore/mint/optim/adamw.py +26 -13
  99. mindspore/mint/special/__init__.py +63 -0
  100. mindspore/multiprocessing/__init__.py +2 -1
  101. mindspore/nn/__init__.py +0 -1
  102. mindspore/nn/cell.py +276 -96
  103. mindspore/nn/layer/activation.py +211 -44
  104. mindspore/nn/layer/basic.py +137 -10
  105. mindspore/nn/layer/embedding.py +137 -2
  106. mindspore/nn/layer/normalization.py +101 -5
  107. mindspore/nn/layer/padding.py +34 -48
  108. mindspore/nn/layer/pooling.py +161 -7
  109. mindspore/nn/layer/transformer.py +3 -3
  110. mindspore/nn/loss/__init__.py +2 -2
  111. mindspore/nn/loss/loss.py +84 -6
  112. mindspore/nn/optim/__init__.py +2 -1
  113. mindspore/nn/optim/adadelta.py +1 -1
  114. mindspore/nn/optim/adam.py +1 -1
  115. mindspore/nn/optim/lamb.py +1 -1
  116. mindspore/nn/optim/tft_wrapper.py +124 -0
  117. mindspore/nn/wrap/cell_wrapper.py +12 -23
  118. mindspore/nn/wrap/grad_reducer.py +5 -5
  119. mindspore/nn/wrap/loss_scale.py +17 -3
  120. mindspore/numpy/__init__.py +1 -1
  121. mindspore/numpy/array_creations.py +65 -68
  122. mindspore/numpy/array_ops.py +64 -60
  123. mindspore/numpy/fft.py +610 -75
  124. mindspore/numpy/logic_ops.py +11 -10
  125. mindspore/numpy/math_ops.py +85 -84
  126. mindspore/numpy/utils_const.py +4 -4
  127. mindspore/opencv_core452.dll +0 -0
  128. mindspore/opencv_imgcodecs452.dll +0 -0
  129. mindspore/opencv_imgproc452.dll +0 -0
  130. mindspore/ops/__init__.py +6 -4
  131. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  132. mindspore/ops/_grad_experimental/grad_comm_ops.py +67 -4
  133. mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
  134. mindspore/ops/_vmap/vmap_array_ops.py +2 -4
  135. mindspore/ops/_vmap/vmap_math_ops.py +17 -1
  136. mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
  137. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +91 -7
  138. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
  139. mindspore/ops/auto_generate/gen_extend_func.py +767 -13
  140. mindspore/ops/auto_generate/gen_ops_def.py +2452 -364
  141. mindspore/ops/auto_generate/gen_ops_prim.py +5442 -1756
  142. mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
  143. mindspore/ops/composite/base.py +85 -48
  144. mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
  145. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
  146. mindspore/ops/function/__init__.py +22 -0
  147. mindspore/ops/function/array_func.py +492 -153
  148. mindspore/ops/function/debug_func.py +113 -1
  149. mindspore/ops/function/fft_func.py +15 -2
  150. mindspore/ops/function/grad/grad_func.py +3 -2
  151. mindspore/ops/function/math_func.py +564 -207
  152. mindspore/ops/function/nn_func.py +817 -383
  153. mindspore/ops/function/other_func.py +3 -2
  154. mindspore/ops/function/random_func.py +402 -12
  155. mindspore/ops/function/reshard_func.py +13 -11
  156. mindspore/ops/function/sparse_unary_func.py +1 -1
  157. mindspore/ops/function/vmap_func.py +3 -2
  158. mindspore/ops/functional.py +24 -14
  159. mindspore/ops/op_info_register.py +3 -3
  160. mindspore/ops/operations/__init__.py +7 -2
  161. mindspore/ops/operations/_grad_ops.py +2 -76
  162. mindspore/ops/operations/_infer_ops.py +1 -1
  163. mindspore/ops/operations/_inner_ops.py +71 -94
  164. mindspore/ops/operations/array_ops.py +14 -146
  165. mindspore/ops/operations/comm_ops.py +63 -53
  166. mindspore/ops/operations/custom_ops.py +83 -19
  167. mindspore/ops/operations/debug_ops.py +42 -10
  168. mindspore/ops/operations/manually_defined/_inner.py +12 -0
  169. mindspore/ops/operations/manually_defined/ops_def.py +273 -20
  170. mindspore/ops/operations/math_ops.py +12 -223
  171. mindspore/ops/operations/nn_ops.py +20 -114
  172. mindspore/ops/operations/other_ops.py +7 -4
  173. mindspore/ops/operations/random_ops.py +46 -1
  174. mindspore/ops/primitive.py +18 -6
  175. mindspore/ops_generate/arg_dtype_cast.py +2 -0
  176. mindspore/ops_generate/gen_aclnn_implement.py +11 -11
  177. mindspore/ops_generate/gen_constants.py +36 -0
  178. mindspore/ops_generate/gen_ops.py +67 -52
  179. mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
  180. mindspore/ops_generate/gen_pyboost_func.py +131 -47
  181. mindspore/ops_generate/op_proto.py +10 -3
  182. mindspore/ops_generate/pyboost_utils.py +14 -1
  183. mindspore/ops_generate/template.py +43 -21
  184. mindspore/parallel/__init__.py +3 -1
  185. mindspore/parallel/_auto_parallel_context.py +31 -9
  186. mindspore/parallel/_cell_wrapper.py +85 -0
  187. mindspore/parallel/_parallel_serialization.py +47 -19
  188. mindspore/parallel/_tensor.py +127 -13
  189. mindspore/parallel/_utils.py +53 -22
  190. mindspore/parallel/algo_parameter_config.py +5 -5
  191. mindspore/parallel/checkpoint_transform.py +46 -39
  192. mindspore/parallel/cluster/process_entity/__init__.py +1 -1
  193. mindspore/parallel/cluster/process_entity/_api.py +31 -23
  194. mindspore/parallel/cluster/process_entity/_utils.py +2 -27
  195. mindspore/parallel/parameter_broadcast.py +3 -4
  196. mindspore/parallel/shard.py +162 -31
  197. mindspore/parallel/transform_safetensors.py +1146 -0
  198. mindspore/profiler/__init__.py +2 -1
  199. mindspore/profiler/common/constant.py +29 -0
  200. mindspore/profiler/common/registry.py +47 -0
  201. mindspore/profiler/common/util.py +28 -0
  202. mindspore/profiler/dynamic_profiler.py +694 -0
  203. mindspore/profiler/envprofiling.py +17 -19
  204. mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
  205. mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
  206. mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
  207. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
  208. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
  209. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
  210. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  211. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
  212. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
  213. mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
  214. mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
  215. mindspore/profiler/parser/base_timeline_generator.py +19 -25
  216. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
  217. mindspore/profiler/parser/framework_parser.py +1 -391
  218. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  219. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  220. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  221. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  222. mindspore/profiler/parser/memory_usage_parser.py +0 -154
  223. mindspore/profiler/parser/profiler_info.py +78 -6
  224. mindspore/profiler/profiler.py +153 -0
  225. mindspore/profiler/profiling.py +285 -413
  226. mindspore/rewrite/__init__.py +1 -2
  227. mindspore/rewrite/common/namespace.py +4 -4
  228. mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
  229. mindspore/run_check/_check_version.py +39 -104
  230. mindspore/safeguard/rewrite_obfuscation.py +591 -247
  231. mindspore/swresample-4.dll +0 -0
  232. mindspore/swscale-6.dll +0 -0
  233. mindspore/tinyxml2.dll +0 -0
  234. mindspore/train/__init__.py +4 -3
  235. mindspore/train/_utils.py +105 -19
  236. mindspore/train/amp.py +171 -53
  237. mindspore/train/callback/__init__.py +2 -2
  238. mindspore/train/callback/_callback.py +4 -4
  239. mindspore/train/callback/_checkpoint.py +97 -31
  240. mindspore/train/callback/_cluster_monitor.py +1 -1
  241. mindspore/train/callback/_flops_collector.py +1 -0
  242. mindspore/train/callback/_loss_monitor.py +3 -3
  243. mindspore/train/callback/_on_request_exit.py +145 -31
  244. mindspore/train/callback/_summary_collector.py +5 -5
  245. mindspore/train/callback/_tft_register.py +375 -0
  246. mindspore/train/dataset_helper.py +15 -3
  247. mindspore/train/metrics/metric.py +3 -3
  248. mindspore/train/metrics/roc.py +4 -4
  249. mindspore/train/mind_ir_pb2.py +44 -39
  250. mindspore/train/model.py +154 -58
  251. mindspore/train/serialization.py +342 -128
  252. mindspore/turbojpeg.dll +0 -0
  253. mindspore/utils/__init__.py +21 -0
  254. mindspore/utils/utils.py +60 -0
  255. mindspore/version.py +1 -1
  256. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +13 -7
  257. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +260 -254
  258. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +1 -1
  259. mindspore/include/c_api/ms/abstract.h +0 -67
  260. mindspore/include/c_api/ms/attribute.h +0 -197
  261. mindspore/include/c_api/ms/base/handle_types.h +0 -43
  262. mindspore/include/c_api/ms/base/macros.h +0 -32
  263. mindspore/include/c_api/ms/base/status.h +0 -33
  264. mindspore/include/c_api/ms/base/types.h +0 -283
  265. mindspore/include/c_api/ms/context.h +0 -102
  266. mindspore/include/c_api/ms/graph.h +0 -160
  267. mindspore/include/c_api/ms/node.h +0 -606
  268. mindspore/include/c_api/ms/tensor.h +0 -161
  269. mindspore/include/c_api/ms/value.h +0 -84
  270. mindspore/mindspore_shared_lib.dll +0 -0
  271. mindspore/nn/extend/basic.py +0 -140
  272. mindspore/nn/extend/embedding.py +0 -143
  273. mindspore/nn/extend/layer/normalization.py +0 -109
  274. mindspore/nn/extend/pooling.py +0 -117
  275. mindspore/nn/layer/embedding_service.py +0 -531
  276. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
  277. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
  278. mindspore/ops/extend/__init__.py +0 -53
  279. mindspore/ops/extend/array_func.py +0 -218
  280. mindspore/ops/extend/math_func.py +0 -76
  281. mindspore/ops/extend/nn_func.py +0 -308
  282. mindspore/ops/silent_check.py +0 -162
  283. mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
  284. mindspore/profiler/parser/msadvisor_parser.py +0 -240
  285. mindspore/train/callback/_mindio_ttp.py +0 -443
  286. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
  287. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/ops/__init__.py CHANGED
@@ -29,13 +29,14 @@ from mindspore.ops.vm_impl_registry import get_vm_impl_fn, vm_impl_registry
 from mindspore.ops.op_info_register import op_info_register, custom_info_register, AkgGpuRegOp, AkgAscendRegOp, \
     AiCPURegOp, TBERegOp, CpuRegOp, CustomRegOp, DataType
 from mindspore.ops.primitive import constexpr
-from mindspore.ops import composite, operations, functional, function, auto_generate, extend
+from mindspore.ops import composite, operations, functional, function
 from mindspore.ops import signature
+from mindspore.ops.auto_generate import cpp_create_prim_instance_helper, gen_arg_dtype_cast, gen_arg_handler, \
+    gen_extend_func, gen_ops_def, gen_ops_prim, pyboost_inner_prim
 from mindspore.ops.composite import *
 from mindspore.ops.operations import *
 from mindspore.ops.function import *
 from mindspore.ops.functional import *
-from mindspore.ops.silent_check import _silent_check

 __primitive__ = [
     "prim_attr_register", "prim_arg_register", "Primitive", "PrimitiveWithInfer", "PrimitiveWithCheck", "signature"
@@ -44,11 +45,12 @@ __primitive__ = [
 __all__ = ["get_vm_impl_fn", "vm_impl_registry",
            "op_info_register", "custom_info_register", "AkgGpuRegOp", "AkgAscendRegOp", "AiCPURegOp", "TBERegOp",
            "CpuRegOp", "CustomRegOp", "DataType",
-           "constexpr", "reshard"]
+           "constexpr", "reshard",
+           "cpp_create_prim_instance_helper", "gen_arg_dtype_cast", "gen_arg_handler", "gen_extend_func", "gen_ops_def",
+           "gen_ops_prim", "pyboost_inner_prim"]
 __all__.extend(__primitive__)
 __all__.extend(composite.__all__)
 __all__.extend(operations.__all__)
 __all__.extend(functional.__all__)
 __all__.extend(function.__all__)
 __all__.extend(auto_generate.__all__)
-_silent_check()
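
A minimal sketch of what this import reshuffle means for callers, assuming mindspore 2.4.1 is installed (module names are taken from the hunks above):

import mindspore.ops as ops

# The generated modules are now imported by mindspore/ops/__init__.py and
# listed in __all__, so they resolve directly as attributes of mindspore.ops:
print(ops.gen_ops_def.__name__)
print(ops.cpp_create_prim_instance_helper.__name__)

# Code that imported the removed alias needs updating:
# from mindspore.ops import extend   # worked in 2.3.0; gone in 2.4.1

The file list above shows mindspore/ops/extend/*.py deleted while gen_extend_func.py grows by 767 lines, which suggests the extend-style functions now live in the generated module rather than disappearing outright.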
mindspore/ops/_grad_experimental/grad_array_ops.py CHANGED
@@ -38,7 +38,6 @@ from mindspore.ops.operations.array_ops import SegmentMean
 from mindspore.ops.operations.array_ops import AffineGrid
 from mindspore.ops.operations.array_ops import MaskedScatter
 from mindspore.ops.operations.array_ops import MaskedSelect
-from mindspore.ops.operations.array_ops import CountNonZero
 from mindspore.ops.operations.random_ops import LogNormalReverse
 from mindspore.ops.operations.random_ops import ParameterizedTruncatedNormal
 from mindspore.ops.operations import _inner_ops as inner
@@ -125,16 +124,6 @@ def get_bprop_masked_scatter(self):
     return bprop


-@bprop_getters.register(CountNonZero)
-def get_bprop_countnonzero(self):
-    """Grad definition for CountNonZero"""
-
-    def bprop(x, out, dout):
-        return (zeros_like(x),)
-
-    return bprop
-
-
 @bprop_getters.register(Mvlgamma)
 def get_bprop_mvlgamma(self):
     """Grad definition for Mvlgamma"""
mindspore/ops/_grad_experimental/grad_comm_ops.py CHANGED
@@ -31,9 +31,11 @@ from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _H
                                                _GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
                                                ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
                                                _VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,
-                                               _MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter)
+                                               _MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter,
+                                               _VirtualAssignKvCache)
 from mindspore.ops._grad_experimental.grad_base import bprop_getters
 from mindspore.ops.operations import _grad_ops as G
+import mindspore as ms


 @bprop_getters.register(AllReduce)
@@ -95,6 +97,12 @@ def get_bprop_send(self):
     dtype = self.get_attr_dict()["dtype"]
     tag = self.get_attr_dict()["sr_tag"]
     send_grad = Receive(tag, self.rank, shape, dtype, self.group_back)
+    if "dst_global_rank" in self.get_attr_dict():
+        dst_global_rank = self.get_attr_dict().get("dst_global_rank")
+        send_grad.add_prim_attr("src_global_rank", dst_global_rank)
+    if "RING_ATTENTION_INDEX" in self.get_attr_dict():
+        ringattention = self.get_attr_dict().get("RING_ATTENTION_INDEX")
+        send_grad.add_prim_attr("RING_ATTENTION_INDEX", ringattention)
     virtual_input = Tensor(0.0, dtype)

     def bprop(x, out, dout):
@@ -108,8 +116,16 @@
 def get_bprop_receive(self):
     """Generate bprop for Receive."""
     tag = self.get_attr_dict()["sr_tag"]
+    flash_tag = self.get_attr_dict().get("flash_tag")
     receive_grad = Send(tag, self.rank, self.group_back)
-    receive_grad.add_prim_attr("shape", self.shape)
+    shape = self.get_attr_dict()["shape"]
+    receive_grad.add_prim_attr("shape", shape)
+    if "src_global_rank" in self.get_attr_dict():
+        src_global_rank = self.get_attr_dict().get("src_global_rank")
+        receive_grad.add_prim_attr("dst_global_rank", src_global_rank)
+    if "RING_ATTENTION_INDEX" in self.get_attr_dict():
+        ringattention = self.get_attr_dict().get("RING_ATTENTION_INDEX")
+        receive_grad.add_prim_attr("RING_ATTENTION_INDEX", ringattention)
     depend = P.Depend()
     cast = P.Cast()
     out_tensor = Tensor(0.0, mstype.float16)
@@ -117,7 +133,7 @@ def get_bprop_receive(self):

     def bprop(x, out, dout):
         send_out = receive_grad(dout)
-        if is_opt_shard:
+        if is_opt_shard or (flash_tag == "True"):
             dx = depend(F.zeros_like(x), send_out)
         else:
             dx = depend(cast(out_tensor, F.dtype(x)), send_out)
@@ -164,6 +180,24 @@ def get_bprop_virtual_assign_add(self):
     return bprop


+@bprop_getters.register(_VirtualAssignKvCache)
+def get_bprop_virtual_assign_kv_cache(self):
+    """Generate bprop for VirtualAssignAdd."""
+    assign = P.Assign()
+    cast = P.Cast()
+    dtype = P.DType()
+    out_tensor = Tensor(0.0, mstype.float16)
+
+    def bprop(x, y, seq_chunk, out, dout):
+        dout_update = dout + y
+        kv_equal = F.equal(seq_chunk, 0)
+        update_kv = F.select(kv_equal, F.broadcast_to(cast(out_tensor, dtype(y)), F.shape(y)), dout_update)
+        return F.depend((dout_update, cast(out_tensor, dtype(y)),
+                         cast(out_tensor, dtype(seq_chunk))), assign(y, update_kv))
+
+    return bprop
+
+
 @bprop_getters.register(_VirtualAccuGrad)
 def get_bprop_virtual_accu_grad(self):
     """Generate bprop for VirtualAccuGrad."""
@@ -186,6 +220,9 @@ def get_bprop_mirror_micro_step_operator(self):
     group = self.group
     dev_num = self.dev_num
     mean_flag = self.mean_flag
+    param_name = " "
+    if 'mirror_user_id' in self.get_attr_dict():
+        param_name = self.get_attr_dict()['mirror_user_id']
     scale = 1 / dev_num

     all_reduce = AllReduce(group=group)
@@ -196,7 +233,6 @@
     if hasattr(self, 'parameter'):
         parameter = self.parameter
         all_reduce.add_prim_attr("parameter", parameter)
-
     if self.instance_name:
         instance_name = "grad_mirror" + self.instance_name
         all_reduce.set_prim_instance_name(instance_name)
@@ -207,8 +243,14 @@
     assign.add_prim_attr("parameter_micro", 0)
     out_tensor = Tensor(1.0, mstype.float16)
     opt_shard = _get_enable_parallel_optimizer()
+    ln_print = P.Print()
+    reduce_sum = P.ReduceSum(keep_dims=False)
+    square = P.Square()
+    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")

     def bprop(x, z, out, dout):
+        if dump_local_norm:
+            z = F.depend(z, ln_print("dump local norm: ", param_name, reduce_sum(square((z)))))
         real_grad = z
         assign_out = dout
         if issubclass_(F.typeof(dout), mstype.tensor_type):
@@ -309,6 +351,9 @@ def get_bprop_micro_step_all_gather(self):
     """Generate bprop for _MicroStepAllGather"""
     fusion = self.get_attr_dict()["fusion"]
     mean_flag = self.get_attr_dict()["mean_flag"]
+    param_name = " "
+    if 'mirror_user_id' in self.get_attr_dict():
+        param_name = self.get_attr_dict()['mirror_user_id']
     do_mirror = False
     if self.group != "":
         do_mirror = self.get_attr_dict()["do_mirror"]
@@ -324,6 +369,10 @@
     dtype = P.DType()
     out_tensor = Tensor(1.0, mstype.float16)
     with_mirror_operator = self.get_attr_dict()["with_mirror_operator"]
+    ln_print = P.Print()
+    reduce_sum = P.ReduceSum(keep_dims=False)
+    square = P.Square()
+    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")

     def bprop(x, z, out, dout):
         if with_mirror_operator:
@@ -334,6 +383,8 @@
             real_grad = F.tensor_mul(real_grad, scale)
             return (real_grad, cast(out_tensor, dtype(z)))
         z = F.depend(z, dout)
+        if dump_local_norm:
+            z = F.depend(z, ln_print("dump local norm: ", param_name, reduce_sum(square((z)))))
         if not do_mirror:
             return (z, cast(out_tensor, dtype(z)))
         real_grad = reduce_scatter(z)
@@ -529,16 +580,25 @@ def get_bprop_mirror_operator(self):
     group = self.get_attr_dict()['group']
     dev_num = self.get_attr_dict()['dev_num']
     mean_flag = self.get_attr_dict()['mean_flag']
+    param_name = " "
+    if 'mirror_user_id' in self.get_attr_dict():
+        param_name = self.get_attr_dict()['mirror_user_id']
+
     dev_num_r = 1.0
+    dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
     if dev_num > 1:
         dev_num_r = 1.0 / dev_num
         all_reduce = AllReduce(group=group)
         all_gather = AllGather(group=group)
         mul = P.Mul()
         cast = P.Cast()
+        ln_print = P.Print()
+        reduce_sum = P.ReduceSum(keep_dims=False)
+        square = P.Square()

         fusion = self.get_attr_dict()["fusion"]
         all_reduce.add_prim_attr("fusion", fusion)
+        parameter = " "
         if hasattr(self, 'parameter'):
             parameter = self.parameter
             all_reduce.add_prim_attr("parameter", parameter)
@@ -548,6 +608,9 @@
         all_reduce.set_prim_instance_name(instance_name)

     def bprop(x, out, dout):
+        if dump_local_norm:
+            dout = F.depend(dout, ln_print("dump local norm: ", param_name, reduce_sum(square((dout)))))
+
         if dev_num == 1:
             return (dout,)
         if mean_flag:
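
Every `dump_local_norm` block added above logs the same quantity before the collective runs: `reduce_sum(square(grad))`, the squared local L2 norm of the parameter's gradient, printed next to the `mirror_user_id` name. A small self-contained sketch of that arithmetic using the same primitives the diff uses (how the context flag gets enabled is not shown in this diff):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

grad = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], np.float32))
reduce_sum = P.ReduceSum(keep_dims=False)   # same primitive instances as in the bprops above
square = P.Square()
print(reduce_sum(square(grad)))             # 30.0 = 1 + 4 + 9 + 16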
mindspore/ops/_grad_experimental/grad_math_ops.py CHANGED
@@ -18,12 +18,9 @@
 import numpy as np
 import mindspore.numpy as mnp
 from mindspore.common import dtype as mstype
-import mindspore.ops as ops
 from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore import Tensor
-from mindspore.ops.operations.math_ops import SilentCheck
-from mindspore.ops.operations._inner_ops import _MirrorSilentCheck
 from mindspore.ops.operations.math_ops import CumulativeLogsumexp
 from mindspore.ops.operations.math_ops import MatrixSolve
 from mindspore.ops.operations.math_ops import MatrixSolveLs
@@ -803,22 +800,3 @@ def get_bprop_tensor_add(self):
         return binop_grad_common(x, y, dout, dout)

     return bprop
-
-
-@bprop_getters.register(_MirrorSilentCheck)
-def get_bprop_mirror_silent_check(self):
-    """Grad definition for '_MirrorSilentCheck' op"""
-    silent_check = SilentCheck(self.min_steps, self.thresh_l1, self.coeff_l1, self.thresh_l2, self.coeff_l2)
-    out_tensor = Tensor([0.0], mstype.float32)
-
-    def bporp(x, pre_val, min_val, max_val, n_step, loss_scale, out, dout):
-        if dout.dtype == mstype.float16:
-            return (dout, out_tensor, out_tensor, out_tensor, out_tensor, out_tensor)
-        if loss_scale is not None:
-            gnorm = ops.norm(dout / loss_scale)
-        else:
-            gnorm = ops.norm(dout)
-        dx, _, _, _, _ = silent_check(gnorm, dout, pre_val, min_val, max_val, n_step)
-        return (dx, out_tensor, out_tensor, out_tensor, out_tensor, out_tensor)
-
-    return bporp
mindspore/ops/_vmap/vmap_array_ops.py CHANGED
@@ -2113,6 +2113,7 @@ def get_split_vmap_rule(prim, axis_size):
     return vmap_rule

+
 @vmap_rules_getters.register(P.SearchSorted)
 def get_searchsorted_vmap_rule(prim, axis_size):
     """VmapRule for `SearchSorted`."""
@@ -2131,10 +2132,7 @@
         if sorter is not None and sorter_dim is not None:
             sorter = _bdim_at_front(sorter, sorter_dim, axis_size)

-        dtype, _ = dtype_bdim
-        right, _ = right_bdim
-
-        outputs = prim(sequence, values, sorter, dtype, right)
+        outputs = prim(sequence, values, sorter, dtype_bdim[0], right_bdim[0])

         return outputs, 0
mindspore/ops/_vmap/vmap_math_ops.py CHANGED
@@ -916,6 +916,23 @@ def get_isclose_vmap_rule(prim, axis_size):
     return vmap_rule


+@vmap_rules_getters.register(P.Round)
+def get_round_vmap_rule(prim, axis_size):
+    """VmapRule for round."""
+    if isinstance(prim, str):
+        prim = Primitive(prim)
+
+    def vmap_rule(x_bdim, decimal_bdim):
+        var, x_dim = x_bdim
+        decimal_var, decimal_dim = decimal_bdim
+        if decimal_dim is not None:
+            _raise_value_error("For vmap, the batch axis of decimal must be none.")
+        out = prim(var, decimal_var)
+        return out, x_dim
+
+    return vmap_rule
+
+
 get_assign_vmap_rule = vmap_rules_getters.register(P.AssignAdd)(get_assign_vmap_rule)
 get_assign_vmap_rule = vmap_rules_getters.register(P.AssignSub)(get_assign_vmap_rule)
@@ -949,7 +966,6 @@ get_unop_vmap_rule = vmap_rules_getters.register(P.Reciprocal)(get_unop_vmap_rul
 get_unop_vmap_rule = vmap_rules_getters.register(P.Inv)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Invert)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Rint)(get_unop_vmap_rule)
-get_unop_vmap_rule = vmap_rules_getters.register(P.Round)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Rsqrt)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register("Sigmoid")(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Sqrt)(get_unop_vmap_rule)
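
Round leaves the shared unary-op rule because the 2.4.1 primitive carries a second `decimals` input (see the "Round": {"decimals": 0} default later in this diff); the dedicated rule batches only the data input and requires the batch axis of `decimals` to be None. A hedged usage sketch, assuming the functional ops.round forwards `decimals` to this primitive:

import numpy as np
from mindspore import Tensor, ops

x = Tensor(np.array([[1.234, 5.678], [9.101, 2.345]], np.float32))
# vmap over the batch axis of x; `decimals` stays a plain scalar, matching the
# rule's requirement that its batch axis be None.
batched_round = ops.vmap(lambda t: ops.round(t, decimals=1), in_axes=0)
print(batched_round(x))  # each row rounded to one decimal place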
mindspore/ops/_vmap/vmap_nn_ops.py CHANGED
@@ -517,7 +517,6 @@ def get_in_top_k_vmap_rule(prim, axis_size):

 @vmap_rules_getters.register(G.FastGeLUGrad)
 @vmap_rules_getters.register(G.HSwishGrad)
-@vmap_rules_getters.register(G.SoftShrinkGrad)
 def get_common_activation_grad_vmap_rule(prim, axis_size):
     """VmapRule for common activation grad operation."""
     prim_name = prim.name
@@ -547,6 +546,49 @@ def get_common_activation_grad_vmap_rule(prim, axis_size):
     return vmap_rule


+@vmap_rules_getters.register("SoftShrink")
+def get_softshrink_vmap_rule(prim, axis_size):
+    """VmapRule for `SoftShrink`."""
+    def vmap_rule(x_bdim, lambd_bdim):
+        var, dim = x_bdim
+        lambd, _ = lambd_bdim
+        out = prim(var, lambd)
+        return out, dim
+
+    return vmap_rule
+
+
+@vmap_rules_getters.register("SoftShrinkGrad")
+def get_softshrink_grad_vmap_rule(prim, axis_size):
+    """VmapRule for `SoftShrinkGrad`."""
+    prim_name = prim.name
+
+    def vmap_rule(dy_bdim, x_bdim, lambd_bdim):
+        x, x_dim = x_bdim
+        lambd, _ = lambd_bdim
+        dy, dy_dim = dy_bdim
+        x_shape = F.shape(x)
+        dy_shape = F.shape(dy)
+        if x_dim == dy_dim and x_shape == dy_shape:
+            out = prim(dy, x, lambd)
+            return out, x_dim
+
+        if F.rank(x):
+            x = _bdim_at_front(x, x_dim, 1)
+        if F.rank(dy):
+            dy = _bdim_at_front(dy, dy_dim, 1)
+        x_shape = F.shape(x)
+        dy_shape = F.shape(dy)
+        if x_shape != dy_shape:
+            raise RuntimeError("For {} vmap, input x shape is supposed to be the same as input dy shape "
+                               "after batch transforming, but got x_shape {}, dy_shape {}"
+                               .format(prim_name, x_shape, dy_shape))
+        out = prim(dy, x, lambd)
+        return out, 0
+
+    return vmap_rule
+
+
 @vmap_rules_getters.register("HShrink")
 def get_hshrink_vmap_rule(prim, axis_size):
     """VmapRule for `HShrink`."""
@@ -2196,7 +2238,6 @@ get_unop_vmap_rule = vmap_rules_getters.register(P.SeLU)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.HSigmoid)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Softplus)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.Softsign)(get_unop_vmap_rule)
-get_unop_vmap_rule = vmap_rules_getters.register(P.SoftShrink)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.GeLU)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.FastGeLU)(get_unop_vmap_rule)
 get_unop_vmap_rule = vmap_rules_getters.register(P.HSwish)(get_unop_vmap_rule)
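
SoftShrink and SoftShrinkGrad leave the generic unary and activation-grad rules for the same reason as Round above: `lambd` is now a real operand, so each op needs a rule that threads it through unbatched. A hedged forward-pass sketch, assuming the functional ops.softshrink routes to this primitive:

import numpy as np
from mindspore import Tensor, ops

x = Tensor(np.array([[0.2, -0.9], [1.5, -0.1]], np.float32))
# `lambd` is passed through unbatched, as the new vmap rule expects.
batched = ops.vmap(lambda t: ops.softshrink(t, lambd=0.5), in_axes=0)
print(batched(x))  # entries with |x| <= 0.5 become 0; others move toward 0 by 0.5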
mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py CHANGED
@@ -21,6 +21,7 @@ op_args_default_value = {
     "AdamW": {"amsgrad": False, "maximize": False},
     "AddExt": {"alpha": 1},
     "AddLayerNormV2": {"epsilon": 1e-5, "additionalOut": False},
+    "ApplyAdamW": {"max_grad_norm": None, "amsgrad": False, "maximize": False},
     "ApplyCamePart2": {"sum_r": None, "global_shape": None},
     "ApplyCamePart3": {"global_shape": None, "use_first_moment": False},
     "ApplyCamePart4": {"global_shape": None},
@@ -29,6 +30,7 @@ op_args_default_value = {
     "ArgMaxExt": {"dim": None, "keepdim": False},
     "Argmax": {"axis": -1, "output_type": mstype.int32},
     "ArgMaxWithValue": {"axis": 0, "keep_dims": False},
+    "ArgMinExt": {"dim": None, "keepdim": False},
     "Argmin": {"axis": -1, "output_type": mstype.int32},
     "ArgMinWithValue": {"axis": 0, "keep_dims": False},
     "AvgPool2DGrad": {"padding": 0, "ceil_mode": False, "count_include_pad": True, "divisor_override": None},
@@ -36,8 +38,8 @@ op_args_default_value = {
     "AvgPoolGrad": {"kernel_size": 1, "strides": 1, "pad_mode": 'VALID', "data_format": 'NCHW'},
     "AvgPool": {"kernel_size": 1, "strides": 1, "pad_mode": 'VALID', "data_format": 'NCHW'},
     "BatchMatMul": {"transpose_a": False, "transpose_b": False},
-    "BatchNormExt": {"training": False, "momentum": 0.1, "epsilon": 1e-5},
-    "BatchNormGradExt": {"training": False, "eps": 1e-5},
+    "BatchNormExt": {"running_mean": None, "runnning_var": None, "training": False, "momentum": 0.1, "epsilon": 1e-5},
+    "BatchNormGradExt": {"running_mean": None, "running_var": None, "saved_mean": None, "saved_rstd": None, "training": False, "eps": 1e-5},
     "BatchNormGradGrad": {"is_training": False, "epsilon": 1e-5, "data_format": 'NCHW'},
     "BatchNormGrad": {"is_training": False, "epsilon": 1e-5, "data_format": 'NCHW'},
     "BatchNormGradWithActivation": {"is_training": False, "epsilon": 1e-5, "data_format": 'NCHW'},
@@ -63,10 +65,13 @@ op_args_default_value = {
     "ConvolutionGrad": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1, "output_mask": ()},
     "Convolution": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1},
     "Correlate": {"mode": 'valid'},
+    "CountNonZero": {"dim": None},
+    "Cross": {"dim": -65530},
     "CumProd": {"exclusive": False, "reverse": False},
     "CumSum": {"exclusive": False, "reverse": False},
     "CumsumExt": {"dtype": None},
-    "DCT": {"axis": -1, "norm": 'BACKWARD', "forward": True, "grad": False},
+    "DCT": {"type": 2, "n": None, "axis": -1, "norm": None},
+    "DCTN": {"type": 2, "s": None, "axes": None, "norm": None},
     "Dense": {"bias": None},
     "Diagonal": {"offset": 0, "dim1": 0, "dim2": 1},
     "DivMod": {"rounding_mode": None},
@@ -75,13 +80,25 @@ op_args_default_value = {
     "EluExt": {"alpha": 1.0},
     "EluGradExt": {"alpha": 1.0},
     "Elu": {"alpha": 1.0},
+    "EmbeddingApplyAdaGrad": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplyAdam": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplyAdamW": {"ams_grad": (0,), "mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplyFtrl": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplyRmsprop": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
+    "EmbeddingApplySgd": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
     "EmbeddingDenseBackward": {"padding_idx": None, "scale_grad_by_freq": False},
+    "EmbeddingFeatureMappingFileSize": {"only_offset_flag": True},
+    "EmbeddingFeatureMappingFind": {"num": 1},
+    "EmbeddingFeatureMappingImport": {"only_offset_flag": True, "num": 1},
     "Embedding": {"padding_idx": None, "max_norm": None, "norm_type": 2.0, "scale_grad_by_freq": False},
+    "EmbeddingTableEvict": {"steps_to_live": 0},
     "ExtractImagePatches": {"padding": 'VALID'},
     "FFNExt": {"expertTokens": None, "bias1": None, "bias2": None, "scale": None, "offset": None, "deqScale1": None, "deqScale2": None, "antiquant_scale1": None, "antiquant_scale2": None, "antiquant_offset1": None, "antiquant_offset2": None, "activation": 'fastgelu', "inner_precise": 0},
     "FFT2": {"s": None, "dim": (-2, -1), "norm": None},
     "FFT": {"n": None, "dim": -1, "norm": None},
+    "FFTOrtho": {"axes": None, "forward": True},
     "FFTWithSize": {"norm": 'backward', "onesided": True, "signal_sizes": ()},
+    "FFTFreq": {"d": 1.0, "dtype": None},
     "FFTN": {"s": None, "dim": None, "norm": None},
     "FFTShift": {"dim": None},
     "FillScalar": {"dtype": None},
@@ -90,23 +107,42 @@ op_args_default_value = {
     "FlashAttentionScore": {"real_shift": None, "drop_mask": None, "padding_mask": None, "attn_mask": None, "prefix": None, "actual_seq_qlen": None, "actual_seq_kvlen": None, "keep_prob": 1.0, "scale_value": 1.0, "pre_tokens": 2147483647, "next_tokens": 2147483647, "inner_precise": 0, "input_layout": 'BSH', "sparse_mode": 0},
     "FlattenExt": {"start_dim": 0, "end_dim": -1},
     "Gather": {"batch_dims": 0},
+    "GenerateEodMaskV2": {"start": 0, "steps": 1, "error_mode": 'cycle', "flip_mode": 'bitflip', "multiply_factor": 0.0, "bit_pos": 0, "flip_probability": 0.0},
     "GridSampler2DGrad": {"interpolation_mode": 'bilinear', "padding_mode": 'zeros', "align_corners": False},
     "GridSampler2D": {"interpolation_mode": 'bilinear', "padding_mode": 'zeros', "align_corners": False},
     "GridSampler3DGrad": {"interpolation_mode": 'bilinear', "padding_mode": 'zeros', "align_corners": False},
     "GridSampler3D": {"interpolation_mode": 'bilinear', "padding_mode": 'zeros', "align_corners": False},
     "GroupNormGrad": {"dx_is_require": True, "dgamma_is_require": True, "dbeta_is_require": True},
     "GroupNorm": {"weight": None, "bias": None, "eps": 1e-5},
+    "HFFT2": {"s": None, "dim": (-2, -1), "norm": None},
+    "HFFT": {"n": None, "dim": -1, "norm": None},
+    "HFFTN": {"s": None, "dim": None, "norm": None},
+    "HistcExt": {"bins": 100, "min": 0, "max": 0},
     "HShrinkGrad": {"lambd": 0.5},
     "HShrink": {"lambd": 0.5},
+    "IDCT": {"type": 2, "n": None, "axis": -1, "norm": None},
+    "IDCTN": {"type": 2, "s": None, "axes": None, "norm": None},
     "IFFT2": {"s": None, "dim": (-2, -1), "norm": None},
     "IFFT": {"n": None, "dim": -1, "norm": None},
     "IFFTN": {"s": None, "dim": None, "norm": None},
     "IFFTShift": {"dim": None},
+    "IHFFT2": {"s": None, "dim": (-2, -1), "norm": None},
+    "IHFFT": {"n": None, "dim": -1, "norm": None},
+    "IHFFTN": {"s": None, "dim": None, "norm": None},
     "Im2ColExt": {"dilation": 1, "padding": 0, "stride": 1},
+    "IncreFlashAttention": {"attn_mask": None, "actual_seq_lengths": None, "pse_shift": None, "dequant_scale1": None, "quant_scale1": None, "dequant_scale2": None, "quant_scale2": None, "quant_offset2": None, "antiquant_scale": None, "antiquant_offset": None, "block_table": None, "kv_padding_size": None, "num_heads": 1, "input_layout": 'BSH', "scale_value": 1.0, "num_key_value_heads": 0, "block_size": 0, "inner_precise": 1},
     "IndexAddExt": {"alpha": 1},
-    "IRFFTGrad": {"n": None, "dim": -1, "norm": None},
+    "InplaceAddExt": {"alpha": 1},
+    "InplaceAddmm": {"beta": 1, "alpha": 1},
+    "InplaceAddsExt": {"alpha": 1},
+    "InsertGemV2InBackward": {"start": 0, "steps": 1, "error_mode": 'cycle', "flip_mode": 'bitflip', "multiply_factor": 0.0, "bit_pos": 0, "flip_probability": 0.0},
+    "IRFFT2": {"s": None, "dim": (-2, -1), "norm": None},
+    "IRFFTDouble": {"dim": -1},
     "IRFFT": {"n": None, "dim": -1, "norm": None},
+    "IRFFTN": {"s": None, "dim": None, "norm": None},
     "IsClose": {"rtol": 1e-05, "atol": 1e-08, "equal_nan": True},
+    "L1LossBackwardExt": {"reduction": 'mean'},
+    "L1LossExt": {"reduction": 'mean'},
     "LayerNormExt": {"weight": None, "bias": None, "eps": 1e-5},
     "LayerNormGradGrad": {"begin_norm_axis": 1, "begin_params_axis": 1},
     "LayerNormGrad": {"begin_norm_axis": 1, "begin_params_axis": 1},
@@ -116,10 +152,13 @@ op_args_default_value = {
     "LeakyReLUExt": {"negative_slope": 0.01},
     "LeakyReLUGradExt": {"negative_slope": 0.01, "is_result": False},
     "LinSpaceExt": {"dtype": None},
+    "LogSoftmaxExt": {"dim": None, "dtype": None},
     "LogSoftmaxGrad": {"axis": -1},
     "LogSoftmax": {"axis": -1},
     "LogitGrad": {"eps": -1.0},
     "Logit": {"eps": -1.0},
+    "LpNormV2": {"p": 2.0, "dim": None, "keepdim": False, "epsilon": 1e-12},
+    "LstsqV2": {"driver": None},
     "MatMul": {"transpose_a": False, "transpose_b": False},
     "MaxPoolGradWithIndices": {"strides": None, "pads": 0, "dilation": (1, 1), "ceil_mode": False, "argmax_type": mstype.int64},
     "MaxPoolGradWithMask": {"strides": None, "pads": 0, "dilation": (1, 1), "ceil_mode": False, "argmax_type": mstype.int64},
@@ -128,20 +167,30 @@ op_args_default_value = {
     "MaximumGradGrad": {"grad_x": True, "grad_y": True},
     "MaximumGrad": {"grad_x": True, "grad_y": True},
     "MeanExt": {"axis": None, "keep_dims": False, "dtype": None},
+    "MedianDim": {"dim": -1, "keepdim": False},
     "MinimumGrad": {"grad_x": True, "grad_y": True},
+    "MSELossExt": {"reduction": 'mean'},
+    "MSELossGradExt": {"reduction": 'mean'},
     "NanToNum": {"nan": None, "posinf": None, "neginf": None},
     "NLLLossGrad": {"reduction": 'mean', "ignore_index": -100},
     "NLLLoss": {"reduction": 'mean', "ignore_index": -100},
-    "Norm": {"ord": None, "dim": None, "keepdim": False, "dtype": None},
+    "Norm": {"p": 2.0, "dim": None, "keepdim": False, "dtype": None},
     "OneHotExt": {"axis": -1},
     "OneHot": {"axis": -1},
     "OnesLikeExt": {"dtype": None},
     "Ones": {"dtype": None},
+    "PagedAttentionMask": {"antiquant_scale": None, "antiquant_offset": None, "alibi_mask": None, "kv_cache_quant_mode": 'DEFAULT'},
+    "PagedAttention": {"antiquant_scale": None, "antiquant_offset": None, "attn_mask": None, "q_seq_lens": None, "kv_cache_quant_mode": 'DEFAULT'},
     "ProdExt": {"axis": None, "keep_dims": False, "dtype": None},
     "PromptKVCache": {"align_mode": 'LEFT'},
     "Qr": {"full_matrices": False},
     "RandExt": {"dtype": None},
     "RandLikeExt": {"dtype": None},
+    "RandIntLike": {"dtype": None},
+    "RandInt": {"dtype": None},
+    "RandnLike": {"dtype": None},
+    "Randn": {"dtype": None},
+    "RandpermExt": {"dtype": mstype.int64},
     "RandpermV2": {"seed": 0, "offset": 0, "dtype": mstype.int64},
     "Range": {"maxlen": 1000000},
     "ReduceAll": {"axis": None, "keep_dims": False},
@@ -165,16 +214,27 @@ op_args_default_value = {
     "ResizeNearestNeighbor": {"align_corners": False, "half_pixel_centers": False},
     "ResizeNearestNeighborV2Grad": {"align_corners": False, "half_pixel_centers": False},
     "ResizeNearestNeighborV2": {"align_corners": False, "half_pixel_centers": False},
-    "RFFTGrad": {"n": None, "dim": -1, "norm": None},
+    "RFFT2": {"s": None, "dim": (-2, -1), "norm": None},
     "RFFT": {"n": None, "dim": -1, "norm": None},
+    "RFFTFreq": {"d": 1.0, "dtype": None},
+    "RFFTN": {"s": None, "dim": None, "norm": None},
     "RmsNorm": {"epsilon": 1e-6},
+    "Roll": {"axis": None},
+    "RotaryPositionEmbeddingGrad": {"dx": None, "mode": 0},
+    "RotaryPositionEmbedding": {"mode": 0},
+    "Round": {"decimals": 0},
     "ScalarToTensor": {"dtype": None},
+    "Scatter": {"reduce": 'none'},
+    "ScatterValue": {"reduce": 'none'},
     "SearchSorted": {"sorter": None, "dtype": mstype.int64, "right": False},
     "SequenceConcat": {"axis": 0},
+    "SilentCheckV2": {"c_min_steps": 7, "c_thresh_l1": 1000000.0, "c_coeff_l1": 100000.0, "c_thresh_l2": 10000.0, "c_coeff_l2": 5000.0, "npu_asd_detect": 1},
     "SoftmaxBackward": {"dim": -1},
     "Softmax": {"axis": -1},
     "SoftplusExt": {"beta": 1, "threshold": 20},
     "SoftplusGradExt": {"beta": 1, "threshold": 20},
+    "SoftShrinkGrad": {"lambd": 0.5},
+    "SoftShrink": {"lambd": 0.5},
     "SolveTriangular": {"trans": 0, "lower": False, "unit_diagonal": False},
     "SortExt": {"dim": -1, "descending": False, "stable": False},
     "Split": {"axis": 0, "output_num": 1},
@@ -184,11 +244,20 @@ op_args_default_value = {
     "StridedSlice": {"begin_mask": 0, "end_mask": 0, "ellipsis_mask": 0, "new_axis_mask": 0, "shrink_axis_mask": 0},
     "SubExt": {"alpha": 1},
     "SumExt": {"dim": None, "keepdim": False, "dtype": None},
+    "SwigluGrad": {"dim": -1},
+    "Swiglu": {"dim": -1},
+    "TensorScatterElements": {"axis": 0, "reduce": 'none'},
     "TopkExt": {"dim": -1, "largest": True, "sorted": True},
+    "TopKRouter": {"drop_type": 0},
+    "TraceV2Grad": {"offset": 0, "axis1": 1, "axis2": 0},
+    "TraceV2": {"offset": 0, "axis1": 1, "axis2": 0, "dtype": None},
+    "TrilExt": {"diagonal": 0},
     "Triu": {"diagonal": 0},
     "TupleToTensor": {"dtype": None},
     "Unique2": {"sorted": True, "return_inverse": False, "return_counts": False},
     "UnstackExt": {"axis": 0},
+    "UpsampleBicubic2DGrad": {"output_size": None, "scales": None, "align_corners": False},
+    "UpsampleBicubic2D": {"output_size": None, "scales": None, "align_corners": False},
     "UpsampleBilinear2DGrad": {"output_size": None, "scales": None, "align_corners": False},
     "UpsampleBilinear2D": {"output_size": None, "scales": None, "align_corners": False},
     "UpsampleLinear1DGrad": {"output_size": None, "scales": None, "align_corners": False},
@@ -203,12 +272,13 @@ op_args_default_value = {
     "UpsampleTrilinear3D": {"output_size": None, "scales": None, "align_corners": False},
     "ZerosLikeExt": {"dtype": None},
     "Zeros": {"dtype": None},
+    "AddRmsNormQuantV2": {"epsilon": 1e-5},
     "DynamicQuantExt": {"smooth_scales": None},
     "FusedInferAttentionScore": {"pse_shift": None, "attn_mask": None, "actual_seq_lengths": None, "actual_seq_lengths_kv": None, "dequant_scale1": None, "quant_scale1": None, "dequant_scale2": None, "quant_scale2": None, "quant_offset2": None, "antiquant_scale": None, "antiquant_offset": None, "block_table": None, "query_padding_size": None, "kv_padding_size": None, "scale_value": 1.0, "pre_tokens": 2147483647, "next_tokens": 2147483647, "input_layout": 'BSH', "num_key_value_heads": 0, "sparse_mode": 0, "inner_precise": 1, "block_size": 0, "antiquant_mode": 0, "softmax_lse_flag": False},
     "GroupedMatmul": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "group_list": None, "split_item": 0, "group_type": -1},
     "KVCacheScatterUpdate": {"reduce": 'none'},
     "MoeFinalizeRouting": {"x2": None, "bias": None, "scales": None, "expanded_row_idx": None, "expanded_expert_idx": None},
-    "QuantBatchMatmul": {"offset": None, "bias": None, "transpose_x1": False, "transpose_x2": False, "dtype": mstype.float16},
+    "QuantBatchMatmul": {"offset": None, "bias": None, "pertokenScaleOptional": None, "transpose_x1": False, "transpose_x2": False, "dtype": mstype.float16},
     "QuantV2": {"sqrt_mode": False, "rounding_mode": 'ROUND', "dst_type": mstype.int8},
     "WeightQuantBatchMatmul": {"antiquant_offset": None, "quant_scale": None, "quant_offset": None, "bias": None, "transpose_x": False, "transpose_weight": False, "antiquant_group_size": 0},
 }
@@ -216,16 +286,30 @@ op_args_default_value = {
 op_labels = {
     "AdamWeightDecay": {"side_effect_mem": True},
     "AdamW": {"side_effect_mem": True},
+    "ApplyAdamW": {"side_effect_mem": True},
     "AssignAdd": {"side_effect_mem": True},
     "Assign": {"side_effect_mem": True},
+    "CopyExt": {"side_effect_mem": True},
     "DecoderKVCache": {"side_effect_mem": True},
     "DropoutExt": {"side_effect_hidden": True},
     "DropoutGenMaskExt": {"side_effect_hidden": True},
     "Dropout": {"side_effect_hidden": True},
+    "EmbeddingApplyAdaGrad": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplyAdam": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplyAdamW": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplyFtrl": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplyRmsprop": {"_process_node_engine_id": 'PS'},
+    "EmbeddingApplySgd": {"_process_node_engine_id": 'PS'},
     "Embedding": {"side_effect_mem": True},
+    "EmbeddingTableEvict": {"_process_node_engine_id": 'PS'},
     "Generator": {"side_effect_mem": True},
+    "InplaceAddExt": {"side_effect_mem": True},
+    "InplaceAddmm": {"side_effect_mem": True},
+    "InplaceAddsExt": {"side_effect_mem": True},
     "Log": {"cust_aicpu": 'Log', "base": -1.0, "scale": 1.0, "shift": 0.0},
     "PromptKVCache": {"side_effect_mem": True},
     "ReshapeAndCache": {"side_effect_mem": True},
     "ResizeD": {"mode": 'linear'},
+    "SilentCheckV2": {"side_effect_mem": True},
+    "KVCacheScatterUpdate": {"side_effect_mem": True},
 }
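
Since op_args_default_value and op_labels are plain module-level dicts, the new entries can be verified directly in an installed 2.4.1 wheel; a quick sketch (the import path comes from the file list above):

from mindspore.ops.auto_generate.cpp_create_prim_instance_helper import (
    op_args_default_value, op_labels)

print(op_args_default_value["Round"])       # {'decimals': 0}
print(op_args_default_value["SoftShrink"])  # {'lambd': 0.5}
print(op_labels["SilentCheckV2"])           # {'side_effect_mem': True}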