mindspore 2.7.0__cp310-cp310-win_amd64.whl → 2.7.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (290) hide show
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_extends/parse/compile_config.py +24 -1
  7. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -2
  8. mindspore/_extends/parse/resources.py +1 -1
  9. mindspore/_extends/parse/standard_method.py +8 -1
  10. mindspore/_extends/parse/trope.py +2 -1
  11. mindspore/_extends/pijit/pijit_func_white_list.py +7 -22
  12. mindspore/avcodec-59.dll +0 -0
  13. mindspore/avdevice-59.dll +0 -0
  14. mindspore/avfilter-8.dll +0 -0
  15. mindspore/avformat-59.dll +0 -0
  16. mindspore/avutil-57.dll +0 -0
  17. mindspore/boost/base.py +29 -2
  18. mindspore/common/_decorator.py +3 -2
  19. mindspore/common/_grad_function.py +3 -1
  20. mindspore/common/_tensor_cpp_method.py +1 -1
  21. mindspore/common/_tensor_docs.py +275 -64
  22. mindspore/common/_utils.py +0 -44
  23. mindspore/common/api.py +285 -35
  24. mindspore/common/dump.py +7 -108
  25. mindspore/common/dynamic_shape/auto_dynamic_shape.py +1 -3
  26. mindspore/common/hook_handle.py +60 -0
  27. mindspore/common/jit_config.py +5 -1
  28. mindspore/common/jit_trace.py +27 -12
  29. mindspore/common/lazy_inline.py +5 -3
  30. mindspore/common/parameter.py +13 -107
  31. mindspore/common/recompute.py +4 -11
  32. mindspore/common/tensor.py +16 -169
  33. mindspore/communication/_comm_helper.py +11 -1
  34. mindspore/communication/comm_func.py +138 -4
  35. mindspore/communication/management.py +85 -1
  36. mindspore/config/op_info.config +0 -15
  37. mindspore/context.py +5 -85
  38. mindspore/dataset/engine/datasets.py +8 -4
  39. mindspore/dataset/engine/datasets_vision.py +1 -1
  40. mindspore/dataset/engine/validators.py +1 -15
  41. mindspore/dnnl.dll +0 -0
  42. mindspore/{experimental/llm_boost/ascend_native → graph}/__init__.py +7 -7
  43. mindspore/graph/custom_pass.py +55 -0
  44. mindspore/include/dataset/execute.h +2 -2
  45. mindspore/jpeg62.dll +0 -0
  46. mindspore/mindrecord/__init__.py +3 -3
  47. mindspore/mindrecord/common/exceptions.py +1 -0
  48. mindspore/mindrecord/config.py +1 -1
  49. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  50. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  51. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  52. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  53. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  54. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  55. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  56. mindspore/mindrecord/filereader.py +4 -4
  57. mindspore/mindrecord/filewriter.py +5 -5
  58. mindspore/mindrecord/mindpage.py +2 -2
  59. mindspore/mindrecord/tools/cifar10.py +1 -1
  60. mindspore/mindrecord/tools/cifar100.py +1 -1
  61. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  62. mindspore/mindrecord/tools/cifar10_to_mr.py +1 -1
  63. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  64. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  65. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  66. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  67. mindspore/mindspore_backend_common.dll +0 -0
  68. mindspore/mindspore_backend_manager.dll +0 -0
  69. mindspore/mindspore_cluster.dll +0 -0
  70. mindspore/mindspore_common.dll +0 -0
  71. mindspore/mindspore_core.dll +0 -0
  72. mindspore/mindspore_cpu.dll +0 -0
  73. mindspore/mindspore_dump.dll +0 -0
  74. mindspore/mindspore_frontend.dll +0 -0
  75. mindspore/mindspore_glog.dll +0 -0
  76. mindspore/mindspore_hardware_abstract.dll +0 -0
  77. mindspore/mindspore_memory_pool.dll +0 -0
  78. mindspore/mindspore_ms_backend.dll +0 -0
  79. mindspore/mindspore_ops.dll +0 -0
  80. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  81. mindspore/mindspore_profiler.dll +0 -0
  82. mindspore/mindspore_pyboost.dll +0 -0
  83. mindspore/mindspore_pynative.dll +0 -0
  84. mindspore/mindspore_runtime_pipeline.dll +0 -0
  85. mindspore/mindspore_runtime_utils.dll +0 -0
  86. mindspore/mindspore_tools.dll +0 -0
  87. mindspore/mint/__init__.py +15 -10
  88. mindspore/mint/distributed/distributed.py +182 -62
  89. mindspore/mint/nn/__init__.py +2 -16
  90. mindspore/mint/nn/functional.py +4 -110
  91. mindspore/mint/nn/layer/__init__.py +0 -2
  92. mindspore/mint/nn/layer/activation.py +0 -6
  93. mindspore/mint/nn/layer/basic.py +0 -47
  94. mindspore/mint/nn/layer/conv.py +4 -4
  95. mindspore/mint/nn/layer/normalization.py +8 -13
  96. mindspore/mint/nn/layer/pooling.py +0 -4
  97. mindspore/nn/__init__.py +1 -3
  98. mindspore/nn/cell.py +16 -66
  99. mindspore/nn/layer/basic.py +49 -1
  100. mindspore/nn/layer/container.py +16 -0
  101. mindspore/nn/layer/embedding.py +4 -169
  102. mindspore/nn/layer/normalization.py +2 -1
  103. mindspore/nn/layer/thor_layer.py +4 -85
  104. mindspore/nn/optim/ada_grad.py +0 -1
  105. mindspore/nn/optim/adafactor.py +0 -1
  106. mindspore/nn/optim/adam.py +31 -124
  107. mindspore/nn/optim/adamax.py +0 -1
  108. mindspore/nn/optim/asgd.py +0 -1
  109. mindspore/nn/optim/ftrl.py +8 -102
  110. mindspore/nn/optim/lamb.py +0 -1
  111. mindspore/nn/optim/lars.py +0 -3
  112. mindspore/nn/optim/lazyadam.py +25 -218
  113. mindspore/nn/optim/momentum.py +5 -43
  114. mindspore/nn/optim/optimizer.py +6 -55
  115. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  116. mindspore/nn/optim/rmsprop.py +0 -1
  117. mindspore/nn/optim/rprop.py +0 -1
  118. mindspore/nn/optim/sgd.py +0 -1
  119. mindspore/nn/optim/tft_wrapper.py +0 -1
  120. mindspore/nn/optim/thor.py +0 -2
  121. mindspore/nn/probability/bijector/bijector.py +7 -8
  122. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  123. mindspore/nn/probability/bijector/power_transform.py +20 -21
  124. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  125. mindspore/nn/probability/bijector/softplus.py +13 -14
  126. mindspore/nn/wrap/grad_reducer.py +4 -74
  127. mindspore/numpy/array_creations.py +2 -2
  128. mindspore/numpy/fft.py +9 -9
  129. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  130. mindspore/onnx/onnx_export.py +137 -0
  131. mindspore/opencv_core4110.dll +0 -0
  132. mindspore/opencv_imgcodecs4110.dll +0 -0
  133. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  134. mindspore/ops/__init__.py +2 -0
  135. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  136. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  137. mindspore/ops/_op_impl/cpu/__init__.py +0 -5
  138. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +16 -22
  139. mindspore/ops/auto_generate/gen_extend_func.py +2 -7
  140. mindspore/ops/auto_generate/gen_ops_def.py +98 -141
  141. mindspore/ops/auto_generate/gen_ops_prim.py +12708 -12686
  142. mindspore/ops/communication.py +97 -0
  143. mindspore/ops/composite/__init__.py +5 -2
  144. mindspore/ops/composite/base.py +15 -1
  145. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  146. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  147. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  148. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  149. mindspore/ops/function/__init__.py +1 -0
  150. mindspore/ops/function/array_func.py +14 -12
  151. mindspore/ops/function/comm_func.py +3883 -0
  152. mindspore/ops/function/debug_func.py +3 -4
  153. mindspore/ops/function/math_func.py +45 -54
  154. mindspore/ops/function/nn_func.py +75 -294
  155. mindspore/ops/function/random_func.py +9 -18
  156. mindspore/ops/functional.py +2 -0
  157. mindspore/ops/functional_overload.py +354 -18
  158. mindspore/ops/operations/__init__.py +2 -5
  159. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  160. mindspore/ops/operations/_inner_ops.py +1 -38
  161. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  162. mindspore/ops/operations/array_ops.py +1 -0
  163. mindspore/ops/operations/comm_ops.py +94 -2
  164. mindspore/ops/operations/custom_ops.py +228 -19
  165. mindspore/ops/operations/debug_ops.py +27 -29
  166. mindspore/ops/operations/manually_defined/ops_def.py +27 -306
  167. mindspore/ops/operations/nn_ops.py +2 -2
  168. mindspore/ops/operations/sparse_ops.py +0 -83
  169. mindspore/ops/primitive.py +1 -17
  170. mindspore/ops/tensor_method.py +72 -3
  171. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  172. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  173. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  174. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  175. mindspore/ops_generate/common/gen_constants.py +11 -10
  176. mindspore/ops_generate/common/op_proto.py +18 -1
  177. mindspore/ops_generate/common/template.py +102 -245
  178. mindspore/ops_generate/common/template_utils.py +212 -0
  179. mindspore/ops_generate/gen_custom_ops.py +69 -0
  180. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  181. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  182. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  183. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  184. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  185. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  186. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  187. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  188. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  189. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  190. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  191. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  192. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  193. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  194. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  195. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  196. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  197. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  198. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  199. mindspore/parallel/_cell_wrapper.py +1 -1
  200. mindspore/parallel/_parallel_serialization.py +1 -4
  201. mindspore/parallel/_utils.py +29 -6
  202. mindspore/parallel/checkpoint_transform.py +18 -2
  203. mindspore/parallel/cluster/process_entity/_api.py +24 -32
  204. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  205. mindspore/{experimental/llm_boost/atb → parallel/distributed}/__init__.py +21 -23
  206. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  207. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  208. mindspore/parallel/strategy.py +336 -0
  209. mindspore/parallel/transform_safetensors.py +117 -16
  210. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +3 -0
  211. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  212. mindspore/profiler/common/constant.py +5 -0
  213. mindspore/profiler/common/file_manager.py +9 -0
  214. mindspore/profiler/common/msprof_cmd_tool.py +38 -2
  215. mindspore/profiler/common/path_manager.py +56 -24
  216. mindspore/profiler/common/profiler_context.py +2 -12
  217. mindspore/profiler/common/profiler_info.py +3 -3
  218. mindspore/profiler/common/profiler_path_manager.py +13 -0
  219. mindspore/profiler/common/util.py +30 -3
  220. mindspore/profiler/experimental_config.py +2 -1
  221. mindspore/profiler/platform/npu_profiler.py +33 -6
  222. mindspore/run_check/_check_version.py +108 -24
  223. mindspore/runtime/__init__.py +3 -2
  224. mindspore/runtime/executor.py +11 -3
  225. mindspore/runtime/memory.py +112 -0
  226. mindspore/swresample-4.dll +0 -0
  227. mindspore/swscale-6.dll +0 -0
  228. mindspore/tinyxml2.dll +0 -0
  229. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  230. mindspore/tools/data_dump.py +130 -0
  231. mindspore/tools/sdc_detect.py +91 -0
  232. mindspore/tools/stress_detect.py +63 -0
  233. mindspore/train/__init__.py +6 -6
  234. mindspore/train/_utils.py +5 -18
  235. mindspore/train/amp.py +6 -4
  236. mindspore/train/callback/_checkpoint.py +0 -9
  237. mindspore/train/callback/_train_fault_tolerance.py +69 -18
  238. mindspore/train/data_sink.py +1 -5
  239. mindspore/train/model.py +38 -211
  240. mindspore/train/serialization.py +126 -387
  241. mindspore/turbojpeg.dll +0 -0
  242. mindspore/utils/__init__.py +6 -3
  243. mindspore/utils/dlpack.py +92 -0
  244. mindspore/utils/dryrun.py +1 -1
  245. mindspore/utils/runtime_execution_order_check.py +10 -0
  246. mindspore/utils/sdc_detect.py +14 -12
  247. mindspore/utils/stress_detect.py +43 -0
  248. mindspore/utils/utils.py +144 -8
  249. mindspore/version.py +1 -1
  250. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  251. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/RECORD +254 -267
  252. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -210
  253. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  254. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  255. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  256. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  257. mindspore/experimental/llm_boost/register.py +0 -130
  258. mindspore/experimental/llm_boost/utils.py +0 -31
  259. mindspore/include/OWNERS +0 -7
  260. mindspore/mindspore_cpu_res_manager.dll +0 -0
  261. mindspore/mindspore_ops_kernel_common.dll +0 -0
  262. mindspore/mindspore_res_manager.dll +0 -0
  263. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  264. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  265. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  266. mindspore/nn/reinforcement/tensor_array.py +0 -145
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  270. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  271. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  272. mindspore/ops/_op_impl/cpu/buffer_append.py +0 -28
  273. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  274. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  275. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  276. mindspore/ops/operations/_tensor_array.py +0 -359
  277. mindspore/ops/operations/rl_ops.py +0 -288
  278. mindspore/parallel/_offload_context.py +0 -275
  279. mindspore/parallel/_recovery_context.py +0 -115
  280. mindspore/parallel/_transformer/__init__.py +0 -35
  281. mindspore/parallel/_transformer/layers.py +0 -765
  282. mindspore/parallel/_transformer/loss.py +0 -251
  283. mindspore/parallel/_transformer/moe.py +0 -693
  284. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  285. mindspore/parallel/_transformer/transformer.py +0 -3124
  286. mindspore/parallel/mpi/_mpi_config.py +0 -116
  287. mindspore/train/memory_profiling_pb2.py +0 -298
  288. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  289. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  290. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
@@ -29,7 +29,7 @@ from mindspore.common.sparse_tensor import RowTensorInner
29
29
  from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
30
30
  from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _HostAllGather, AllReduce,
31
31
  NeighborExchange, AlltoAll, AlltoAllV, NeighborExchangeV2,
32
- Broadcast, AllGatherV, ReduceScatterV,
32
+ Broadcast, AlltoAllVC, AllGatherV, ReduceScatterV,
33
33
  _GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
34
34
  ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
35
35
  _VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,
@@ -37,6 +37,7 @@ from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _H
37
37
  _VirtualAssignKvCache)
38
38
  from mindspore.ops._grad_experimental.grad_base import bprop_getters
39
39
  from mindspore.ops.operations import _grad_ops as G
40
+ from mindspore.tools.sdc_detect import _sdc_detector
40
41
  import mindspore as ms
41
42
 
42
43
  _squared_device_local_norm = None
@@ -277,6 +278,8 @@ def get_bprop_mirror_micro_step_operator(self):
277
278
  if dump_device_local_norm:
278
279
  # init _squared _squared_device_local_norm
279
280
  squared_device_local_norm = get_squared_device_local_norm_param()
281
+ # feature value sampling for sdc detect
282
+ feat_value_dump_name = _sdc_detector.get_dump_name(param_name) if _sdc_detector.need_sample() else None
280
283
 
281
284
  def bprop(x, z, out, dout):
282
285
  if dump_local_norm or dump_device_local_norm:
@@ -289,6 +292,9 @@ def get_bprop_mirror_micro_step_operator(self):
289
292
  if dump_device_local_norm:
290
293
  z = F.depend(z, F.assign_add(squared_device_local_norm,
291
294
  cast(squared_norm, squared_device_local_norm.dtype)))
295
+ if feat_value_dump_name and z.ndim > 1:
296
+ feat_value = square(F.max(F.abs(z))[0])
297
+ z = F.depend(z, tensor_dump(feat_value_dump_name, feat_value))
292
298
  real_grad = z
293
299
  assign_out = dout
294
300
  if issubclass_(F.typeof(dout), mstype.tensor_type):
@@ -343,14 +349,16 @@ def get_bprop_all_gather(self):
343
349
  ln_print = P.Print()
344
350
  tensor_dump = P.TensorDump()
345
351
  reduce_sum = P.ReduceSum(keep_dims=False)
346
- square = P.Square()
347
352
  sqrt = P.Sqrt()
353
+ square = P.Square()
348
354
  if dump_local_norm_path:
349
355
  global_rank = get_rank()
350
356
  file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
351
357
  if dump_device_local_norm:
352
358
  # init _squared _squared_device_local_norm
353
359
  squared_device_local_norm = get_squared_device_local_norm_param()
360
+ # feature value sampling for sdc detect
361
+ feat_value_dump_name = _sdc_detector.get_dump_name(param_name) if _sdc_detector.need_sample() else None
354
362
 
355
363
  def bprop(x, out, dout):
356
364
  if param_name and (dump_local_norm or dump_device_local_norm):
@@ -363,6 +371,9 @@ def get_bprop_all_gather(self):
363
371
  if dump_device_local_norm:
364
372
  dout = F.depend(dout, F.assign_add(squared_device_local_norm,
365
373
  cast(squared_norm, squared_device_local_norm.dtype)))
374
+ if param_name and feat_value_dump_name and dout.ndim > 1:
375
+ feat_value = square(F.max(F.abs(dout))[0])
376
+ dout = F.depend(dout, tensor_dump(feat_value_dump_name, feat_value))
366
377
 
367
378
  dx = reduce_scatter(dout)
368
379
  if mean_flag:
@@ -452,6 +463,8 @@ def get_bprop_micro_step_all_gather(self):
452
463
  if dump_device_local_norm:
453
464
  # init _squared _squared_device_local_norm
454
465
  squared_device_local_norm = get_squared_device_local_norm_param()
466
+ # feature value sampling for sdc detect
467
+ feat_value_dump_name = _sdc_detector.get_dump_name(param_name) if _sdc_detector.need_sample() else None
455
468
 
456
469
  def bprop(x, z, out, dout):
457
470
  if with_mirror_operator:
@@ -472,6 +485,9 @@ def get_bprop_micro_step_all_gather(self):
472
485
  if dump_device_local_norm:
473
486
  z = F.depend(z, F.assign_add(squared_device_local_norm,
474
487
  cast(squared_norm, squared_device_local_norm.dtype)))
488
+ if feat_value_dump_name and z.ndim > 1:
489
+ feat_value = square(F.max(F.abs(z))[0])
490
+ z = F.depend(z, tensor_dump(feat_value_dump_name, feat_value))
475
491
  if not do_mirror:
476
492
  return (z, cast(out_tensor, dtype(z)))
477
493
  real_grad = reduce_scatter(z)
@@ -655,6 +671,21 @@ def get_bprop_all_to_all_v(self):
655
671
  return bprop
656
672
 
657
673
 
674
+ @bprop_getters.register(AlltoAllVC)
675
+ def get_bprop_all_to_all_v_c(self):
676
+ """Generate bprop for AlltoAllVC."""
677
+ all_to_all_v_c_grad = AlltoAllVC(self.group, self.block_size, transpose=True)
678
+ if hasattr(self, "instance_name") and self.instance_name:
679
+ instance_name = "grad" + self.instance_name
680
+ all_to_all_v_c_grad.set_prim_instance_name(instance_name)
681
+
682
+ def bprop(x, send_count_matrix, out, dout):
683
+ dx = all_to_all_v_c_grad(dout, send_count_matrix)
684
+ return (dx, zeros_like(send_count_matrix))
685
+
686
+ return bprop
687
+
688
+
658
689
  @bprop_getters.register(AllGatherV)
659
690
  def get_bprop_all_gather_v(self):
660
691
  """Generate bprop for AllGatherV."""
@@ -728,6 +759,8 @@ def get_bprop_mirror_operator(self):
728
759
  if dump_device_local_norm:
729
760
  # init _squared _squared_device_local_norm
730
761
  squared_device_local_norm = get_squared_device_local_norm_param()
762
+ # feature value sampling for sdc detect
763
+ feat_value_dump_name = _sdc_detector.get_dump_name(param_name) if _sdc_detector.need_sample() else None
731
764
  if dev_num > 1:
732
765
  dev_num_r = 1.0 / dev_num
733
766
  all_reduce = AllReduce(group=group)
@@ -762,6 +795,9 @@ def get_bprop_mirror_operator(self):
762
795
  if dump_device_local_norm:
763
796
  dout = F.depend(dout, F.assign_add(squared_device_local_norm,
764
797
  cast(squared_norm, squared_device_local_norm.dtype)))
798
+ if feat_value_dump_name and dout.ndim > 1:
799
+ feat_value = square(F.max(F.abs(dout))[0])
800
+ dout = F.depend(dout, tensor_dump(feat_value_dump_name, feat_value))
765
801
 
766
802
  if dev_num == 1:
767
803
  return (dout,)
@@ -214,13 +214,8 @@ from .cumsum import _cumsum_aicpu
214
214
  from .round import _round_aicpu
215
215
  from .stft import _stft_aicpu
216
216
  from .floor_div import _floor_div_aicpu
217
- from .priority_replay_buffer import _prb_create_op_cpu
218
- from .priority_replay_buffer import _prb_push_op_cpu
219
217
  from .conjugate_transpose import _conjugate_transpose_aicpu
220
- from .priority_replay_buffer import _prb_sample_op_cpu
221
- from .priority_replay_buffer import _prb_update_op_cpu
222
218
  from .equal import _equal_aicpu
223
- from .priority_replay_buffer import _prb_destroy_op_cpu
224
219
  from .right_shift import _right_shift_aicpu
225
220
  from .tril import _tril_aicpu
226
221
  from .linspace import _lin_space_aicpu
@@ -242,10 +237,6 @@ from .sparse_tensor_to_csr_sparse_matrix import _sparse_tensor_to_csr_sparse_mat
242
237
  from .csr_sparse_matrix_to_sparse_tensor import _csr_sparse_matrix_to_sparse_tensor_aicpu
243
238
  from .linear_sum_assignment import _linear_sum_assignment_aicpu
244
239
  from .random_shuffle import _random_shuffle_aicpu
245
- from .reservoir_replay_buffer import _rrb_create_op_cpu
246
- from .reservoir_replay_buffer import _rrb_push_op_cpu
247
- from .reservoir_replay_buffer import _rrb_sample_op_cpu
248
- from .reservoir_replay_buffer import _rrb_destroy_op_cpu
249
240
  from .concat_offset import _concat_offset_aicpu
250
241
  from .range import _range_aicpu
251
242
  from .range_v2 import _range_v2_aicpu
@@ -414,7 +405,6 @@ from .segment_prod import _segment_prod_aicpu
414
405
  from .segment_sum import _segment_sum_aicpu
415
406
  from .set_size import _set_size_aicpu
416
407
  from .slice import _slice_aicpu
417
- from .sparse_cross import _sparse_cross_aicpu
418
408
  from .sparse_slice import _sparse_slice_aicpu
419
409
  from .sparse_softmax import _sparse_softmax_aicpu
420
410
  from .sparse_tensor_dense_add import _sparse_tensor_dense_add_aicpu
@@ -69,12 +69,7 @@ from .tensor_copy_slices import _tensor_copy_slices_cpu
69
69
  from .l2loss import _l2loss_cpu
70
70
  from .pyexecute import _pyexecute_cpu
71
71
  from .pyfunc import _pyfunc_cpu
72
- from .buffer_append import _buffer_append_cpu
73
- from .buffer_get import _buffer_get_cpu
74
72
  from .raise_op import _raise_cpu
75
73
  from .joinedstr_op import _joinedstr_cpu
76
- from .buffer_sample import _buffer_sample_cpu
77
- from .priority_replay_buffer import _prb_push_op_cpu
78
- from .priority_replay_buffer import _prb_sample_op_cpu
79
74
  from .space_to_batch_nd import _space_to_batch_nd_cpu
80
75
  from .sspaddmm import _sspaddmm_cpu
@@ -105,6 +105,7 @@ op_args_default_value = {
105
105
  "DCTN": {"type": 2, "s": None, "axes": None, "norm": None},
106
106
  "DCT": {"type": 2, "n": None, "axis": -1, "norm": None},
107
107
  "Dense": {"bias": None},
108
+ "DequantSwigluQuant": {"bias": None, "quant_scale": None, "quant_offset": None, "group_index": None, "activate_left": False, "quant_mode": 'static'},
108
109
  "Diagonal": {"offset": 0, "dim1": 0, "dim2": 1},
109
110
  "DiagonalView": {"offset": 0, "dim1": 0, "dim2": 1},
110
111
  "DiagExt": {"diagonal": 0},
@@ -115,20 +116,10 @@ op_args_default_value = {
115
116
  "EluExt": {"alpha": 1.0},
116
117
  "EluGradExt": {"alpha": 1.0, "is_result": False},
117
118
  "Elu": {"alpha": 1.0},
118
- "EmbeddingApplyAdamW": {"ams_grad": (0,), "mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
119
- "EmbeddingApplyAdam": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
120
- "EmbeddingApplyAdaGrad": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
121
- "EmbeddingApplyFtrl": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
122
- "EmbeddingApplyRmsprop": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
123
- "EmbeddingApplySgd": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
124
119
  "EmbeddingDenseBackward": {"padding_idx": None, "scale_grad_by_freq": False},
125
- "EmbeddingFeatureMappingFileSize": {"only_offset_flag": True},
126
- "EmbeddingFeatureMappingFind": {"num": 1},
127
- "EmbeddingFeatureMappingImport": {"only_offset_flag": True, "num": 1},
128
120
  "Embedding": {"padding_idx": None, "max_norm": None, "norm_type": 2.0, "scale_grad_by_freq": False},
129
- "EmbeddingTableEvict": {"steps_to_live": 0},
130
- "EmptyLike": {"dtype": None, "device": None},
131
- "Empty": {"dtype": None, "device": None},
121
+ "EmptyLike": {"dtype": None, "device": None, "pin_memory": False},
122
+ "Empty": {"dtype": None, "device": None, "pin_memory": False},
132
123
  "ExtractImagePatches": {"padding": 'VALID'},
133
124
  "FFNExt": {"expertTokens": None, "bias1": None, "bias2": None, "scale": None, "offset": None, "deqScale1": None, "deqScale2": None, "antiquant_scale1": None, "antiquant_scale2": None, "antiquant_offset1": None, "antiquant_offset2": None, "activation": 'fastgelu', "inner_precise": 0},
134
125
  "FFT2": {"s": None, "dim": (-2, -1), "norm": None},
@@ -179,6 +170,7 @@ op_args_default_value = {
179
170
  "IndexAddExt": {"alpha": 1},
180
171
  "InnerInplaceIndexPut": {"accumulate": False},
181
172
  "InnerMoeTokenUnpermute": {"probs": None, "padded_mode": False, "restore_shape": None},
173
+ "InnerUnique": {"sorted": True, "return_inverse": False},
182
174
  "InplaceAddmm": {"beta": 1, "alpha": 1},
183
175
  "InplaceAddsExt": {"alpha": 1},
184
176
  "InplaceAddExt": {"alpha": 1},
@@ -353,6 +345,9 @@ op_args_default_value = {
353
345
  "TopKRouter": {"drop_type": 0},
354
346
  "TopkExt": {"dim": -1, "largest": True, "sorted": True},
355
347
  "TopPRouter": {"drop_type": 0, "threshold": 0.0, "router_prob": 0.0},
348
+ "ToDevice": {"device": None, "dtype": None, "non_blocking": False, "copy": False},
349
+ "ToDtype": {"dtype": None, "non_blocking": False, "copy": False},
350
+ "ToOther": {"non_blocking": False, "copy": False},
356
351
  "TraceV2Grad": {"offset": 0, "axis1": 1, "axis2": 0},
357
352
  "TraceV2": {"offset": 0, "axis1": 1, "axis2": 0, "dtype": None},
358
353
  "TriangularSolve": {"upper": True, "transpose": False, "unitriangular": False},
@@ -387,7 +382,7 @@ op_args_default_value = {
387
382
  "FusedInferAttentionScore": {"pse_shift": None, "attn_mask": None, "actual_seq_lengths": None, "actual_seq_lengths_kv": None, "dequant_scale1": None, "quant_scale1": None, "dequant_scale2": None, "quant_scale2": None, "quant_offset2": None, "antiquant_scale": None, "antiquant_offset": None, "block_table": None, "query_padding_size": None, "kv_padding_size": None, "key_antiquant_scale": None, "key_antiquant_offset": None, "value_antiquant_scale": None, "value_antiquant_offset": None, "key_shared_prefix": None, "value_shared_prefix": None, "actual_shared_prefix_len": None, "num_heads": 1, "scale_value": 1.0, "pre_tokens": 2147483647, "next_tokens": 2147483647, "input_layout": 'BSH', "num_key_value_heads": 0, "sparse_mode": 0, "inner_precise": 1, "block_size": 0, "antiquant_mode": 0, "softmax_lse_flag": False, "key_antiquant_mode": 0, "value_antiquant_mode": 0},
388
383
  "GroupedMatmul": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "group_list": None, "split_item": 0, "group_type": -1, "transpose_a": False, "transpose_b": False},
389
384
  "GroupedMatmulV2": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "group_list": None, "split_item": 0, "group_type": -1},
390
- "GroupedMatmulV4": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "pre_token_scale": None, "group_list": None, "activation_input": None, "activation_quant_scale": None, "activation_quant_offset": None, "split_item": 0, "group_type": -1, "group_list_type": 0, "act_type": 0},
385
+ "GroupedMatmulV4": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "pre_token_scale": None, "group_list": None, "activation_input": None, "activation_quant_scale": None, "activation_quant_offset": None, "split_item": 0, "group_type": -1, "group_list_type": 0, "act_type": 0, "output_dtype": None},
391
386
  "KVCacheScatterUpdate": {"reduce": 'none'},
392
387
  "MatmulAllReduceAddRmsNorm": {"reduce_op": 'sum', "comm_turn": 0, "stream_mode": 1},
393
388
  "MoeFinalizeRouting": {"x2": None, "bias": None, "scales": None, "expanded_row_idx": None, "expanded_expert_idx": None},
@@ -435,11 +430,13 @@ op_args_default_value = {
435
430
  "DeprecatedMedian": {"axis": -1, "keepdims": False},
436
431
  "DeprecatedMin": {"axis": None, "keepdims": False, "initial": None, "where": True, "return_indices": False},
437
432
  "DeprecatedNansum": {"axis": None, "keepdims": False, "dtype": None},
433
+ "DeprecatedPermute": {"axis": None},
438
434
  "DeprecatedProd": {"dim": None, "keepdim": False, "dtype": None},
439
435
  "DeprecatedRepeatInterleave": {"dim": None},
440
436
  "DeprecatedRoll": {"dims": None},
441
437
  "DeprecatedSort": {"axis": -1, "descending": False},
442
438
  "DeprecatedSplit": {"axis": 0},
439
+ "DeprecatedSqueeze": {"axis": None},
443
440
  "DeprecatedStd": {"axis": None, "ddof": 0, "keepdims": False},
444
441
  "DeprecatedSum": {"axis": None, "dtype": None, "keepdims": False, "initial": None},
445
442
  "DeprecatedTake": {"axis": None, "mode": 'clip'},
@@ -463,19 +460,10 @@ op_labels = {
463
460
  "BroadcastToView": {"side_effect_mem": True},
464
461
  "ChunkView": {"side_effect_mem": True},
465
462
  "DiagonalView": {"side_effect_mem": True},
466
- "DistCommAllReduce": {"side_effect_mem": True},
467
- "DistCommReduce": {"side_effect_mem": True},
468
463
  "DropoutExt": {"side_effect_hidden": True},
469
464
  "DropoutGenMaskExt": {"side_effect_hidden": True},
470
465
  "Dropout": {"side_effect_hidden": True},
471
- "EmbeddingApplyAdamW": {"_process_node_engine_id": 'PS'},
472
- "EmbeddingApplyAdam": {"_process_node_engine_id": 'PS'},
473
- "EmbeddingApplyAdaGrad": {"_process_node_engine_id": 'PS'},
474
- "EmbeddingApplyFtrl": {"_process_node_engine_id": 'PS'},
475
- "EmbeddingApplyRmsprop": {"_process_node_engine_id": 'PS'},
476
- "EmbeddingApplySgd": {"_process_node_engine_id": 'PS'},
477
466
  "Embedding": {"side_effect_mem": True},
478
- "EmbeddingTableEvict": {"_process_node_engine_id": 'PS'},
479
467
  "ExpandDimsView": {"side_effect_mem": True},
480
468
  "Generator": {"side_effect_mem": True},
481
469
  "GroupTopk": {"side_effect_mem": True},
@@ -502,10 +490,12 @@ op_labels = {
502
490
  "InplaceGroupedMatmulAdd": {"side_effect_mem": True},
503
491
  "InplaceHardtanh": {"side_effect_mem": True},
504
492
  "InplaceIndexAddExt": {"side_effect_mem": True},
493
+ "InplaceIndexCopy": {"side_effect_mem": True},
505
494
  "InplaceIndexPut": {"side_effect_mem": True},
506
495
  "InplaceLog": {"side_effect_mem": True},
507
496
  "InplaceMaskedFillScalar": {"side_effect_mem": True},
508
497
  "InplaceMaskedFillTensor": {"side_effect_mem": True},
498
+ "InplaceMaskedScatter": {"side_effect_mem": True},
509
499
  "InplaceMatmulAdd": {"side_effect_mem": True},
510
500
  "InplaceMuls": {"side_effect_mem": True},
511
501
  "InplaceMul": {"side_effect_mem": True},
@@ -520,6 +510,8 @@ op_labels = {
520
510
  "InplaceScatterSrcReduce": {"side_effect_mem": True},
521
511
  "InplaceScatterValue": {"side_effect_mem": True},
522
512
  "InplaceScatterValueReduce": {"side_effect_mem": True},
513
+ "InplaceSigmoid": {"side_effect_mem": True},
514
+ "InplaceSign": {"side_effect_mem": True},
523
515
  "InplaceSiLU": {"side_effect_mem": True},
524
516
  "InplaceSubExt": {"side_effect_mem": True},
525
517
  "InplaceSubScalar": {"side_effect_mem": True},
@@ -542,5 +534,7 @@ op_labels = {
542
534
  "TransposeView": {"side_effect_mem": True},
543
535
  "UnstackExtView": {"side_effect_mem": True},
544
536
  "KVCacheScatterUpdate": {"side_effect_mem": True},
537
+ "DistCommAllReduce": {"side_effect_mem": True},
538
+ "DistCommReduce": {"side_effect_mem": True},
545
539
  "InplaceExponential": {"side_effect_mem": True},
546
540
  }
@@ -905,7 +905,6 @@ def histc(input, bins=100, min=0, max=0):
905
905
  Elements lower than min or higher than max are ignored.
906
906
 
907
907
  .. warning::
908
- This is an experimental API that is subject to change or deletion.
909
908
  If input is int64, valid values fit within int32; exceeding this may cause precision errors.
910
909
 
911
910
  Args:
@@ -1732,9 +1731,6 @@ def outer(input, vec2):
1732
1731
  Return outer product of `input` and `vec2`. If `input` is a vector of size :math:`n`
1733
1732
  and `vec2` is a vector of size :math:`m` , then output must be a matrix of shape :math:`(n, m)` .
1734
1733
 
1735
- .. warning::
1736
- This is an experimental API that is subject to change or deletion.
1737
-
1738
1734
  .. note::
1739
1735
  This function does not broadcast.
1740
1736
 
@@ -1957,14 +1953,13 @@ def stack(tensors, dim=0):
1957
1953
  :math:`(x_1, x_2, ..., x_{dim}, N, x_{dim+1}, ..., x_R)`.
1958
1954
 
1959
1955
  Args:
1960
- tensors (Union[tuple, list]): A Tuple or list of Tensor objects with the same shape and type.
1956
+ tensors (Union[tuple, list]): A Tuple or list of Tensor objects with the same shape.
1961
1957
  dim (int, optional): Dimension to stack. The range is [-(R+1), R+1). Default: ``0`` .
1962
1958
 
1963
1959
  Returns:
1964
- Tensor. A stacked Tensor with the same type as `tensors`.
1960
+ A stacked Tensor.
1965
1961
 
1966
1962
  Raises:
1967
- TypeError: If the data types of elements in `tensors` are not the same.
1968
1963
  ValueError: If `dim` is out of the range [-(R+1), R+1);
1969
1964
  or if the shapes of elements in `tensors` are not the same.
1970
1965