mindspore 2.7.0__cp310-cp310-win_amd64.whl → 2.7.1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore (2.7.1) has been flagged as potentially problematic; the "more details" link from the original page is not preserved in this text.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_extends/parse/compile_config.py +24 -1
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -2
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +8 -1
- mindspore/_extends/parse/trope.py +2 -1
- mindspore/_extends/pijit/pijit_func_white_list.py +7 -22
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/base.py +29 -2
- mindspore/common/_decorator.py +3 -2
- mindspore/common/_grad_function.py +3 -1
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +275 -64
- mindspore/common/_utils.py +0 -44
- mindspore/common/api.py +285 -35
- mindspore/common/dump.py +7 -108
- mindspore/common/dynamic_shape/auto_dynamic_shape.py +1 -3
- mindspore/common/hook_handle.py +60 -0
- mindspore/common/jit_config.py +5 -1
- mindspore/common/jit_trace.py +27 -12
- mindspore/common/lazy_inline.py +5 -3
- mindspore/common/parameter.py +13 -107
- mindspore/common/recompute.py +4 -11
- mindspore/common/tensor.py +16 -169
- mindspore/communication/_comm_helper.py +11 -1
- mindspore/communication/comm_func.py +138 -4
- mindspore/communication/management.py +85 -1
- mindspore/config/op_info.config +0 -15
- mindspore/context.py +5 -85
- mindspore/dataset/engine/datasets.py +8 -4
- mindspore/dataset/engine/datasets_vision.py +1 -1
- mindspore/dataset/engine/validators.py +1 -15
- mindspore/dnnl.dll +0 -0
- mindspore/{experimental/llm_boost/ascend_native → graph}/__init__.py +7 -7
- mindspore/graph/custom_pass.py +55 -0
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/__init__.py +3 -3
- mindspore/mindrecord/common/exceptions.py +1 -0
- mindspore/mindrecord/config.py +1 -1
- mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
- mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
- mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
- mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
- mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
- mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
- mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
- mindspore/mindrecord/filereader.py +4 -4
- mindspore/mindrecord/filewriter.py +5 -5
- mindspore/mindrecord/mindpage.py +2 -2
- mindspore/mindrecord/tools/cifar10.py +1 -1
- mindspore/mindrecord/tools/cifar100.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
- mindspore/mindrecord/tools/cifar10_to_mr.py +1 -1
- mindspore/mindrecord/tools/csv_to_mr.py +1 -1
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_cluster.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_hardware_abstract.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mindspore_runtime_utils.dll +0 -0
- mindspore/mindspore_tools.dll +0 -0
- mindspore/mint/__init__.py +15 -10
- mindspore/mint/distributed/distributed.py +182 -62
- mindspore/mint/nn/__init__.py +2 -16
- mindspore/mint/nn/functional.py +4 -110
- mindspore/mint/nn/layer/__init__.py +0 -2
- mindspore/mint/nn/layer/activation.py +0 -6
- mindspore/mint/nn/layer/basic.py +0 -47
- mindspore/mint/nn/layer/conv.py +4 -4
- mindspore/mint/nn/layer/normalization.py +8 -13
- mindspore/mint/nn/layer/pooling.py +0 -4
- mindspore/nn/__init__.py +1 -3
- mindspore/nn/cell.py +16 -66
- mindspore/nn/layer/basic.py +49 -1
- mindspore/nn/layer/container.py +16 -0
- mindspore/nn/layer/embedding.py +4 -169
- mindspore/nn/layer/normalization.py +2 -1
- mindspore/nn/layer/thor_layer.py +4 -85
- mindspore/nn/optim/ada_grad.py +0 -1
- mindspore/nn/optim/adafactor.py +0 -1
- mindspore/nn/optim/adam.py +31 -124
- mindspore/nn/optim/adamax.py +0 -1
- mindspore/nn/optim/asgd.py +0 -1
- mindspore/nn/optim/ftrl.py +8 -102
- mindspore/nn/optim/lamb.py +0 -1
- mindspore/nn/optim/lars.py +0 -3
- mindspore/nn/optim/lazyadam.py +25 -218
- mindspore/nn/optim/momentum.py +5 -43
- mindspore/nn/optim/optimizer.py +6 -55
- mindspore/nn/optim/proximal_ada_grad.py +0 -1
- mindspore/nn/optim/rmsprop.py +0 -1
- mindspore/nn/optim/rprop.py +0 -1
- mindspore/nn/optim/sgd.py +0 -1
- mindspore/nn/optim/tft_wrapper.py +0 -1
- mindspore/nn/optim/thor.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -8
- mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
- mindspore/nn/probability/bijector/power_transform.py +20 -21
- mindspore/nn/probability/bijector/scalar_affine.py +5 -5
- mindspore/nn/probability/bijector/softplus.py +13 -14
- mindspore/nn/wrap/grad_reducer.py +4 -74
- mindspore/numpy/array_creations.py +2 -2
- mindspore/numpy/fft.py +9 -9
- mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
- mindspore/onnx/onnx_export.py +137 -0
- mindspore/opencv_core4110.dll +0 -0
- mindspore/opencv_imgcodecs4110.dll +0 -0
- mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
- mindspore/ops/__init__.py +2 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
- mindspore/ops/_op_impl/cpu/__init__.py +0 -5
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +16 -22
- mindspore/ops/auto_generate/gen_extend_func.py +2 -7
- mindspore/ops/auto_generate/gen_ops_def.py +98 -141
- mindspore/ops/auto_generate/gen_ops_prim.py +12708 -12686
- mindspore/ops/communication.py +97 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +15 -1
- mindspore/ops/composite/multitype_ops/__init__.py +3 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
- mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
- mindspore/ops/function/__init__.py +1 -0
- mindspore/ops/function/array_func.py +14 -12
- mindspore/ops/function/comm_func.py +3883 -0
- mindspore/ops/function/debug_func.py +3 -4
- mindspore/ops/function/math_func.py +45 -54
- mindspore/ops/function/nn_func.py +75 -294
- mindspore/ops/function/random_func.py +9 -18
- mindspore/ops/functional.py +2 -0
- mindspore/ops/functional_overload.py +354 -18
- mindspore/ops/operations/__init__.py +2 -5
- mindspore/ops/operations/_custom_ops_utils.py +7 -9
- mindspore/ops/operations/_inner_ops.py +1 -38
- mindspore/ops/operations/_rl_inner_ops.py +0 -933
- mindspore/ops/operations/array_ops.py +1 -0
- mindspore/ops/operations/comm_ops.py +94 -2
- mindspore/ops/operations/custom_ops.py +228 -19
- mindspore/ops/operations/debug_ops.py +27 -29
- mindspore/ops/operations/manually_defined/ops_def.py +27 -306
- mindspore/ops/operations/nn_ops.py +2 -2
- mindspore/ops/operations/sparse_ops.py +0 -83
- mindspore/ops/primitive.py +1 -17
- mindspore/ops/tensor_method.py +72 -3
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
- mindspore/ops_generate/api/functions_cc_generator.py +53 -4
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
- mindspore/ops_generate/common/gen_constants.py +11 -10
- mindspore/ops_generate/common/op_proto.py +18 -1
- mindspore/ops_generate/common/template.py +102 -245
- mindspore/ops_generate/common/template_utils.py +212 -0
- mindspore/ops_generate/gen_custom_ops.py +69 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
- mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
- mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
- mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
- mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
- mindspore/ops_generate/resources/yaml_loader.py +13 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
- mindspore/parallel/_cell_wrapper.py +1 -1
- mindspore/parallel/_parallel_serialization.py +1 -4
- mindspore/parallel/_utils.py +29 -6
- mindspore/parallel/checkpoint_transform.py +18 -2
- mindspore/parallel/cluster/process_entity/_api.py +24 -32
- mindspore/parallel/cluster/process_entity/_utils.py +9 -5
- mindspore/{experimental/llm_boost/atb → parallel/distributed}/__init__.py +21 -23
- mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
- mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
- mindspore/parallel/strategy.py +336 -0
- mindspore/parallel/transform_safetensors.py +117 -16
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +3 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
- mindspore/profiler/common/constant.py +5 -0
- mindspore/profiler/common/file_manager.py +9 -0
- mindspore/profiler/common/msprof_cmd_tool.py +38 -2
- mindspore/profiler/common/path_manager.py +56 -24
- mindspore/profiler/common/profiler_context.py +2 -12
- mindspore/profiler/common/profiler_info.py +3 -3
- mindspore/profiler/common/profiler_path_manager.py +13 -0
- mindspore/profiler/common/util.py +30 -3
- mindspore/profiler/experimental_config.py +2 -1
- mindspore/profiler/platform/npu_profiler.py +33 -6
- mindspore/run_check/_check_version.py +108 -24
- mindspore/runtime/__init__.py +3 -2
- mindspore/runtime/executor.py +11 -3
- mindspore/runtime/memory.py +112 -0
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
- mindspore/tools/data_dump.py +130 -0
- mindspore/tools/sdc_detect.py +91 -0
- mindspore/tools/stress_detect.py +63 -0
- mindspore/train/__init__.py +6 -6
- mindspore/train/_utils.py +5 -18
- mindspore/train/amp.py +6 -4
- mindspore/train/callback/_checkpoint.py +0 -9
- mindspore/train/callback/_train_fault_tolerance.py +69 -18
- mindspore/train/data_sink.py +1 -5
- mindspore/train/model.py +38 -211
- mindspore/train/serialization.py +126 -387
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dlpack.py +92 -0
- mindspore/utils/dryrun.py +1 -1
- mindspore/utils/runtime_execution_order_check.py +10 -0
- mindspore/utils/sdc_detect.py +14 -12
- mindspore/utils/stress_detect.py +43 -0
- mindspore/utils/utils.py +144 -8
- mindspore/version.py +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/RECORD +254 -267
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -210
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
- mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
- mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
- mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
- mindspore/experimental/llm_boost/register.py +0 -130
- mindspore/experimental/llm_boost/utils.py +0 -31
- mindspore/include/OWNERS +0 -7
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
- mindspore/nn/reinforcement/_batch_read_write.py +0 -142
- mindspore/nn/reinforcement/_tensors_queue.py +0 -152
- mindspore/nn/reinforcement/tensor_array.py +0 -145
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
- mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
- mindspore/ops/_op_impl/cpu/buffer_append.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
- mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
- mindspore/ops/operations/_tensor_array.py +0 -359
- mindspore/ops/operations/rl_ops.py +0 -288
- mindspore/parallel/_offload_context.py +0 -275
- mindspore/parallel/_recovery_context.py +0 -115
- mindspore/parallel/_transformer/__init__.py +0 -35
- mindspore/parallel/_transformer/layers.py +0 -765
- mindspore/parallel/_transformer/loss.py +0 -251
- mindspore/parallel/_transformer/moe.py +0 -693
- mindspore/parallel/_transformer/op_parallel_config.py +0 -222
- mindspore/parallel/_transformer/transformer.py +0 -3124
- mindspore/parallel/mpi/_mpi_config.py +0 -116
- mindspore/train/memory_profiling_pb2.py +0 -298
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
|
@@ -29,7 +29,7 @@ from mindspore.common.sparse_tensor import RowTensorInner
|
|
|
29
29
|
from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
|
|
30
30
|
from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _HostAllGather, AllReduce,
|
|
31
31
|
NeighborExchange, AlltoAll, AlltoAllV, NeighborExchangeV2,
|
|
32
|
-
Broadcast, AllGatherV, ReduceScatterV,
|
|
32
|
+
Broadcast, AlltoAllVC, AllGatherV, ReduceScatterV,
|
|
33
33
|
_GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
|
|
34
34
|
ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
|
|
35
35
|
_VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,
|
|
@@ -37,6 +37,7 @@ from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _H
|
|
|
37
37
|
_VirtualAssignKvCache)
|
|
38
38
|
from mindspore.ops._grad_experimental.grad_base import bprop_getters
|
|
39
39
|
from mindspore.ops.operations import _grad_ops as G
|
|
40
|
+
from mindspore.tools.sdc_detect import _sdc_detector
|
|
40
41
|
import mindspore as ms
|
|
41
42
|
|
|
42
43
|
_squared_device_local_norm = None
|
|
@@ -277,6 +278,8 @@ def get_bprop_mirror_micro_step_operator(self):
|
|
|
277
278
|
if dump_device_local_norm:
|
|
278
279
|
# init _squared _squared_device_local_norm
|
|
279
280
|
squared_device_local_norm = get_squared_device_local_norm_param()
|
|
281
|
+
# feature value sampling for sdc detect
|
|
282
|
+
feat_value_dump_name = _sdc_detector.get_dump_name(param_name) if _sdc_detector.need_sample() else None
|
|
280
283
|
|
|
281
284
|
def bprop(x, z, out, dout):
|
|
282
285
|
if dump_local_norm or dump_device_local_norm:
|
|
@@ -289,6 +292,9 @@ def get_bprop_mirror_micro_step_operator(self):
|
|
|
289
292
|
if dump_device_local_norm:
|
|
290
293
|
z = F.depend(z, F.assign_add(squared_device_local_norm,
|
|
291
294
|
cast(squared_norm, squared_device_local_norm.dtype)))
|
|
295
|
+
if feat_value_dump_name and z.ndim > 1:
|
|
296
|
+
feat_value = square(F.max(F.abs(z))[0])
|
|
297
|
+
z = F.depend(z, tensor_dump(feat_value_dump_name, feat_value))
|
|
292
298
|
real_grad = z
|
|
293
299
|
assign_out = dout
|
|
294
300
|
if issubclass_(F.typeof(dout), mstype.tensor_type):
|
|
@@ -343,14 +349,16 @@ def get_bprop_all_gather(self):
|
|
|
343
349
|
ln_print = P.Print()
|
|
344
350
|
tensor_dump = P.TensorDump()
|
|
345
351
|
reduce_sum = P.ReduceSum(keep_dims=False)
|
|
346
|
-
square = P.Square()
|
|
347
352
|
sqrt = P.Sqrt()
|
|
353
|
+
square = P.Square()
|
|
348
354
|
if dump_local_norm_path:
|
|
349
355
|
global_rank = get_rank()
|
|
350
356
|
file = os.path.join(dump_local_norm_path, "rank_" + str(global_rank), "local_norm__" + param_name)
|
|
351
357
|
if dump_device_local_norm:
|
|
352
358
|
# init _squared _squared_device_local_norm
|
|
353
359
|
squared_device_local_norm = get_squared_device_local_norm_param()
|
|
360
|
+
# feature value sampling for sdc detect
|
|
361
|
+
feat_value_dump_name = _sdc_detector.get_dump_name(param_name) if _sdc_detector.need_sample() else None
|
|
354
362
|
|
|
355
363
|
def bprop(x, out, dout):
|
|
356
364
|
if param_name and (dump_local_norm or dump_device_local_norm):
|
|
@@ -363,6 +371,9 @@ def get_bprop_all_gather(self):
|
|
|
363
371
|
if dump_device_local_norm:
|
|
364
372
|
dout = F.depend(dout, F.assign_add(squared_device_local_norm,
|
|
365
373
|
cast(squared_norm, squared_device_local_norm.dtype)))
|
|
374
|
+
if param_name and feat_value_dump_name and dout.ndim > 1:
|
|
375
|
+
feat_value = square(F.max(F.abs(dout))[0])
|
|
376
|
+
dout = F.depend(dout, tensor_dump(feat_value_dump_name, feat_value))
|
|
366
377
|
|
|
367
378
|
dx = reduce_scatter(dout)
|
|
368
379
|
if mean_flag:
|
|
@@ -452,6 +463,8 @@ def get_bprop_micro_step_all_gather(self):
|
|
|
452
463
|
if dump_device_local_norm:
|
|
453
464
|
# init _squared _squared_device_local_norm
|
|
454
465
|
squared_device_local_norm = get_squared_device_local_norm_param()
|
|
466
|
+
# feature value sampling for sdc detect
|
|
467
|
+
feat_value_dump_name = _sdc_detector.get_dump_name(param_name) if _sdc_detector.need_sample() else None
|
|
455
468
|
|
|
456
469
|
def bprop(x, z, out, dout):
|
|
457
470
|
if with_mirror_operator:
|
|
@@ -472,6 +485,9 @@ def get_bprop_micro_step_all_gather(self):
|
|
|
472
485
|
if dump_device_local_norm:
|
|
473
486
|
z = F.depend(z, F.assign_add(squared_device_local_norm,
|
|
474
487
|
cast(squared_norm, squared_device_local_norm.dtype)))
|
|
488
|
+
if feat_value_dump_name and z.ndim > 1:
|
|
489
|
+
feat_value = square(F.max(F.abs(z))[0])
|
|
490
|
+
z = F.depend(z, tensor_dump(feat_value_dump_name, feat_value))
|
|
475
491
|
if not do_mirror:
|
|
476
492
|
return (z, cast(out_tensor, dtype(z)))
|
|
477
493
|
real_grad = reduce_scatter(z)
|
|
@@ -655,6 +671,21 @@ def get_bprop_all_to_all_v(self):
|
|
|
655
671
|
return bprop
|
|
656
672
|
|
|
657
673
|
|
|
674
|
+
@bprop_getters.register(AlltoAllVC)
|
|
675
|
+
def get_bprop_all_to_all_v_c(self):
|
|
676
|
+
"""Generate bprop for AlltoAllVC."""
|
|
677
|
+
all_to_all_v_c_grad = AlltoAllVC(self.group, self.block_size, transpose=True)
|
|
678
|
+
if hasattr(self, "instance_name") and self.instance_name:
|
|
679
|
+
instance_name = "grad" + self.instance_name
|
|
680
|
+
all_to_all_v_c_grad.set_prim_instance_name(instance_name)
|
|
681
|
+
|
|
682
|
+
def bprop(x, send_count_matrix, out, dout):
|
|
683
|
+
dx = all_to_all_v_c_grad(dout, send_count_matrix)
|
|
684
|
+
return (dx, zeros_like(send_count_matrix))
|
|
685
|
+
|
|
686
|
+
return bprop
|
|
687
|
+
|
|
688
|
+
|
|
658
689
|
@bprop_getters.register(AllGatherV)
|
|
659
690
|
def get_bprop_all_gather_v(self):
|
|
660
691
|
"""Generate bprop for AllGatherV."""
|
|
@@ -728,6 +759,8 @@ def get_bprop_mirror_operator(self):
|
|
|
728
759
|
if dump_device_local_norm:
|
|
729
760
|
# init _squared _squared_device_local_norm
|
|
730
761
|
squared_device_local_norm = get_squared_device_local_norm_param()
|
|
762
|
+
# feature value sampling for sdc detect
|
|
763
|
+
feat_value_dump_name = _sdc_detector.get_dump_name(param_name) if _sdc_detector.need_sample() else None
|
|
731
764
|
if dev_num > 1:
|
|
732
765
|
dev_num_r = 1.0 / dev_num
|
|
733
766
|
all_reduce = AllReduce(group=group)
|
|
@@ -762,6 +795,9 @@ def get_bprop_mirror_operator(self):
|
|
|
762
795
|
if dump_device_local_norm:
|
|
763
796
|
dout = F.depend(dout, F.assign_add(squared_device_local_norm,
|
|
764
797
|
cast(squared_norm, squared_device_local_norm.dtype)))
|
|
798
|
+
if feat_value_dump_name and dout.ndim > 1:
|
|
799
|
+
feat_value = square(F.max(F.abs(dout))[0])
|
|
800
|
+
dout = F.depend(dout, tensor_dump(feat_value_dump_name, feat_value))
|
|
765
801
|
|
|
766
802
|
if dev_num == 1:
|
|
767
803
|
return (dout,)
|
|
@@ -214,13 +214,8 @@ from .cumsum import _cumsum_aicpu
|
|
|
214
214
|
from .round import _round_aicpu
|
|
215
215
|
from .stft import _stft_aicpu
|
|
216
216
|
from .floor_div import _floor_div_aicpu
|
|
217
|
-
from .priority_replay_buffer import _prb_create_op_cpu
|
|
218
|
-
from .priority_replay_buffer import _prb_push_op_cpu
|
|
219
217
|
from .conjugate_transpose import _conjugate_transpose_aicpu
|
|
220
|
-
from .priority_replay_buffer import _prb_sample_op_cpu
|
|
221
|
-
from .priority_replay_buffer import _prb_update_op_cpu
|
|
222
218
|
from .equal import _equal_aicpu
|
|
223
|
-
from .priority_replay_buffer import _prb_destroy_op_cpu
|
|
224
219
|
from .right_shift import _right_shift_aicpu
|
|
225
220
|
from .tril import _tril_aicpu
|
|
226
221
|
from .linspace import _lin_space_aicpu
|
|
@@ -242,10 +237,6 @@ from .sparse_tensor_to_csr_sparse_matrix import _sparse_tensor_to_csr_sparse_mat
|
|
|
242
237
|
from .csr_sparse_matrix_to_sparse_tensor import _csr_sparse_matrix_to_sparse_tensor_aicpu
|
|
243
238
|
from .linear_sum_assignment import _linear_sum_assignment_aicpu
|
|
244
239
|
from .random_shuffle import _random_shuffle_aicpu
|
|
245
|
-
from .reservoir_replay_buffer import _rrb_create_op_cpu
|
|
246
|
-
from .reservoir_replay_buffer import _rrb_push_op_cpu
|
|
247
|
-
from .reservoir_replay_buffer import _rrb_sample_op_cpu
|
|
248
|
-
from .reservoir_replay_buffer import _rrb_destroy_op_cpu
|
|
249
240
|
from .concat_offset import _concat_offset_aicpu
|
|
250
241
|
from .range import _range_aicpu
|
|
251
242
|
from .range_v2 import _range_v2_aicpu
|
|
@@ -414,7 +405,6 @@ from .segment_prod import _segment_prod_aicpu
|
|
|
414
405
|
from .segment_sum import _segment_sum_aicpu
|
|
415
406
|
from .set_size import _set_size_aicpu
|
|
416
407
|
from .slice import _slice_aicpu
|
|
417
|
-
from .sparse_cross import _sparse_cross_aicpu
|
|
418
408
|
from .sparse_slice import _sparse_slice_aicpu
|
|
419
409
|
from .sparse_softmax import _sparse_softmax_aicpu
|
|
420
410
|
from .sparse_tensor_dense_add import _sparse_tensor_dense_add_aicpu
|
|
@@ -69,12 +69,7 @@ from .tensor_copy_slices import _tensor_copy_slices_cpu
|
|
|
69
69
|
from .l2loss import _l2loss_cpu
|
|
70
70
|
from .pyexecute import _pyexecute_cpu
|
|
71
71
|
from .pyfunc import _pyfunc_cpu
|
|
72
|
-
from .buffer_append import _buffer_append_cpu
|
|
73
|
-
from .buffer_get import _buffer_get_cpu
|
|
74
72
|
from .raise_op import _raise_cpu
|
|
75
73
|
from .joinedstr_op import _joinedstr_cpu
|
|
76
|
-
from .buffer_sample import _buffer_sample_cpu
|
|
77
|
-
from .priority_replay_buffer import _prb_push_op_cpu
|
|
78
|
-
from .priority_replay_buffer import _prb_sample_op_cpu
|
|
79
74
|
from .space_to_batch_nd import _space_to_batch_nd_cpu
|
|
80
75
|
from .sspaddmm import _sspaddmm_cpu
|
|
@@ -105,6 +105,7 @@ op_args_default_value = {
|
|
|
105
105
|
"DCTN": {"type": 2, "s": None, "axes": None, "norm": None},
|
|
106
106
|
"DCT": {"type": 2, "n": None, "axis": -1, "norm": None},
|
|
107
107
|
"Dense": {"bias": None},
|
|
108
|
+
"DequantSwigluQuant": {"bias": None, "quant_scale": None, "quant_offset": None, "group_index": None, "activate_left": False, "quant_mode": 'static'},
|
|
108
109
|
"Diagonal": {"offset": 0, "dim1": 0, "dim2": 1},
|
|
109
110
|
"DiagonalView": {"offset": 0, "dim1": 0, "dim2": 1},
|
|
110
111
|
"DiagExt": {"diagonal": 0},
|
|
@@ -115,20 +116,10 @@ op_args_default_value = {
|
|
|
115
116
|
"EluExt": {"alpha": 1.0},
|
|
116
117
|
"EluGradExt": {"alpha": 1.0, "is_result": False},
|
|
117
118
|
"Elu": {"alpha": 1.0},
|
|
118
|
-
"EmbeddingApplyAdamW": {"ams_grad": (0,), "mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
|
|
119
|
-
"EmbeddingApplyAdam": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
|
|
120
|
-
"EmbeddingApplyAdaGrad": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
|
|
121
|
-
"EmbeddingApplyFtrl": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
|
|
122
|
-
"EmbeddingApplyRmsprop": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
|
|
123
|
-
"EmbeddingApplySgd": {"mask_zero": (0,), "padding_key": (0,), "padding_key_mask": (1,), "completion_key": (0,), "completion_key_mask": (1,), "_embedding_dim": 1, "_max_key_num": 1},
|
|
124
119
|
"EmbeddingDenseBackward": {"padding_idx": None, "scale_grad_by_freq": False},
|
|
125
|
-
"EmbeddingFeatureMappingFileSize": {"only_offset_flag": True},
|
|
126
|
-
"EmbeddingFeatureMappingFind": {"num": 1},
|
|
127
|
-
"EmbeddingFeatureMappingImport": {"only_offset_flag": True, "num": 1},
|
|
128
120
|
"Embedding": {"padding_idx": None, "max_norm": None, "norm_type": 2.0, "scale_grad_by_freq": False},
|
|
129
|
-
"
|
|
130
|
-
"
|
|
131
|
-
"Empty": {"dtype": None, "device": None},
|
|
121
|
+
"EmptyLike": {"dtype": None, "device": None, "pin_memory": False},
|
|
122
|
+
"Empty": {"dtype": None, "device": None, "pin_memory": False},
|
|
132
123
|
"ExtractImagePatches": {"padding": 'VALID'},
|
|
133
124
|
"FFNExt": {"expertTokens": None, "bias1": None, "bias2": None, "scale": None, "offset": None, "deqScale1": None, "deqScale2": None, "antiquant_scale1": None, "antiquant_scale2": None, "antiquant_offset1": None, "antiquant_offset2": None, "activation": 'fastgelu', "inner_precise": 0},
|
|
134
125
|
"FFT2": {"s": None, "dim": (-2, -1), "norm": None},
|
|
@@ -179,6 +170,7 @@ op_args_default_value = {
|
|
|
179
170
|
"IndexAddExt": {"alpha": 1},
|
|
180
171
|
"InnerInplaceIndexPut": {"accumulate": False},
|
|
181
172
|
"InnerMoeTokenUnpermute": {"probs": None, "padded_mode": False, "restore_shape": None},
|
|
173
|
+
"InnerUnique": {"sorted": True, "return_inverse": False},
|
|
182
174
|
"InplaceAddmm": {"beta": 1, "alpha": 1},
|
|
183
175
|
"InplaceAddsExt": {"alpha": 1},
|
|
184
176
|
"InplaceAddExt": {"alpha": 1},
|
|
@@ -353,6 +345,9 @@ op_args_default_value = {
|
|
|
353
345
|
"TopKRouter": {"drop_type": 0},
|
|
354
346
|
"TopkExt": {"dim": -1, "largest": True, "sorted": True},
|
|
355
347
|
"TopPRouter": {"drop_type": 0, "threshold": 0.0, "router_prob": 0.0},
|
|
348
|
+
"ToDevice": {"device": None, "dtype": None, "non_blocking": False, "copy": False},
|
|
349
|
+
"ToDtype": {"dtype": None, "non_blocking": False, "copy": False},
|
|
350
|
+
"ToOther": {"non_blocking": False, "copy": False},
|
|
356
351
|
"TraceV2Grad": {"offset": 0, "axis1": 1, "axis2": 0},
|
|
357
352
|
"TraceV2": {"offset": 0, "axis1": 1, "axis2": 0, "dtype": None},
|
|
358
353
|
"TriangularSolve": {"upper": True, "transpose": False, "unitriangular": False},
|
|
@@ -387,7 +382,7 @@ op_args_default_value = {
|
|
|
387
382
|
"FusedInferAttentionScore": {"pse_shift": None, "attn_mask": None, "actual_seq_lengths": None, "actual_seq_lengths_kv": None, "dequant_scale1": None, "quant_scale1": None, "dequant_scale2": None, "quant_scale2": None, "quant_offset2": None, "antiquant_scale": None, "antiquant_offset": None, "block_table": None, "query_padding_size": None, "kv_padding_size": None, "key_antiquant_scale": None, "key_antiquant_offset": None, "value_antiquant_scale": None, "value_antiquant_offset": None, "key_shared_prefix": None, "value_shared_prefix": None, "actual_shared_prefix_len": None, "num_heads": 1, "scale_value": 1.0, "pre_tokens": 2147483647, "next_tokens": 2147483647, "input_layout": 'BSH', "num_key_value_heads": 0, "sparse_mode": 0, "inner_precise": 1, "block_size": 0, "antiquant_mode": 0, "softmax_lse_flag": False, "key_antiquant_mode": 0, "value_antiquant_mode": 0},
|
|
388
383
|
"GroupedMatmul": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "group_list": None, "split_item": 0, "group_type": -1, "transpose_a": False, "transpose_b": False},
|
|
389
384
|
"GroupedMatmulV2": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "group_list": None, "split_item": 0, "group_type": -1},
|
|
390
|
-
"GroupedMatmulV4": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "pre_token_scale": None, "group_list": None, "activation_input": None, "activation_quant_scale": None, "activation_quant_offset": None, "split_item": 0, "group_type": -1, "group_list_type": 0, "act_type": 0},
|
|
385
|
+
"GroupedMatmulV4": {"bias": None, "scale": None, "offset": None, "antiquant_scale": None, "antiquant_offset": None, "pre_token_scale": None, "group_list": None, "activation_input": None, "activation_quant_scale": None, "activation_quant_offset": None, "split_item": 0, "group_type": -1, "group_list_type": 0, "act_type": 0, "output_dtype": None},
|
|
391
386
|
"KVCacheScatterUpdate": {"reduce": 'none'},
|
|
392
387
|
"MatmulAllReduceAddRmsNorm": {"reduce_op": 'sum', "comm_turn": 0, "stream_mode": 1},
|
|
393
388
|
"MoeFinalizeRouting": {"x2": None, "bias": None, "scales": None, "expanded_row_idx": None, "expanded_expert_idx": None},
|
|
@@ -435,11 +430,13 @@ op_args_default_value = {
|
|
|
435
430
|
"DeprecatedMedian": {"axis": -1, "keepdims": False},
|
|
436
431
|
"DeprecatedMin": {"axis": None, "keepdims": False, "initial": None, "where": True, "return_indices": False},
|
|
437
432
|
"DeprecatedNansum": {"axis": None, "keepdims": False, "dtype": None},
|
|
433
|
+
"DeprecatedPermute": {"axis": None},
|
|
438
434
|
"DeprecatedProd": {"dim": None, "keepdim": False, "dtype": None},
|
|
439
435
|
"DeprecatedRepeatInterleave": {"dim": None},
|
|
440
436
|
"DeprecatedRoll": {"dims": None},
|
|
441
437
|
"DeprecatedSort": {"axis": -1, "descending": False},
|
|
442
438
|
"DeprecatedSplit": {"axis": 0},
|
|
439
|
+
"DeprecatedSqueeze": {"axis": None},
|
|
443
440
|
"DeprecatedStd": {"axis": None, "ddof": 0, "keepdims": False},
|
|
444
441
|
"DeprecatedSum": {"axis": None, "dtype": None, "keepdims": False, "initial": None},
|
|
445
442
|
"DeprecatedTake": {"axis": None, "mode": 'clip'},
|
|
@@ -463,19 +460,10 @@ op_labels = {
|
|
|
463
460
|
"BroadcastToView": {"side_effect_mem": True},
|
|
464
461
|
"ChunkView": {"side_effect_mem": True},
|
|
465
462
|
"DiagonalView": {"side_effect_mem": True},
|
|
466
|
-
"DistCommAllReduce": {"side_effect_mem": True},
|
|
467
|
-
"DistCommReduce": {"side_effect_mem": True},
|
|
468
463
|
"DropoutExt": {"side_effect_hidden": True},
|
|
469
464
|
"DropoutGenMaskExt": {"side_effect_hidden": True},
|
|
470
465
|
"Dropout": {"side_effect_hidden": True},
|
|
471
|
-
"EmbeddingApplyAdamW": {"_process_node_engine_id": 'PS'},
|
|
472
|
-
"EmbeddingApplyAdam": {"_process_node_engine_id": 'PS'},
|
|
473
|
-
"EmbeddingApplyAdaGrad": {"_process_node_engine_id": 'PS'},
|
|
474
|
-
"EmbeddingApplyFtrl": {"_process_node_engine_id": 'PS'},
|
|
475
|
-
"EmbeddingApplyRmsprop": {"_process_node_engine_id": 'PS'},
|
|
476
|
-
"EmbeddingApplySgd": {"_process_node_engine_id": 'PS'},
|
|
477
466
|
"Embedding": {"side_effect_mem": True},
|
|
478
|
-
"EmbeddingTableEvict": {"_process_node_engine_id": 'PS'},
|
|
479
467
|
"ExpandDimsView": {"side_effect_mem": True},
|
|
480
468
|
"Generator": {"side_effect_mem": True},
|
|
481
469
|
"GroupTopk": {"side_effect_mem": True},
|
|
@@ -502,10 +490,12 @@ op_labels = {
|
|
|
502
490
|
"InplaceGroupedMatmulAdd": {"side_effect_mem": True},
|
|
503
491
|
"InplaceHardtanh": {"side_effect_mem": True},
|
|
504
492
|
"InplaceIndexAddExt": {"side_effect_mem": True},
|
|
493
|
+
"InplaceIndexCopy": {"side_effect_mem": True},
|
|
505
494
|
"InplaceIndexPut": {"side_effect_mem": True},
|
|
506
495
|
"InplaceLog": {"side_effect_mem": True},
|
|
507
496
|
"InplaceMaskedFillScalar": {"side_effect_mem": True},
|
|
508
497
|
"InplaceMaskedFillTensor": {"side_effect_mem": True},
|
|
498
|
+
"InplaceMaskedScatter": {"side_effect_mem": True},
|
|
509
499
|
"InplaceMatmulAdd": {"side_effect_mem": True},
|
|
510
500
|
"InplaceMuls": {"side_effect_mem": True},
|
|
511
501
|
"InplaceMul": {"side_effect_mem": True},
|
|
@@ -520,6 +510,8 @@ op_labels = {
|
|
|
520
510
|
"InplaceScatterSrcReduce": {"side_effect_mem": True},
|
|
521
511
|
"InplaceScatterValue": {"side_effect_mem": True},
|
|
522
512
|
"InplaceScatterValueReduce": {"side_effect_mem": True},
|
|
513
|
+
"InplaceSigmoid": {"side_effect_mem": True},
|
|
514
|
+
"InplaceSign": {"side_effect_mem": True},
|
|
523
515
|
"InplaceSiLU": {"side_effect_mem": True},
|
|
524
516
|
"InplaceSubExt": {"side_effect_mem": True},
|
|
525
517
|
"InplaceSubScalar": {"side_effect_mem": True},
|
|
@@ -542,5 +534,7 @@ op_labels = {
|
|
|
542
534
|
"TransposeView": {"side_effect_mem": True},
|
|
543
535
|
"UnstackExtView": {"side_effect_mem": True},
|
|
544
536
|
"KVCacheScatterUpdate": {"side_effect_mem": True},
|
|
537
|
+
"DistCommAllReduce": {"side_effect_mem": True},
|
|
538
|
+
"DistCommReduce": {"side_effect_mem": True},
|
|
545
539
|
"InplaceExponential": {"side_effect_mem": True},
|
|
546
540
|
}
|
|
@@ -905,7 +905,6 @@ def histc(input, bins=100, min=0, max=0):
|
|
|
905
905
|
Elements lower than min or higher than max are ignored.
|
|
906
906
|
|
|
907
907
|
.. warning::
|
|
908
|
-
This is an experimental API that is subject to change or deletion.
|
|
909
908
|
If input is int64, valid values fit within int32; exceeding this may cause precision errors.
|
|
910
909
|
|
|
911
910
|
Args:
|
|
@@ -1732,9 +1731,6 @@ def outer(input, vec2):
|
|
|
1732
1731
|
Return outer product of `input` and `vec2`. If `input` is a vector of size :math:`n`
|
|
1733
1732
|
and `vec2` is a vector of size :math:`m` , then output must be a matrix of shape :math:`(n, m)` .
|
|
1734
1733
|
|
|
1735
|
-
.. warning::
|
|
1736
|
-
This is an experimental API that is subject to change or deletion.
|
|
1737
|
-
|
|
1738
1734
|
.. note::
|
|
1739
1735
|
This function does not broadcast.
|
|
1740
1736
|
|
|
@@ -1957,14 +1953,13 @@ def stack(tensors, dim=0):
|
|
|
1957
1953
|
:math:`(x_1, x_2, ..., x_{dim}, N, x_{dim+1}, ..., x_R)`.
|
|
1958
1954
|
|
|
1959
1955
|
Args:
|
|
1960
|
-
tensors (Union[tuple, list]): A Tuple or list of Tensor objects with the same shape
|
|
1956
|
+
tensors (Union[tuple, list]): A Tuple or list of Tensor objects with the same shape.
|
|
1961
1957
|
dim (int, optional): Dimension to stack. The range is [-(R+1), R+1). Default: ``0`` .
|
|
1962
1958
|
|
|
1963
1959
|
Returns:
|
|
1964
|
-
|
|
1960
|
+
A stacked Tensor.
|
|
1965
1961
|
|
|
1966
1962
|
Raises:
|
|
1967
|
-
TypeError: If the data types of elements in `tensors` are not the same.
|
|
1968
1963
|
ValueError: If `dim` is out of the range [-(R+1), R+1);
|
|
1969
1964
|
or if the shapes of elements in `tensors` are not the same.
|
|
1970
1965
|
|