mindspore-2.4.0-cp310-cp310-manylinux1_x86_64.whl → mindspore-2.4.1-cp310-cp310-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (100)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_c_dataengine.cpython-310-x86_64-linux-gnu.so +0 -0
  3. mindspore/_c_expression.cpython-310-x86_64-linux-gnu.so +0 -0
  4. mindspore/bin/cache_admin +0 -0
  5. mindspore/bin/cache_server +0 -0
  6. mindspore/common/initializer.py +51 -15
  7. mindspore/common/parameter.py +18 -4
  8. mindspore/common/tensor.py +15 -49
  9. mindspore/communication/comm_func.py +7 -7
  10. mindspore/context.py +9 -0
  11. mindspore/include/mindapi/base/format.h +13 -0
  12. mindspore/lib/libdnnl.so.2 +0 -0
  13. mindspore/lib/libmindspore_backend.so +0 -0
  14. mindspore/lib/libmindspore_common.so +0 -0
  15. mindspore/lib/libmindspore_core.so +0 -0
  16. mindspore/lib/libmindspore_glog.so.0 +0 -0
  17. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  18. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  19. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  20. mindspore/lib/libmindspore_ops.so +0 -0
  21. mindspore/lib/libopencv_core.so.4.5 +0 -0
  22. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  23. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  24. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +10 -10
  25. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +8 -8
  26. mindspore/lib/plugin/ascend/custom_compiler/setup.py +1 -1
  27. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  28. mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
  29. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h +5 -5
  30. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  31. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  32. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal.so +0 -0
  33. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
  34. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +1 -0
  35. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h +6 -1
  36. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/rms_norm_op.h +4 -3
  37. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_impl.so +0 -0
  38. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  39. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  40. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  41. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  42. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
  43. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  44. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  45. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  46. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  47. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bnsd_mix.o +0 -0
  48. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
  49. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
  50. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
  51. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
  52. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  53. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  54. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  55. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  56. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  57. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  58. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  59. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  60. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  61. mindspore/mint/__init__.py +490 -2
  62. mindspore/mint/nn/__init__.py +2 -2
  63. mindspore/mint/optim/adamw.py +6 -14
  64. mindspore/nn/cell.py +1 -3
  65. mindspore/nn/layer/basic.py +24 -7
  66. mindspore/nn/layer/embedding.py +31 -14
  67. mindspore/nn/optim/tft_wrapper.py +12 -15
  68. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  69. mindspore/ops/_grad_experimental/grad_comm_ops.py +20 -1
  70. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +6 -0
  71. mindspore/ops/auto_generate/gen_extend_func.py +33 -0
  72. mindspore/ops/auto_generate/gen_ops_def.py +52 -3
  73. mindspore/ops/auto_generate/gen_ops_prim.py +155 -6
  74. mindspore/ops/function/array_func.py +2 -0
  75. mindspore/ops/function/math_func.py +7 -1
  76. mindspore/ops/function/random_func.py +221 -7
  77. mindspore/ops/operations/__init__.py +1 -1
  78. mindspore/ops/operations/array_ops.py +3 -1
  79. mindspore/ops/operations/comm_ops.py +21 -0
  80. mindspore/ops/operations/manually_defined/ops_def.py +8 -10
  81. mindspore/parallel/_auto_parallel_context.py +3 -1
  82. mindspore/parallel/_cell_wrapper.py +2 -0
  83. mindspore/parallel/_tensor.py +46 -2
  84. mindspore/parallel/_utils.py +40 -21
  85. mindspore/parallel/transform_safetensors.py +196 -43
  86. mindspore/profiler/profiling.py +5 -1
  87. mindspore/run_check/_check_version.py +4 -2
  88. mindspore/train/_utils.py +92 -32
  89. mindspore/train/callback/_checkpoint.py +12 -9
  90. mindspore/train/callback/_on_request_exit.py +12 -1
  91. mindspore/train/callback/_tft_register.py +27 -4
  92. mindspore/train/dataset_helper.py +10 -2
  93. mindspore/train/model.py +20 -0
  94. mindspore/train/serialization.py +8 -18
  95. mindspore/version.py +1 -1
  96. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +8 -6
  97. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +100 -100
  98. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +0 -0
  99. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
  100. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/.commit_id CHANGED
@@ -1 +1 @@
-__commit_id__ = '[sha1]:8c86f33f,[branch]:(HEAD,origin/master,origin/HEAD,master)'
+__commit_id__ = '[sha1]:01847825,[branch]:(HEAD,origin/r2.4.1,r2.4.1)'
mindspore/bin/cache_admin CHANGED
Binary file
mindspore/bin/cache_server CHANGED
Binary file
mindspore/common/initializer.py CHANGED
@@ -103,6 +103,12 @@ def _numpy_seed():
     return np.random.randint(low=1, high=(1 << 63), dtype=np.int64)
 
 
+def _init_random_normal_inplace(mean, sigma, arr):
+    if sigma < 0:
+        raise ValueError("sigma < 0")
+    _random_normal(_numpy_seed(), arr, mean, sigma)
+
+
 def _init_random_normal(mean, sigma, shape):
     if sigma < 0:
         raise ValueError("sigma < 0")
@@ -111,12 +117,22 @@ def _init_random_normal(mean, sigma, shape):
     return data
 
 
+def _init_random_uniform_inplace(a, b, arr):
+    _random_uniform(_numpy_seed(), arr, a, b)
+
+
 def _init_random_uniform(a, b, shape):
     data = np.ndarray(shape=shape, dtype=np.float32)
     _random_uniform(_numpy_seed(), data, a, b)
     return data
 
 
+def _init_truncated_normal_inplace(a, b, mean, sigma, arr):
+    if sigma < 0:
+        raise ValueError("sigma < 0")
+    _truncated_normal(_numpy_seed(), arr, a, b, mean, sigma)
+
+
 def _init_truncated_normal(a, b, mean, sigma, shape):
     if sigma < 0:
         raise ValueError("sigma < 0")
@@ -298,9 +314,11 @@ class XavierNormal(Initializer):
         fan_in, fan_out = _calculate_fan_in_and_fan_out(arr.shape)
 
         std = self.gain * math.sqrt(2.0 / float(fan_in + fan_out))
-        data = _init_random_normal(0, std, arr.shape)
-
-        _assignment(arr, data)
+        if isinstance(arr, np.ndarray) and arr.dtype == np.float32:
+            _init_random_normal_inplace(0, std, arr)
+        else:
+            data = _init_random_normal(0, std, arr.shape)
+            _assignment(arr, data)
 
 
 @_register('xavier_uniform')
@@ -337,8 +355,11 @@ class XavierUniform(Initializer):
     def _initialize(self, arr):
         n_in, n_out = _calculate_fan_in_and_fan_out(arr.shape)
         boundary = self.gain * math.sqrt(6.0 / (n_in + n_out))
-        data = _init_random_uniform(-boundary, boundary, arr.shape)
-        _assignment(arr, data)
+        if isinstance(arr, np.ndarray) and arr.dtype == np.float32:
+            _init_random_uniform_inplace(-boundary, boundary, arr)
+        else:
+            data = _init_random_uniform(-boundary, boundary, arr.shape)
+            _assignment(arr, data)
 
 
 @_register('he_uniform')
@@ -386,8 +407,11 @@ class HeUniform(Initializer):
         gain = _calculate_gain(self.nonlinearity, self.negative_slope)
         std = gain / math.sqrt(fan)
         boundary = math.sqrt(3.0) * std
-        data = _init_random_uniform(-boundary, boundary, arr.shape)
-        _assignment(arr, data)
+        if isinstance(arr, np.ndarray) and arr.dtype == np.float32:
+            _init_random_uniform_inplace(-boundary, boundary, arr)
+        else:
+            data = _init_random_uniform(-boundary, boundary, arr.shape)
+            _assignment(arr, data)
 
 
 @_register('he_normal')
@@ -432,8 +456,11 @@ class HeNormal(Initializer):
         fan = _calculate_correct_fan(arr.shape, self.mode)
         gain = _calculate_gain(self.nonlinearity, self.negative_slope)
         std = gain / math.sqrt(fan)
-        data = _init_random_normal(0, std, arr.shape)
-        _assignment(arr, data)
+        if isinstance(arr, np.ndarray) and arr.dtype == np.float32:
+            _init_random_normal_inplace(0, std, arr)
+        else:
+            data = _init_random_normal(0, std, arr.shape)
+            _assignment(arr, data)
 
 
 class Constant(Initializer):
@@ -718,8 +745,11 @@ class Uniform(Initializer):
         self.scale = scale
 
     def _initialize(self, arr):
-        tmp = _init_random_uniform(-self.scale, self.scale, arr.shape)
-        _assignment(arr, tmp)
+        if isinstance(arr, np.ndarray) and arr.dtype == np.float32:
+            _init_random_uniform_inplace(-self.scale, self.scale, arr)
+        else:
+            tmp = _init_random_uniform(-self.scale, self.scale, arr.shape)
+            _assignment(arr, tmp)
 
 
 @_register()
@@ -749,8 +779,11 @@ class Normal(Initializer):
         self.mean = mean
 
     def _initialize(self, arr):
-        data = _init_random_normal(self.mean, self.sigma, arr.shape)
-        _assignment(arr, data)
+        if isinstance(arr, np.ndarray) and arr.dtype == np.float32:
+            _init_random_normal_inplace(self.mean, self.sigma, arr)
+        else:
+            data = _init_random_normal(self.mean, self.sigma, arr.shape)
+            _assignment(arr, data)
 
 
 @_register()
@@ -780,8 +813,11 @@ class TruncatedNormal(Initializer):
         self.b = b
 
     def _initialize(self, arr):
-        tmp = _init_truncated_normal(self.a, self.b, self.mean, self.sigma, arr.shape)
-        _assignment(arr, tmp)
+        if isinstance(arr, np.ndarray) and arr.dtype == np.float32:
+            _init_truncated_normal_inplace(self.a, self.b, self.mean, self.sigma, arr)
+        else:
+            tmp = _init_truncated_normal(self.a, self.b, self.mean, self.sigma, arr.shape)
+            _assignment(arr, tmp)
 
 
 def initializer(init, shape=None, dtype=mstype.float32):
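
Note on the hunks above: every changed initializer applies the same pattern, namely when the destination buffer is already a float32 numpy array, the new *_inplace helpers write the random values directly into it instead of allocating a temporary array and copying it over via _assignment. A minimal standalone sketch of that dispatch, using plain NumPy stand-ins for MindSpore's C helpers (the function names below are illustrative, not the real internals):

    import numpy as np

    _rng = np.random.default_rng()

    def fill_normal_inplace(arr, mean=0.0, sigma=1.0):
        # Fast path: write the values straight into the caller's float32 buffer.
        if sigma < 0:
            raise ValueError("sigma < 0")
        arr[...] = _rng.normal(mean, sigma, size=arr.shape)

    def init_weight(arr, mean=0.0, sigma=1.0):
        # Dispatch the way the 2.4.1 initializers do: in place for float32 buffers,
        # allocate-then-assign for buffers of any other dtype.
        if isinstance(arr, np.ndarray) and arr.dtype == np.float32:
            fill_normal_inplace(arr, mean, sigma)
        else:
            data = _rng.normal(mean, sigma, size=np.shape(arr)).astype(np.float32)
            np.copyto(arr, data, casting="unsafe")

    buf = np.empty((2, 3), dtype=np.float32)
    init_weight(buf, sigma=0.02)

The real helpers (_random_normal and friends) are C extensions that fill the buffer without a Python-level temporary; the sketch only mirrors the control flow, not the allocation behaviour.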
mindspore/common/parameter.py CHANGED
@@ -22,6 +22,7 @@ import os
 import sys
 import math
 import numbers
+from contextlib import contextmanager
 import numpy as np
 from mindspore import log as logger
 from mindspore.log import _LogActionOnce
@@ -54,6 +55,16 @@ PARAMETER_NAME_PREFIX_MAX_LEN = 1024
 _GLOBAL_PARAMETER_KEY = -1
 
 
+@contextmanager
+def no_init_parameters():
+    init_class = globals()["Parameter"]
+    setattr(init_class, "init_param", False)
+    try:
+        yield
+    finally:
+        setattr(init_class, "init_param", True)
+
+
 def _is_in_auto_parallel_mode():
     """Get parallel mode."""
     return auto_parallel_context().get_parallel_mode() in ["semi_auto_parallel", "auto_parallel"]
@@ -243,7 +254,8 @@ class Parameter(Tensor_):
     def __new__(cls, default_input, *args, **kwargs):
         init_data_flag = bool(isinstance(default_input, Tensor) and default_input.has_init)
         rc = sys.getrefcount(default_input)
-        input_class, *class_init_args = Parameter._get_parameter_new_args(default_input, rc)
+        init_param = getattr(cls, "init_param", True)
+        input_class, *class_init_args = Parameter._get_parameter_new_args(default_input, rc, init_param)
         new_type = Parameter._get_base_class(input_class)
         obj = input_class.__new__(new_type)
         input_class.__init__(obj, *class_init_args)
@@ -355,7 +367,7 @@ class Parameter(Tensor_):
         return new_type
 
     @staticmethod
-    def _get_parameter_new_args(data, rc):
+    def _get_parameter_new_args(data, rc, init_param=True):
         """Set `set_data` of current `Parameter`."""
         if isinstance(data, bool):
             raise ValueError('Parameter data can not be `bool`')
@@ -370,8 +382,8 @@
                 return (Tensor, data.asnumpy(), mstype.qint4x2)
             return (Tensor, data.asnumpy())
 
-        not_init_data = _is_role_sched() or (_is_role_pserver() and _cache_enable()
-                                             ) or _is_in_auto_parallel_mode() or _is_parallel_mode()
+        not_init_data = not init_param or _is_role_sched() or (_is_role_pserver() and _cache_enable()) \
+            or _is_in_auto_parallel_mode() or _is_parallel_mode()
         if not_init_data:
             # do not init data while in auto parallel.
             return (Tensor, None, data.dtype, get_slice_shape(data.dtype, data.shape), data.init)
@@ -976,6 +988,8 @@
         """
         if self.is_default_input_init and self.is_in_parallel != _is_in_auto_parallel_mode():
             raise RuntimeError("Must set or change parallel mode before any initializer Tensor created.")
+        if hasattr(self, "init_param") and self.init_param:
+            return self
         if self.init_mode is None:
             return self
         if self.inited_param is not None:
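
Note on the hunks above: the new no_init_parameters() context manager flips a class-level init_param flag so that Parameter objects constructed inside the block keep their lazy initializer instead of materializing data up front, which is useful when the weights are about to be overwritten anyway. A hedged usage sketch; the import path follows this hunk's location in mindspore/common/parameter.py (the public export may differ), and "dense.ckpt" is a hypothetical checkpoint path:

    import mindspore as ms
    import mindspore.nn as nn
    from mindspore.common.parameter import no_init_parameters

    # Build the network without spending time or memory on random initialization.
    with no_init_parameters():
        net = nn.Dense(1024, 1024)

    # The parameters are expected to be filled from a checkpoint afterwards.
    ms.load_checkpoint("dense.ckpt", net)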
mindspore/common/tensor.py CHANGED
@@ -2896,8 +2896,13 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
             self.slice_shape_of_persistent_data_ = data_shape
             self.slice_num_of_persistent_data_ = slice_num_of_persistent_data
 
+        from mindspore.common.initializer import Zero as ZeroInitializer
+
         try:
-            data = np.ndarray(data_shape, dtype=mstype.dtype_to_nptype(self.dtype))
+            if isinstance(self.init, ZeroInitializer):
+                data = np.zeros(data_shape, dtype=mstype.dtype_to_nptype(self.dtype))
+            else:
+                data = np.ndarray(data_shape, dtype=mstype.dtype_to_nptype(self.dtype))
         except ValueError as e:
             msg = "Error shape={}".format(shape)
             logger.critical(msg)
@@ -2933,7 +2938,7 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
             self.init.seed, _ = self.seed
 
         with seed_context(self.init):
-            if slice_num_of_persistent_data == 1:
+            if not isinstance(self.init, ZeroInitializer) and slice_num_of_persistent_data == 1:
                 self.init(data)
         self.init = None
 
@@ -4749,7 +4754,6 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
         """
        return tensor_operator_registry.get('lu_solve')(self, LU_data, LU_pivots)
 
-
     def nextafter(self, other):
         r"""
         For details, please refer to :func:`mindspore.ops.nextafter`.
@@ -4763,7 +4767,6 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
         validator.check_value_type('some', some, bool, 'Tensor.qr')
         return tensor_operator_registry.get('qr')(self, 'reduced' if some else 'complete')
 
-
     def ormqr(self, input2, input3, left=True, transpose=False):
         r"""
         For details, please refer to :func:`mindspore.ops.ormqr`,
@@ -4771,7 +4774,6 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
         """
         return tensor_operator_registry.get('ormqr')(self, input2, input3, left, transpose)
 
-
     def masked_scatter(self, mask, x):
         r"""
         Returns a Tensor. Updates the value in the "self Tensor" with the `tensor` value according to the mask.
@@ -4812,7 +4814,6 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
         """
         return tensor_operator_registry.get('masked_scatter')()(self, mask, x)
 
-
     def index_put(self, indices, values, accumulate=False):
         r"""
         Returns a Tensor. According to the index number of `indices` ,
@@ -4865,7 +4866,6 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
         _index_put = tensor_operator_registry.get('index_put')(0 if accumulate is False else 1)
         return _index_put(self, values, indices)
 
-
     def move_to(self, to, blocking=True):
         r"""
         Copy Tensor to target device synchronously or asynchronously, default synchronously. only support PyNative mode.
@@ -4899,8 +4899,7 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
         mode = context.get_context("mode")
         if mode != context.PYNATIVE_MODE:
             raise ValueError(f"The method of 'move_to' only supported in pynative mode, but got: {mode}.")
-        return Tensor(Tensor_.move_to(self, to, blocking), device="CPU" if to == "CPU" else None)
-
+        return Tensor_.move_to(self, to, blocking)
 
     def _offload(self):
         r"""
@@ -4946,44 +4945,6 @@ def _vm_compare(*args):
     return Tensor(np.array(fn(y)))
 
 
-def _check_sequence_shape(input_data):
-    """Check the shape of tensor input with type of sequence."""
-    max_dims_reached = False
-    max_ndim = 64 # corresponding to NPY_MAXDIMS
-    out_shape = [0]*max_ndim
-
-    def check_shape_recursive(input_data, curr_ndim):
-        nonlocal max_dims_reached, max_ndim, out_shape
-        if curr_ndim > max_ndim:
-            return False
-        if not isinstance(input_data, (tuple, list)):
-            if max_dims_reached and curr_ndim != max_ndim:
-                max_ndim = curr_ndim
-                return False
-            max_dims_reached = True
-            max_ndim = curr_ndim
-            return True
-        if not max_dims_reached:
-            out_shape[curr_ndim] = len(input_data)
-        else:
-            if out_shape[curr_ndim] != len(input_data):
-                max_ndim = curr_ndim
-                return False
-        if not input_data:
-            # process empty list
-            if not check_shape_recursive(None, curr_ndim + 1):
-                return False
-        for data in input_data:
-            if not check_shape_recursive(data, curr_ndim + 1):
-                return False
-        return True
-
-    if not check_shape_recursive(input_data, 0):
-        raise ValueError(f"When initializing a tensor with a sequence, the sequence has an inhomogeneous shape "
-                         f"after {max_ndim} dimensions. The detected shape was {tuple(out_shape[:max_ndim])} "
-                         f"+ inhomogeneous part.")
-
-
 def _check_tensor_input(input_data=None, dtype=None, shape=None, init=None):
     """Check the tensor input."""
     if input_data is not None and shape is not None:
@@ -4997,8 +4958,13 @@ def _check_tensor_input(input_data=None, dtype=None, shape=None, init=None):
     if isinstance(input_data, np.ndarray) and input_data.ndim >= 1 and input_data.size == 0:
         raise ValueError("input_data can not contain zero dimension.")
     if isinstance(input_data, (tuple, list)):
-        _check_sequence_shape(input_data)
-        if np.array(input_data).ndim >= 1 and np.array(input_data).size == 0:
+        try:
+            np_data = np.array(input_data)
+        except ValueError as e:
+            if "The requested array has an inhomogeneous shape" in str(e):
+                raise TypeError(f"For Tensor, the input_data is {input_data} that contain unsupported element.")
+            raise
+        if np_data.ndim >= 1 and np_data.size == 0:
             raise ValueError("input_data can not contain zero dimension.")
 
     if shape is not None and not (hasattr(init, "__enable_zero_dim__") and init.__enable_zero_dim__) and 0 in shape:
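
Note on the last two hunks above: the removed _check_sequence_shape walker is replaced by letting NumPy itself reject ragged nested sequences; on NumPy 1.24 and later, np.array raises a ValueError mentioning an "inhomogeneous shape", which the new _check_tensor_input converts into a TypeError. A quick illustration of the NumPy behaviour the new check relies on (plain NumPy, not the MindSpore wrapper):

    import numpy as np

    ragged = [[1, 2], [3]]  # rows of different lengths
    try:
        np.array(ragged)  # NumPy >= 1.24 refuses to build a ragged array implicitly
    except ValueError as err:
        # _check_tensor_input looks for this message and re-raises it as a TypeError.
        assert "inhomogeneous shape" in str(err)
        print("rejected:", err)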
mindspore/communication/comm_func.py CHANGED
@@ -209,7 +209,7 @@ def all_reduce(tensor, op=ReduceOp.SUM, group=GlobalComm.WORLD_COMM_GROUP, async
         RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
 
     Supported Platforms:
-        ``Ascend`` ``GPU`` ``CPU``
+        ``Ascend``
 
     Examples:
         .. note::
@@ -275,7 +275,7 @@ def all_gather_into_tensor(tensor, group=GlobalComm.WORLD_COMM_GROUP, async_op=F
         RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend``
 
     Examples:
         .. note::
@@ -349,7 +349,7 @@ def reduce_scatter_tensor(tensor, op=ReduceOp.SUM, group=GlobalComm.WORLD_COMM_G
         RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend``
 
     Examples:
         .. note::
@@ -909,7 +909,7 @@ def send(tensor, dst=0, group=GlobalComm.WORLD_COMM_GROUP, tag=0):
         ValueError: If the rank ID of the process is greater than the rank size of the communication group.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend``
 
     Examples:
         .. note::
@@ -970,7 +970,7 @@ def recv(tensor, src=0, group=GlobalComm.WORLD_COMM_GROUP, tag=0):
         ValueError: If the rank ID of the process is greater than the rank size of the communication group.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend``
 
     Examples:
         .. note::
@@ -1040,7 +1040,7 @@ def isend(tensor, dst=0, group=GlobalComm.WORLD_COMM_GROUP, tag=0):
         ValueError: If the rank ID of the process is greater than the rank size of the communication group.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend``
 
     Examples:
         .. note::
@@ -1105,7 +1105,7 @@ def irecv(tensor, src=0, group=GlobalComm.WORLD_COMM_GROUP, tag=0):
         ValueError: If the rank ID of the process is greater than the rank size of the communication group.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend``
 
     Examples:
         .. note::
mindspore/context.py CHANGED
@@ -1686,6 +1686,15 @@ def set_context(**kwargs):
           - 3: Optimize dataset reader with all scenes.
         - bias_add_comm_swap (bool): Enable node execution order swap communication operators and add operators
           if ``True``. Only 1-dimension bias node is supported. Default: ``False``.
+        - enable_allreduce_slice_to_reducescatter (bool): Enable allreduce optimization. In the scenario where
+          the batchmatmul model introduces allreduce in parallel, if the subsequent nodes are stridedslice
+          operators with model parallel, allreduce will be optimized as reducescatter according to the identified
+          patterns. Typically used in MoE modules with groupwise alltoall. Default: ``False``.
+        - enable_interleave_split_concat_branch (bool): Enable communication-computation parallel optimization
+          for branches formed by split and concat operators with the ``enable_interleave`` attribute. It is
+          typically used in MoE parallel scenarios. After splitting the input data, each slice of data is
+          processed by the MoE module, and then the branch results are concatenated. When the optimization is
+          enabled, communication and computation are executed in parallel between branches. Default: ``False``.
         - host_scheduling_max_threshold(int): The max threshold to control whether the dynamic shape process is
           used when run the static graph, the default value is 0. When the number of operations in the static graph
          is less than the max threshold, this graph will be executed in dynamic shape process. In large model
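
Note on the hunk above: a hedged sketch of how these two new switches would typically be turned on. In this docstring they sit among the options read from the parallel speed-up JSON that set_context() picks up via ascend_config={"parallel_speed_up_json_path": ...}; the exact wiring below is an assumption based on that placement, and the file name is illustrative:

    import json
    import mindspore as ms

    # Hypothetical speed-up config enabling the two options documented above.
    speed_up = {
        "enable_allreduce_slice_to_reducescatter": True,
        "enable_interleave_split_concat_branch": True,
    }
    with open("parallel_speed_up.json", "w") as f:
        json.dump(speed_up, f)

    ms.set_context(ascend_config={"parallel_speed_up_json_path": "./parallel_speed_up.json"})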
mindspore/include/mindapi/base/format.h CHANGED
@@ -161,5 +161,18 @@ inline std::string FormatEnumToString(mindspore::Format format) {
   }
   return names[format];
 }
+
+inline Format FromStrToEnum(const std::string &format_str) {
+  if (format_str == "DefaultFormat") {
+    return mindspore::Format::DEFAULT_FORMAT;
+  }
+  const auto &names = GetFormatNames();
+  for (size_t i = 0; i < names.size(); ++i) {
+    if (names[i] == format_str) {
+      return static_cast<mindspore::Format>(i);
+    }
+  }
+  return mindspore::Format::DEFAULT_FORMAT;
+}
 }  // namespace mindspore
 #endif  // MINDSPORE_CORE_MINDAPI_BASE_FORMAT_H_
Binary library files under mindspore/lib changed (items 12-23 in the file list above); no text diff is shown for binary content.
mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json CHANGED
@@ -2,16 +2,16 @@
   "binList": [
     {
       "simplifiedKey": [
-        "AllFinite/d=0,p=0/0,2/12,2",
-        "AllFinite/d=1,p=0/0,2/12,2"
+        "AllFinite/d=0,p=0/1,2/12,2",
+        "AllFinite/d=1,p=0/1,2/12,2"
       ],
-      "staticKey": "d0f82552295948866b5bab1c0ef5a9cd9662ee964af71eeea40b0c8d27c08835",
+      "staticKey": "aaee380ee48b9c261d816baac1f6fc0b820ce3b2255e1eff19382df469b7e6de",
       "int64Mode": false,
       "inputs": [
         {
           "name": "gradient",
           "index": 0,
-          "dtype": "float32",
+          "dtype": "float16",
           "format": "ND",
           "paramType": "required",
           "shape": [
@@ -32,21 +32,21 @@
         }
       ],
       "binInfo": {
-        "jsonFilePath": "ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json"
+        "jsonFilePath": "ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json"
       }
     },
     {
       "simplifiedKey": [
-        "AllFinite/d=0,p=0/1,2/12,2",
-        "AllFinite/d=1,p=0/1,2/12,2"
+        "AllFinite/d=0,p=0/0,2/12,2",
+        "AllFinite/d=1,p=0/0,2/12,2"
       ],
-      "staticKey": "aaee380ee48b9c261d816baac1f6fc0b820ce3b2255e1eff19382df469b7e6de",
+      "staticKey": "d0f82552295948866b5bab1c0ef5a9cd9662ee964af71eeea40b0c8d27c08835",
       "int64Mode": false,
       "inputs": [
         {
           "name": "gradient",
           "index": 0,
-          "dtype": "float16",
+          "dtype": "float32",
           "format": "ND",
           "paramType": "required",
           "shape": [
@@ -67,7 +67,7 @@
         }
       ],
       "binInfo": {
-        "jsonFilePath": "ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json"
+        "jsonFilePath": "ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json"
       }
     },
     {
mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json CHANGED
@@ -5,23 +5,23 @@
     "binaryList": [
       {
         "coreType": 2,
-        "simplifiedKey": "AllFinite/d=0,p=0/0,2/12,2",
-        "binPath": "ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o"
+        "simplifiedKey": "AllFinite/d=0,p=0/1,2/12,2",
+        "binPath": "ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o"
       },
       {
         "coreType": 2,
-        "simplifiedKey": "AllFinite/d=1,p=0/0,2/12,2",
-        "binPath": "ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o"
+        "simplifiedKey": "AllFinite/d=1,p=0/1,2/12,2",
+        "binPath": "ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o"
       },
       {
         "coreType": 2,
-        "simplifiedKey": "AllFinite/d=0,p=0/1,2/12,2",
-        "binPath": "ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o"
+        "simplifiedKey": "AllFinite/d=0,p=0/0,2/12,2",
+        "binPath": "ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o"
       },
       {
         "coreType": 2,
-        "simplifiedKey": "AllFinite/d=1,p=0/1,2/12,2",
-        "binPath": "ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o"
+        "simplifiedKey": "AllFinite/d=1,p=0/0,2/12,2",
+        "binPath": "ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o"
       },
       {
         "coreType": 2,
mindspore/lib/plugin/ascend/custom_compiler/setup.py CHANGED
@@ -274,7 +274,7 @@ class CustomOOC():
         else:
             with open('build.log', 'r') as file:
                 for line in file:
-                    logger.debug(line.strip())
+                    logger.error(line.strip())
             raise RuntimeError("Compile failed! Please see build.log in current directory for detail info.")
 
     def compile(self):
mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h CHANGED
@@ -77,16 +77,16 @@ typedef struct {
 } AsdRtModuleInfo;
 
 typedef struct {
-    uint16_t addrOffset{0};
-    uint16_t dataOffset{0};
+    uint32_t addrOffset{0};
+    uint32_t dataOffset{0};
 } RtHostInputInfoT;
 
 typedef struct {
     void *args{nullptr};
     RtHostInputInfoT *hostInputInfoPtr{nullptr};
     uint32_t argsSize{0};
-    uint16_t tilingAddrOffset{0};
-    uint16_t tilingDataOffset{0};
+    uint32_t tilingAddrOffset{0};
+    uint32_t tilingDataOffset{0};
     uint16_t hostInputInfoNum{0};
     uint8_t hasTiling{0};
     uint8_t isNoNeedH2DCopy{0};
@@ -111,4 +111,4 @@ typedef struct {
 #ifdef __cplusplus
 }
 #endif
-#endif
+#endif
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h CHANGED
@@ -34,6 +34,7 @@ class AcmeOp {
   AcmeStatus Init();
 
   virtual AcmeStatus UpdateShape(const ShapeInfoList &inputs_shape, const ShapeInfoList &outputs_shape);
+  virtual AcmeStatus UpdateParam(const void *) { return kAcmeOk; }
 
   size_t GetTilingSize() const;
   virtual std::vector<size_t> GetWorkspaceSize() const;
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h CHANGED
@@ -51,6 +51,7 @@ class PagedAttentionOp : public MultiImplsOp {
 
   AsdOps::Any BuildAsdParam() override;
   AcmeStatus UpdateShape(const ShapeInfoList &inputs_shape, const ShapeInfoList &outputs_shape) override;
+  AcmeStatus UpdateParam(const void *) override;
   const std::string &TargetKernelName() const override { return target_kernel_name_; }
   ShapeInfoList InferShape(const ShapeInfoList &inputs_shape) const override;
 
@@ -66,7 +67,7 @@ class PagedAttentionOp : public MultiImplsOp {
   uint32_t GetLaunchCoreNumAcme() const override;
   AcmeStatus CreateAsdTensor();
   AcmeStatus UpdateAsdParam();
-  AcmeStatus UpdateAsdTensor(ShapeInfoList *asd_input_shape, ShapeInfoList *asd_output_shape);
+  AcmeStatus UpdateAsdTensor();
   AcmeStatus CheckAsdopSupport() const;
 
  private:
@@ -74,6 +75,10 @@ class PagedAttentionOp : public MultiImplsOp {
   PagedAttentionParam param_;
   InputsDescList asd_inputs_;
   OutputsDescList asd_outputs_;
+  InputsImmutableInfoList asd_inputs_ii_;
+  InputsImmutableInfoList asd_outputs_ii_;
+  ShapeInfoList asd_input_shape_;
+  ShapeInfoList asd_output_shape_;
   uint64_t tiling_key_{0};
   bool is_custom_quant_{false};
   bool has_mask_{false};