mindspore-2.7.0-cp310-cp310-win_amd64.whl → mindspore-2.7.0rc1-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (196)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +1 -1
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +2 -2
  7. mindspore/_extends/builtin_operations.py +3 -3
  8. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  9. mindspore/_extends/parse/__init__.py +3 -3
  10. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -0
  11. mindspore/_extends/parse/parser.py +22 -28
  12. mindspore/_extends/parse/standard_method.py +1 -15
  13. mindspore/_extends/pijit/pijit_func_white_list.py +5 -2
  14. mindspore/_extends/remote/kernel_build_server_ascend.py +75 -0
  15. mindspore/amp.py +18 -0
  16. mindspore/avcodec-59.dll +0 -0
  17. mindspore/avdevice-59.dll +0 -0
  18. mindspore/avfilter-8.dll +0 -0
  19. mindspore/avformat-59.dll +0 -0
  20. mindspore/avutil-57.dll +0 -0
  21. mindspore/common/__init__.py +12 -18
  22. mindspore/common/_tensor_cpp_method.py +1 -1
  23. mindspore/common/_tensor_docs.py +38 -102
  24. mindspore/common/_utils.py +1 -9
  25. mindspore/common/api.py +106 -155
  26. mindspore/common/{dynamic_shape/auto_dynamic_shape.py → auto_dynamic_shape.py} +23 -17
  27. mindspore/common/dtype.py +57 -98
  28. mindspore/common/dump.py +1 -1
  29. mindspore/common/file_system.py +9 -59
  30. mindspore/common/hook_handle.py +3 -22
  31. mindspore/common/np_dtype.py +3 -3
  32. mindspore/common/parameter.py +20 -4
  33. mindspore/common/recompute.py +4 -2
  34. mindspore/common/tensor.py +52 -38
  35. mindspore/communication/_hccl_management.py +297 -0
  36. mindspore/context.py +21 -15
  37. mindspore/dataset/__init__.py +1 -1
  38. mindspore/dataset/audio/transforms.py +1 -1
  39. mindspore/dataset/core/config.py +1 -35
  40. mindspore/dataset/engine/datasets.py +315 -330
  41. mindspore/dataset/engine/datasets_user_defined.py +22 -38
  42. mindspore/dataset/transforms/c_transforms.py +2 -2
  43. mindspore/dataset/transforms/transforms.py +3 -3
  44. mindspore/dataset/vision/__init__.py +1 -1
  45. mindspore/dataset/vision/py_transforms.py +8 -8
  46. mindspore/dataset/vision/transforms.py +5 -17
  47. mindspore/dataset/vision/utils.py +21 -632
  48. mindspore/device_context/ascend/op_tuning.py +1 -35
  49. mindspore/dnnl.dll +0 -0
  50. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -3
  51. mindspore/include/api/cell.h +4 -28
  52. mindspore/include/api/cfg.h +7 -24
  53. mindspore/include/api/context.h +0 -1
  54. mindspore/include/api/delegate.h +2 -0
  55. mindspore/include/api/dual_abi_helper.h +19 -100
  56. mindspore/include/api/graph.h +1 -14
  57. mindspore/include/api/kernel.h +3 -16
  58. mindspore/include/api/kernel_api.h +1 -9
  59. mindspore/include/api/metrics/accuracy.h +0 -9
  60. mindspore/include/api/model.h +1 -5
  61. mindspore/include/api/model_group.h +0 -4
  62. mindspore/include/api/model_parallel_runner.h +0 -2
  63. mindspore/include/api/status.h +10 -48
  64. mindspore/include/api/types.h +1 -6
  65. mindspore/include/dataset/constants.h +0 -9
  66. mindspore/jpeg62.dll +0 -0
  67. mindspore/mindrecord/tools/cifar10.py +2 -3
  68. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -5
  69. mindspore/mindspore_backend_common.dll +0 -0
  70. mindspore/mindspore_backend_manager.dll +0 -0
  71. mindspore/mindspore_common.dll +0 -0
  72. mindspore/mindspore_core.dll +0 -0
  73. mindspore/mindspore_cpu_res_manager.dll +0 -0
  74. mindspore/mindspore_dump.dll +0 -0
  75. mindspore/mindspore_frontend.dll +0 -0
  76. mindspore/mindspore_glog.dll +0 -0
  77. mindspore/mindspore_memory_pool.dll +0 -0
  78. mindspore/mindspore_ms_backend.dll +0 -0
  79. mindspore/mindspore_ops.dll +0 -0
  80. mindspore/mindspore_ops_host.dll +0 -0
  81. mindspore/mindspore_ops_kernel_common.dll +0 -0
  82. mindspore/mindspore_profiler.dll +0 -0
  83. mindspore/mindspore_pyboost.dll +0 -0
  84. mindspore/mindspore_pynative.dll +0 -0
  85. mindspore/mindspore_res_manager.dll +0 -0
  86. mindspore/mindspore_runtime_pipeline.dll +0 -0
  87. mindspore/mint/distributed/__init__.py +0 -4
  88. mindspore/mint/distributed/distributed.py +14 -217
  89. mindspore/mint/nn/layer/_functions.py +2 -1
  90. mindspore/mint/nn/layer/conv.py +6 -6
  91. mindspore/mint/nn/layer/normalization.py +3 -3
  92. mindspore/nn/cell.py +174 -216
  93. mindspore/nn/layer/activation.py +2 -4
  94. mindspore/nn/layer/basic.py +13 -7
  95. mindspore/nn/layer/image.py +1 -1
  96. mindspore/nn/optim/adam.py +3 -1
  97. mindspore/nn/optim/lamb.py +3 -1
  98. mindspore/nn/optim/tft_wrapper.py +3 -2
  99. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  100. mindspore/nn/wrap/cell_wrapper.py +5 -39
  101. mindspore/nn/wrap/grad_reducer.py +15 -0
  102. mindspore/numpy/array_creations.py +2 -2
  103. mindspore/numpy/utils_const.py +1 -1
  104. mindspore/opencv_core452.dll +0 -0
  105. mindspore/opencv_imgcodecs452.dll +0 -0
  106. mindspore/opencv_imgproc452.dll +0 -0
  107. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  108. mindspore/ops/_op_impl/cpu/__init__.py +0 -1
  109. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +2 -12
  110. mindspore/ops/auto_generate/gen_extend_func.py +4 -4
  111. mindspore/ops/auto_generate/gen_ops_def.py +16 -290
  112. mindspore/ops/auto_generate/gen_ops_prim.py +76 -563
  113. mindspore/ops/composite/base.py +1 -1
  114. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  115. mindspore/ops/function/__init__.py +0 -1
  116. mindspore/ops/function/array_func.py +6 -10
  117. mindspore/ops/function/debug_func.py +2 -4
  118. mindspore/ops/function/grad/grad_func.py +12 -4
  119. mindspore/ops/function/math_func.py +32 -44
  120. mindspore/ops/function/nn_func.py +20 -18
  121. mindspore/ops/functional.py +1 -2
  122. mindspore/ops/functional_overload.py +12 -23
  123. mindspore/ops/operations/_inner_ops.py +12 -11
  124. mindspore/ops/operations/array_ops.py +50 -4
  125. mindspore/ops/operations/comm_ops.py +15 -1
  126. mindspore/ops/operations/custom_ops.py +4 -10
  127. mindspore/ops/operations/debug_ops.py +6 -6
  128. mindspore/ops/operations/manually_defined/ops_def.py +12 -12
  129. mindspore/ops/operations/math_ops.py +5 -5
  130. mindspore/ops/operations/nn_ops.py +1 -1
  131. mindspore/ops/primitive.py +10 -3
  132. mindspore/ops/tensor_method.py +7 -16
  133. mindspore/ops_generate/pyboost/gen_pyboost_func.py +16 -0
  134. mindspore/parallel/_auto_parallel_context.py +15 -5
  135. mindspore/parallel/_parallel_serialization.py +2 -3
  136. mindspore/parallel/_ps_context.py +2 -2
  137. mindspore/parallel/_transformer/transformer.py +4 -4
  138. mindspore/parallel/_utils.py +11 -5
  139. mindspore/parallel/auto_parallel.py +9 -23
  140. mindspore/parallel/checkpoint_transform.py +0 -2
  141. mindspore/parallel/cluster/process_entity/_api.py +1 -4
  142. mindspore/parallel/cluster/run.py +3 -5
  143. mindspore/parallel/function/reshard_func.py +5 -6
  144. mindspore/parallel/nn/parallel_cell_wrapper.py +3 -40
  145. mindspore/parallel/nn/parallel_grad_reducer.py +8 -0
  146. mindspore/parallel/shard.py +21 -7
  147. mindspore/parallel/transform_safetensors.py +4 -10
  148. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +9 -10
  149. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
  150. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  151. mindspore/profiler/common/path_manager.py +0 -9
  152. mindspore/profiler/common/profiler_context.py +2 -25
  153. mindspore/profiler/common/profiler_meta_data.py +0 -1
  154. mindspore/profiler/common/profiler_op_analyse.py +6 -10
  155. mindspore/{ops/_op_impl/cpu/joinedstr_op.py → profiler/common/validator/__init__.py} +1 -15
  156. mindspore/profiler/common/validator/validate_path.py +84 -0
  157. mindspore/profiler/dynamic_profiler.py +46 -91
  158. mindspore/profiler/envprofiler.py +5 -30
  159. mindspore/profiler/experimental_config.py +1 -16
  160. mindspore/profiler/platform/cpu_profiler.py +4 -10
  161. mindspore/profiler/platform/npu_profiler.py +1 -1
  162. mindspore/profiler/profiler.py +145 -193
  163. mindspore/profiler/profiler_action_controller.py +1 -1
  164. mindspore/profiler/profiler_interface.py +2 -2
  165. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  166. mindspore/runtime/__init__.py +4 -6
  167. mindspore/runtime/executor.py +0 -27
  168. mindspore/runtime/memory.py +0 -1
  169. mindspore/runtime/thread_bind_core.py +1 -1
  170. mindspore/swresample-4.dll +0 -0
  171. mindspore/swscale-6.dll +0 -0
  172. mindspore/tinyxml2.dll +0 -0
  173. mindspore/train/_utils.py +3 -3
  174. mindspore/train/amp.py +3 -0
  175. mindspore/train/callback/_callback.py +1 -2
  176. mindspore/train/callback/_checkpoint.py +8 -1
  177. mindspore/train/callback/_flops_collector.py +6 -10
  178. mindspore/train/callback/_train_fault_tolerance.py +7 -3
  179. mindspore/train/data_sink.py +4 -4
  180. mindspore/train/dataset_helper.py +5 -5
  181. mindspore/train/model.py +20 -4
  182. mindspore/train/serialization.py +15 -35
  183. mindspore/train/train_thor/model_thor.py +2 -2
  184. mindspore/turbojpeg.dll +0 -0
  185. mindspore/utils/hooks.py +81 -0
  186. mindspore/utils/utils.py +8 -8
  187. mindspore/version.py +1 -1
  188. {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +1 -1
  189. {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +193 -192
  190. mindspore/_extends/parallel_compile/akg_compiler/custom.py +0 -1109
  191. mindspore/common/dynamic_shape/__init__.py +0 -0
  192. mindspore/common/dynamic_shape/enable_dynamic.py +0 -197
  193. /mindspore/common/{dynamic_shape/_auto_dynamic.py → _auto_dynamic.py} +0 -0
  194. {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
  195. {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  196. {mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/train/callback/_checkpoint.py CHANGED
@@ -628,6 +628,13 @@ class ModelCheckpoint(Callback):
             if "step_num" in self._append_dict:
                 self._append_dict["step_num"] = self._append_step_num + step_num
 
+    def _update_save_step(self, cb_params):
+        """update step if used async d2h copy"""
+        step_num_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num + 1)
+        if self._d2h_async and self._run_mode == context.GRAPH_MODE:
+            step_num_in_epoch -= 1
+        return step_num_in_epoch
+
     def _save_ckpt(self, cb_params, force_to_save=False):
         """Save checkpoint files."""
         if cb_params.cur_step_num == self._last_triggered_step:
@@ -638,7 +645,7 @@ class ModelCheckpoint(Callback):
         self._flush_from_cache(cb_params)
 
         save_ckpt = self._check_save_ckpt(cb_params, force_to_save)
-        step_num_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num + 1)
+        step_num_in_epoch = self._update_save_step(cb_params)
 
         if save_ckpt:
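For orientation, a minimal standalone sketch of the new step computation (illustrative names only; `d2h_async` and `graph_mode` stand in for the callback's `self._d2h_async` attribute and its run-mode check):

    def update_save_step(cur_step_num, batch_num, d2h_async, graph_mode):
        """Mirror of ModelCheckpoint._update_save_step from the hunk above."""
        step_num_in_epoch = int((cur_step_num - 1) % batch_num + 1)
        if d2h_async and graph_mode:
            # with async device-to-host copy in graph mode, the saved step lags by one
            step_num_in_epoch -= 1
        return step_num_in_epoch

    print(update_save_step(5, 4, False, True))  # 1
    print(update_save_step(5, 4, True, True))   # 0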
mindspore/train/callback/_flops_collector.py CHANGED
@@ -31,6 +31,7 @@ from mindspore.communication.management import (create_group, get_group_size,
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.ops import operations as P
 from mindspore.common import Tensor
+from mindspore import context
 import mindspore.nn as nn
 
 
@@ -151,21 +152,16 @@ class FlopsUtilizationCollector(Callback):
         """
         Check whether FlopsUtilizationCollector is working in the current environment
         """
+        if context.get_context("mode") != context.GRAPH_MODE:
+            if self.verbose:
+                raise ValueError("FlopsUtilizationCollector now only support graph mode.")
+            logger.info("FlopsUtilizationCollector now only support graph mode.")
+            return False
         cb_params = run_context.original_args()
         if cb_params.mode == 'train':
             network = cb_params.train_network
-            if not network.compiled:
-                if self.verbose:
-                    raise ValueError("FlopsUtilizationCollector now only support graph mode.")
-                logger.info("FlopsUtilizationCollector now only support graph mode.")
-                return False
         elif cb_params.mode == 'eval':
             network = cb_params.eval_network
-            if not network.compiled:
-                if self.verbose:
-                    raise ValueError("FlopsUtilizationCollector now only support graph mode.")
-                logger.info("FlopsUtilizationCollector now only support graph mode.")
-                return False
         else:
             if self.verbose:
                 raise ValueError('FlopsUtilizationCollector only support train and eval mode!')
mindspore/train/callback/_train_fault_tolerance.py CHANGED
@@ -167,6 +167,7 @@ def _tft_stop_callback(args, cb_ctx):
     """ Callback used for TFT stop function."""
     logger.warning(f"Enter _tft_stop_callback device_id: {cb_ctx.device_id}")
     _stop_device(cb_ctx.device_id)
+    cb_ctx.stop_been_called = True
     if (not cb_ctx.is_uce_rank) and (not cb_ctx._is_params_consistent()):  # pylint: disable=W0212
         raise RuntimeError("Can't stop device, because training parameters are left in inconsistent state!")
     cb_ctx.is_uce_rank = False
@@ -191,7 +192,7 @@ def _tft_rebuild_sub_groups(fault_ranks, args, ctx):
 class TrainFaultTolerance(Callback):
     """
     This callback is used to enable the TFT feature
-    `MindIO TFT <https://www.hiascend.com/document/detail/zh/mindx-dl/600/clusterscheduling/ref/mindiottp/mindiotft001.html>`_
+    `MindIO TFT <https://www.hiascend.com/document/detail/zh/mindx-dl/60rc2/mindio/mindiottp/mindiottp001.html>`_
     and will execute TFT operations during training process, such as TFT init, report and exception handle.
 
     Note:
@@ -339,6 +340,7 @@ class TrainFaultTolerance(Callback):
         self.learning_rate = None
         self.has_init_replica = False
         self.is_uce_rank = False
+        self.stop_been_called = False
 
         self.assign = mindspore.ops.Assign()
         self.g_one = Parameter(Tensor([1], dtype=mstype.int32))
@@ -380,9 +382,11 @@ class TrainFaultTolerance(Callback):
         _tft_handler.init(config=None)
         self.tft = _tft_handler.get_tft()
         logger.warning(f"TFT handle init ok.")
+        mode = context.get_context("mode")
         device_target = context.get_context("device_target")
-        if device_target != "Ascend":
-            raise ValueError(f"MindIO adataper only support on Ascend device but got device {device_target}!")
+        if device_target != "Ascend" or mode != context.GRAPH_MODE:
+            raise ValueError(f"MindIO adataper only support on Ascend device with GRAPH Mode!"
+                             f"device:{device_target}, run mode: {mode}")
 
     def _is_params_consistent(self):
         for key, param in self.cb_params.train_network.parameters_and_names():
mindspore/train/data_sink.py CHANGED
@@ -16,7 +16,7 @@
 from functools import wraps
 import mindspore.ops as ops
 from mindspore import context
-from mindspore.common.dtype import _pytype_to_dtype
+from mindspore.common.dtype import pytype_to_dtype
 from mindspore.common.api import jit
 from mindspore.train._utils import _exec_datagraph, _get_types_and_shapes, enable_data_broadcast
 from mindspore.train.dataset_helper import _has_dynamic_shape, _check_inputs
@@ -61,7 +61,7 @@ def _init_sink_dataset(dataset, sink_size, input_signature, create_info):
         _check_inputs(input_signature, dataset_shapes, dataset_types)
 
     queue_name = transfer_dataset.queue_name
-    if _need_to_full():
+    if _need_to_full() and context.get_context('mode') == context.GRAPH_MODE:
         device_num = _get_device_num() // _get_pipeline_stages()
         dataset_shapes = _to_full_shapes(dataset_shapes, device_num)
     next_op = ops.GetNext(dataset_types, dataset_shapes, len(dataset_types), queue_name)
@@ -94,12 +94,12 @@ def _get_next_op(dataset, ori_next_op, is_info_queue):
 
     queue_name = dataset.__transfer_dataset__.queue_name
     dataset_types, dataset_shapes = dataset.__transfer_dataset__.get_data_info()
-    dataset_types = [_pytype_to_dtype(x) for x in dataset_types]  # pylint:disable=protected-access
+    dataset_types = [pytype_to_dtype(x) for x in dataset_types]
     key = str(dataset_types) + str(dataset_shapes)
     if key in dataset.__sink_aux__.next_ops:
         next_op = dataset.__sink_aux__.next_ops[key]
     else:
-        if _need_to_full():
+        if _need_to_full() and context.get_context('mode') == context.GRAPH_MODE:
             device_num = _get_device_num() // _get_pipeline_stages()
             dataset_shapes = _to_full_shapes(dataset_shapes, device_num)
             next_op = ops.GetNext(dataset_types, dataset_shapes, len(dataset_types), queue_name)
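As background for the `_need_to_full()` gating above, a rough sketch of what full-batch shape expansion amounts to (a simplified, hypothetical stand-in for `_to_full_shapes`, assuming it scales the leading batch dimension by the per-pipeline device count):

    def to_full_shapes(shapes, device_num):
        """Hypothetical simplification: expand per-device shapes to full-batch shapes."""
        return [[dim * device_num if i == 0 else dim for i, dim in enumerate(shape)] for shape in shapes]

    # e.g. with 8 devices, a per-device shape of [32, 224] becomes the full-batch shape [256, 224]
    print(to_full_shapes([[32, 224]], 8))  # [[256, 224]]

Under the change above, this expansion is now applied only when running in GRAPH_MODE.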
mindspore/train/dataset_helper.py CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2020-2025 Huawei Technologies Co., Ltd
+# Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,8 +20,8 @@ import copy
 
 from mindspore import _checkparam as Validator
 from mindspore import log as logger
-from mindspore.common.dynamic_shape._auto_dynamic import is_auto_dynamic, convert_new_shapes
-from mindspore.common.dtype import _pytype_to_dtype
+from mindspore.common._auto_dynamic import is_auto_dynamic, convert_new_shapes
+from mindspore.common.dtype import pytype_to_dtype
 from mindspore.common.api import _cell_graph_executor, _is_args_fullmode, ARG_SPECIFIED
 from mindspore.common._utils import is_shape_unknown
 from mindspore.dataset.core import config as dataset_config
@@ -34,7 +34,7 @@ from mindspore.parallel._utils import _get_device_num, _get_global_rank, _need_t
     _origin_shapes, _dynamic_shape_for_dataset
 from mindspore.parallel._ps_context import _is_role_sched
 from mindspore.ops import operations as P
-from mindspore.common.dynamic_shape.auto_dynamic_shape import _auto_dynamic_shape
+from mindspore.common.auto_dynamic_shape import _auto_dynamic_shape
 
 
 def _send_data(dataset, epoch_num):
@@ -275,7 +275,7 @@ def connect_network_with_dataset(network, dataset_helper):
     # Need to do full_batch for shapes which also do in the _DatasetIterMSLoopSink
     if _need_to_full():
         dataset_shapes = _to_full_shapes(dataset_shapes, _get_device_num() // _get_pipeline_stages())
-    dataset_types = [_pytype_to_dtype(x) for x in dataset_types]  # pylint:disable=protected-access
+    dataset_types = [pytype_to_dtype(x) for x in dataset_types]
     if not is_dynamic:
         dataset_shapes = _auto_dynamic_shape.auto_dynamic_generate_compile_args(dataset_shapes, True)
     key = str(dataset_types) + str(dataset_shapes)
mindspore/train/model.py CHANGED
@@ -156,7 +156,11 @@ def _handle_exception_info(obj, uce_env, tft, e):
         tft.tft_report_error(tft.ReportState.RS_UCE.value)
     elif "HCCEError" in e_str:
         logger.warning("uce wrapper caught HCCEError")
-        tft.tft_report_error(tft.ReportState.RS_HCCL_FAILED.value)
+        if obj.stop_been_called:
+            logger.warning("Received HCCEError after force stop been called, so report force stopped error to MindIO.")
+            tft.tft_report_error(tft.ReportState.RS_NORMAL.value)
+        else:
+            tft.tft_report_error(tft.ReportState.RS_HCCL_FAILED.value)
     elif "ForceStopError" in e_str:
         logger.warning("uce wrapper caught RuntimeError ForceStopError")
         force_stop_err = tft.ReportState.RS_NORMAL.value
@@ -266,6 +270,7 @@ def _handle_tft(func):
                 ret = obj.tft.tft_wait_next_action()
                 if ret == obj.tft.Action.EXIT.value:
                     raise e
+                obj.stop_been_called = False
                 repair_step = obj.tft.tft_get_repair_step()
                 logger.warning(
                     "uce wrapper caught repair finish REPAIR STEP: {} batch_num:{}".format(repair_step,
@@ -303,6 +308,9 @@ def _check_tft():
         ascend_target = MSContext.get_instance().get_ascend_soc_version()
         if ascend_target == 'ascend910':
             raise ValueError("TFT is not supported when using ascend910")
+        ms_mode = context.get_context("mode")
+        if ms_mode != mindspore.GRAPH_MODE:
+            raise ValueError("TFT is only supported in GRAPH_MODE")
         jit_level = context.get_context("jit_level")
         if jit_level == "O2" and ("UCE:1" in tft_env or "ARF:1" in tft_env):
             raise ValueError("TFT is not supported when using jit_level == O2")
@@ -812,7 +820,7 @@ class Model:
         """
         if os.environ.get("MS_ENABLE_CKPT_D2H_ASYNC") != "1":
             return
-        if context.get_context("device_target") == "Ascend":
+        if (context.get_context("mode") == context.GRAPH_MODE) and (context.get_context("device_target") == "Ascend"):
             cb_params.need_ckpt, cb_params.save_checkpoint_steps, \
                 cb_params.last_triggered_step = self._check_need_ckpt(cb_params.list_callback)
             logger.info(f"need_ckpt:{cb_params.need_ckpt},"
@@ -880,8 +888,8 @@ class Model:
             sink_size (int): Control the amount of data in each sink. Default: -1.
             epoch (int): Total number of iterations on the data. Default: 1.
         """
-        if context.get_context("device_target") != "Ascend":
-            raise RuntimeError('Pre-init process only supports Ascend target currently.')
+        if context.get_context("mode") != context.GRAPH_MODE or context.get_context("device_target") != "Ascend":
+            raise RuntimeError('Pre-init process only supports GRAPH MODE and Ascend target currently.')
 
         if not train_dataset and not valid_dataset:
             raise ValueError("The argument 'train_dataset' and 'valid_dataset' can not both be None or empty.")
@@ -1212,6 +1220,8 @@ class Model:
         if not enable_recovery:
             self.enable_recovery = False
         else:
+            if context.get_context("mode") != context.GRAPH_MODE:
+                raise RuntimeError("Recovery for training only support graph mode currently.")
             self.enable_recovery = enable_recovery and _is_role_worker()
 
     def _check_need_load_ckpt(self, cb_params, dataset_size, sink_size=-1):
@@ -2189,6 +2199,9 @@ class Model:
            dataset_sink_mode (bool): Determines whether to pass the data through dataset channel.
            sink_size (int): Control the amount of data in each sink.
        """
+        if context.get_context("mode") != context.GRAPH_MODE:
+            raise RuntimeError("Pre-compile process that generate parameter layout for the train network "
+                               "only supports GRAPH MODE and Ascend target currently.")
        if _get_parallel_mode() not in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
            raise RuntimeError("'infer_train_layout' only supports 'semi_auto_parallel' and 'auto_parallel' "
                               "mode, but got {}.".format(_get_parallel_mode()))
@@ -2348,6 +2361,9 @@ class Model:
         >>> predict_map = model.infer_predict_layout(inputs)
         """
         _init_auto_parallel_context(self._network)
+        if context.get_context("mode") != context.GRAPH_MODE:
+            raise RuntimeError("Pre-compile process that generate parameter layout for the predict network "
+                               "only supports GRAPH MODE and Ascend target currently.")
         if _get_parallel_mode() not in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
             raise RuntimeError('Infer predict layout only supports semi auto parallel and auto parallel mode.')
         _parallel_predict_check()
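Several of these hunks reject execution outside graph mode. From the caller's side, the usual way to satisfy the new checks is to switch to graph mode before constructing the Model; a minimal sketch using the standard context API:

    import mindspore as ms

    # the new checks in Model require GRAPH_MODE (plus an Ascend device target for
    # pre-init and MS_ENABLE_CKPT_D2H_ASYNC checkpointing)
    ms.set_context(mode=ms.GRAPH_MODE)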
mindspore/train/serialization.py CHANGED
@@ -52,6 +52,7 @@ from mindspore.log import vlog_print
 from mindspore._checkparam import check_input_data, check_input_dataset
 from mindspore import _checkparam as Validator
 from mindspore.common import dtype as mstype
+from mindspore.common import np_dtype
 from mindspore.common.api import _cell_graph_executor as _executor
 from mindspore.common.api import _JitExecutor
 from mindspore.common.api import _get_parameter_layout
@@ -85,9 +86,12 @@ tensor_to_ms_type = {"Int8": mstype.int8, "UInt8": mstype.uint8, "Int16": mstype
                      "Float16": mstype.float16, "Float32": mstype.float32, "Float64": mstype.float64,
                      "Bool": mstype.bool_, "str": mstype.string, "BFloat16": mstype.bfloat16, "Int4": mstype.qint4x2}
 
-_tensor_to_np_type = {"Int8": np.int8, "UInt8": np.uint8, "Int16": np.int16, "UInt16": np.uint16,
-                      "Int32": np.int32, "UInt32": np.uint32, "Int64": np.int64, "UInt64": np.uint64,
-                      "Float16": np.float16, "Float32": np.float32, "Float64": np.float64, "Bool": np.bool_, "str": "U"}
+tensor_to_np_type = {"Int8": np.int8, "UInt8": np.uint8, "Int16": np.int16, "UInt16": np.uint16,
+                     "Int32": np.int32, "UInt32": np.uint32, "Int64": np.int64, "UInt64": np.uint64,
+                     "Float16": np.float16, "Float32": np.float32, "Float64": np.float64, "Bool": np.bool_, "str": "U"}
+
+if hasattr(np_dtype, "bfloat16"):
+    tensor_to_np_type["BFloat16"] = np_dtype.bfloat16
 
 np_type_convert = {"int32": np.int32, "float32": np.float32, "float16": np.float16, "float64": np.float64}
 
@@ -110,21 +114,6 @@ INT_64_MAX = 9223372036854775807
 cpu_cast = Cast().set_device("CPU")
 
 _ckpt_fs = FileSystem()
-_ckpt_fs_initialized = False
-
-
-def tensor_to_np_type(tensor_type_str):
-    """tensor to numpy type"""
-    if tensor_type_str == "BFloat16":
-        from mindspore.common import np_dtype
-        if not np_dtype.np_dtype_valid(True):
-            raise TypeError(
-                "The Numpy bfloat16 data type is not supported now, please ensure that the current "
-                "Numpy version is not less than the version when the mindspore is compiled, "
-                "and the major versions are same."
-            )
-        return np_dtype.bfloat16
-    return _tensor_to_np_type.get(tensor_type_str)
 
 
 def init_ckpt_file_system(fs: FileSystem):
@@ -134,12 +123,8 @@ def init_ckpt_file_system(fs: FileSystem):
     _register_basic_file_system(fs)
 
 
-def _ensure_ckpt_fs_initialized():
-    """Ensure checkpoint file system is initialized"""
-    global _ckpt_fs_initialized
-    if not _ckpt_fs_initialized:
-        init_ckpt_file_system(_ckpt_fs)
-        _ckpt_fs_initialized = True
+# Initialize checkpoint file system
+init_ckpt_file_system(_ckpt_fs)
 
 
 def _wait_async_process_save_ckpt():
@@ -473,7 +458,7 @@ def _exec_save(ckpt_file_name, data_list, enc_key=None, enc_mode="AES-GCM", map_
                                    f"simultaneously modified a file.")
             elif _ckpt_fs.backend != "mindio":
                 os.rename(tmp_name, ckpt_file_name)
-            os.chmod(ckpt_file_name, stat.S_IRUSR)
+            os.chmod(ckpt_file_name, stat.S_IRUSR)
     except BaseException as e:
         logger.critical("Failed to save the checkpoint file %s. Maybe don't have the permission to write files, "
                         "or the disk space is insufficient and so on.", ckpt_file_name)
@@ -733,7 +718,6 @@ def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True,
         <https://mindspore.cn/tutorials/en/master/beginner/save_load.html#saving-and-loading-the-model-weight>`_
     """
     start_save_time = time.time()
-    _ensure_ckpt_fs_initialized()
     ckpt_file_name = _check_save_obj_and_ckpt_file_name(save_obj, ckpt_file_name, format)
     integrated_save = Validator.check_bool(integrated_save)
     async_save = _check_async_save(async_save)
@@ -1284,7 +1268,11 @@ def _load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter
                 continue
             data = element.tensor.tensor_content
             data_type = element.tensor.tensor_type
+            np_type = tensor_to_np_type.get(data_type)
             ms_type = tensor_to_ms_type[data_type]
+            if data_type == 'str':
+                str_length = int(len(data) / 4)
+                np_type = np_type + str(str_length)
             param_data_list.append(data)
             if (element_id == len(checkpoint_list.value) - 1) or \
                     (element.tag != checkpoint_list.value[element_id + 1].tag):
@@ -1292,8 +1280,6 @@ def _load_into_param_dict(ckpt_file_name, parameter_dict, specify_prefix, filter
                 param_data_list.clear()
                 dims = element.tensor.dims
                 if data_type == 'str':
-                    str_length = int(len(data) / 4)
-                    np_type = "U" + str(str_length)
                     str_value = np.frombuffer(new_data, np_type)
                     parameter_dict[element.tag] = str(str_value[0])
                 else:
@@ -1400,7 +1386,6 @@ def load_checkpoint(ckpt_file_name, net=None, strict_load=False, filter_prefix=N
     """
     start_load_time = time.time()
     vlog_print("1", "ME", __file__, sys._getframe().f_lineno, "Begin load checkpoint.")
-    _ensure_ckpt_fs_initialized()
     specify_prefix = _check_prefix(specify_prefix)
     filter_prefix = _check_prefix(filter_prefix)
     dec_key = Validator.check_isinstance('dec_key', dec_key, (type(None), bytes))
@@ -2213,11 +2198,6 @@ def _save_onnx(net, file_name, *inputs, **kwargs):
     file_name += ".onnx"
     if os.path.exists(file_name):
         os.chmod(file_name, stat.S_IWUSR)
-    else:
-        dir_path = os.path.dirname(file_name)
-        if not os.path.exists(dir_path):
-            os.makedirs(dir_path, mode=0o700, exist_ok=True)
-            os.chmod(dir_path, 0o700)
     with open(file_name, 'wb') as f:
         f.write(onnx_stream)
     os.chmod(file_name, stat.S_IRUSR)
@@ -2614,7 +2594,7 @@ def parse_print(print_file_name):
             dims = print_.tensor.dims
             data_type = print_.tensor.tensor_type
             data = print_.tensor.tensor_content
-            np_type = tensor_to_np_type(data_type)
+            np_type = tensor_to_np_type.get(data_type)
             param_data = np.fromstring(data, np_type)
             ms_type = tensor_to_ms_type.get(data_type)
             if dims and dims != [0]:
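For reference, a small sketch of how the restored module-level mapping replaces the old `tensor_to_np_type()` helper (the lookup and the 4-bytes-per-character string handling mirror `_load_into_param_dict` above; `np_type_for` is an illustrative name, not part of the module):

    import numpy as np

    tensor_to_np_type = {"Float32": np.float32, "Int32": np.int32, "str": "U"}  # abridged

    def np_type_for(data_type, data=b""):
        np_type = tensor_to_np_type.get(data_type)
        if data_type == "str":
            # checkpoint strings are stored as 4 bytes per character, hence "U" + length
            np_type = np_type + str(int(len(data) / 4))
        return np_type

    print(np_type_for("Float32"))            # <class 'numpy.float32'>
    print(np_type_for("str", b"\x00" * 20))  # U5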
mindspore/train/train_thor/model_thor.py CHANGED
@@ -29,7 +29,7 @@ from mindspore import nn
 from mindspore.train.model import Model
 from mindspore.train.dataset_helper import connect_network_with_dataset
 from mindspore.parallel._utils import _need_to_full, _to_full_tensor
-from mindspore.common.dtype import _pytype_to_dtype
+from mindspore.common.dtype import pytype_to_dtype
 from mindspore._c_expression import init_exec_dataset
 from mindspore.train.train_thor.dataset_helper import DatasetHelper
 
@@ -46,7 +46,7 @@ def _convert_to_ms_type(types):
     """
     ms_types = []
     for numpy_type in types:
-        ms_type = _pytype_to_dtype(numpy_type)  # pylint:disable=protected-access
+        ms_type = pytype_to_dtype(numpy_type)
         ms_types.append(ms_type)
     return ms_types
 
mindspore/turbojpeg.dll CHANGED
Binary file
mindspore/utils/hooks.py ADDED
@@ -0,0 +1,81 @@
+# Copyright 2025 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""hooks"""
+from collections import OrderedDict
+import weakref
+from typing import Any, Tuple
+
+
+class _RemovableHandle:
+    r"""
+    A handle which provides the capability to remove a hook.
+
+    Args:
+        hooks_dict (dict): A dictionary of hooks, indexed by hook `id`.
+
+    Keyword Args:
+        extra_dict (Union[dict, list[dict]], optional): An additional dictionary or list of
+            dictionaries whose keys will be deleted when the same keys are
+            removed from `hooks_dict`. Default ``None``.
+    """
+
+    id: int
+    next_id: int = 0
+
+    def __init__(self, hooks_dict: Any, *, extra_dict: Any = None) -> None:
+        self.hooks_dict_ref = weakref.ref(hooks_dict)
+        self.id = _RemovableHandle.next_id
+        _RemovableHandle.next_id += 1
+
+        self.extra_dict_ref: Tuple = ()
+        if isinstance(extra_dict, dict):
+            self.extra_dict_ref = (weakref.ref(extra_dict),)
+        elif isinstance(extra_dict, list):
+            self.extra_dict_ref = tuple(weakref.ref(d) for d in extra_dict)
+
+    def remove(self) -> None:
+        hooks_dict = self.hooks_dict_ref()
+        if hooks_dict is not None and self.id in hooks_dict:
+            del hooks_dict[self.id]
+
+        for ref in self.extra_dict_ref:
+            extra_dict = ref()
+            if extra_dict is not None and self.id in extra_dict:
+                del extra_dict[self.id]
+
+    def __getstate__(self):
+        if self.extra_dict_ref is None:
+            return (self.hooks_dict_ref(), self.id)
+        return (self.hooks_dict_ref(), self.id, tuple(ref() for ref in self.extra_dict_ref))
+
+    def __setstate__(self, state) -> None:
+        if state[0] is None:
+            # create a dead reference
+            self.hooks_dict_ref = weakref.ref(OrderedDict())
+        else:
+            self.hooks_dict_ref = weakref.ref(state[0])
+        self.id = state[1]
+        _RemovableHandle.next_id = max(_RemovableHandle.next_id, self.id + 1)
+
+        if len(state) < 3 or state[2] is None:
+            self.extra_dict_ref = ()
+        else:
+            self.extra_dict_ref = tuple(weakref.ref(d) for d in state[2])
+
+    def __enter__(self) -> "_RemovableHandle":
+        return self
+
+    def __exit__(self, type: Any, value: Any, tb: Any) -> None:
+        self.remove()
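A short usage sketch of the new handle class (the hook registry here is hypothetical; the import path follows the mindspore/utils/hooks.py entry in the files-changed list):

    from collections import OrderedDict
    from mindspore.utils.hooks import _RemovableHandle

    hooks = OrderedDict()
    handle = _RemovableHandle(hooks)
    hooks[handle.id] = lambda *args: print("hook fired")

    # remove() deletes the registered entry; the context-manager form does the same on exit
    with handle:
        pass
    assert handle.id not in hooks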
mindspore/utils/utils.py CHANGED
@@ -132,16 +132,16 @@ class TftHandle:
         if "ARF:1" in tft_env:
             logger.warning(f"Disable hccl watchdog when using ARF.")
             context.set_context(ascend_config={"hccl_watchdog": False})
-            if "TTP:1" not in tft_env:
-                logger.warning(f"Turn on TTP config when using ARF.")
-                tft_env = tft_env.replace("{", "").replace("}", "")
-                all_opts = [part.strip() for part in tft_env.split(",")] + ["TTP:1"]
-                os.environ["MS_ENABLE_TFT"] = "{" + ",".join(all_opts) + "}"
+            logger.warning(f"Turn on TTP config when using ARF.")
+            if "TTP:1,UCE:1,ARF:1" not in tft_env:
+                os.environ["MS_ENABLE_TFT"] = "{TTP:1,ARF:1}"
             os.environ["MS_ENABLE_RECOVERY"] = "1"
 
+        mode = context.get_context("mode")
         device_target = context.get_context("device_target")
-        if device_target != "Ascend":
-            logger.warning(f"MindIO adataper only support on Ascend device but got device {device_target}!")
+        if device_target != "Ascend" or mode != context.GRAPH_MODE:
+            logger.warning(f"MindIO adataper only support on Ascend device with GRAPH Mode!"
+                           f"device:{device_target}, run mode: {mode}")
             return
 
         ctrl_port = int(os.getenv("MS_TFT_PORT"))
@@ -154,7 +154,7 @@ class TftHandle:
             from mindio_ttp import framework_ttp as tft
             self.tft = tft
         except BaseException as e:
-            raise ModuleNotFoundError(f"Module not found. Detail info {str(e)}")
+            raise ModuleNotFoundError(f"Module nopt found. Detail info {str(e)}")
         world_size = int(os.getenv("MS_WORKER_NUM"))  # from msrun
         cur_rank = int(os.getenv("MS_NODE_ID"))  # from msrun
         enable_local_copy = False
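The TFT options that this handle rewrites are driven by the MS_ENABLE_TFT environment variable; an illustrative way to enable the full option set before launching (values taken from the strings in the diff above):

    import os

    # enable TTP, UCE and ARF handling; TftHandle forces TTP on when ARF is requested
    os.environ["MS_ENABLE_TFT"] = "{TTP:1,UCE:1,ARF:1}"
    os.environ["MS_ENABLE_RECOVERY"] = "1"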
mindspore/version.py CHANGED
@@ -1 +1 @@
-__version__ = '2.7.0'
+__version__ = '2.7.0rc1'
{mindspore-2.7.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mindspore
-Version: 2.7.0
+Version: 2.7.0rc1
 Summary: MindSpore is a new open source deep learning training/inference framework that could be used for mobile, edge and cloud scenarios.
 Home-page: https://www.mindspore.cn
 Author: The MindSpore Authors