mindspore 2.4.10__cp311-cp311-win_amd64.whl → 2.6.0rc1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (602)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +13 -6
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_check_jit_forbidden_api.py +3 -0
  9. mindspore/_checkparam.py +3 -38
  10. mindspore/_deprecated/__init__.py +17 -0
  11. mindspore/_deprecated/jit.py +198 -0
  12. mindspore/_extends/builtin_operations.py +1 -1
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +6 -7
  15. mindspore/_extends/parse/compile_config.py +83 -0
  16. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  17. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
  18. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  19. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  20. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  21. mindspore/_extends/parse/parser.py +46 -197
  22. mindspore/_extends/parse/resources.py +1 -5
  23. mindspore/_extends/parse/standard_method.py +217 -98
  24. mindspore/_extends/pijit/__init__.py +2 -2
  25. mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
  26. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  27. mindspore/_extends/utils.py +1 -1
  28. mindspore/amp.py +11 -5
  29. mindspore/atlprov.dll +0 -0
  30. mindspore/avcodec-59.dll +0 -0
  31. mindspore/avdevice-59.dll +0 -0
  32. mindspore/avfilter-8.dll +0 -0
  33. mindspore/avformat-59.dll +0 -0
  34. mindspore/avutil-57.dll +0 -0
  35. mindspore/boost/__init__.py +2 -2
  36. mindspore/boost/base.py +3 -7
  37. mindspore/boost/boost_cell_wrapper.py +138 -43
  38. mindspore/c1.dll +0 -0
  39. mindspore/c1xx.dll +0 -0
  40. mindspore/c2.dll +0 -0
  41. mindspore/common/__init__.py +6 -3
  42. mindspore/common/_grad_function.py +56 -0
  43. mindspore/common/_pijit_context.py +14 -5
  44. mindspore/common/_register_for_tensor.py +1 -2
  45. mindspore/common/_stub_tensor.py +30 -14
  46. mindspore/common/_tensor_cpp_method.py +17 -0
  47. mindspore/common/_tensor_docs.py +4760 -0
  48. mindspore/common/api.py +435 -371
  49. mindspore/common/auto_dynamic_shape.py +41 -44
  50. mindspore/common/dtype.py +39 -36
  51. mindspore/common/dump.py +9 -6
  52. mindspore/common/file_system.py +9 -1
  53. mindspore/common/generator.py +2 -0
  54. mindspore/common/hook_handle.py +6 -2
  55. mindspore/common/initializer.py +13 -10
  56. mindspore/common/jit_begin_end.py +94 -0
  57. mindspore/common/jit_config.py +6 -1
  58. mindspore/common/jit_context.py +76 -0
  59. mindspore/common/jit_trace.py +378 -0
  60. mindspore/common/lazy_inline.py +9 -3
  61. mindspore/common/mindir_util.py +10 -2
  62. mindspore/common/mutable.py +5 -4
  63. mindspore/common/parameter.py +135 -52
  64. mindspore/common/seed.py +2 -2
  65. mindspore/common/sparse_tensor.py +23 -17
  66. mindspore/common/tensor.py +951 -1992
  67. mindspore/communication/__init__.py +7 -5
  68. mindspore/communication/_comm_helper.py +52 -2
  69. mindspore/communication/comm_func.py +240 -181
  70. mindspore/communication/management.py +95 -26
  71. mindspore/context.py +314 -566
  72. mindspore/dataset/__init__.py +65 -37
  73. mindspore/dataset/audio/__init__.py +2 -8
  74. mindspore/dataset/audio/transforms.py +3 -17
  75. mindspore/dataset/callback/ds_callback.py +2 -1
  76. mindspore/dataset/core/config.py +87 -6
  77. mindspore/dataset/engine/cache_admin.py +3 -3
  78. mindspore/dataset/engine/cache_client.py +6 -5
  79. mindspore/dataset/engine/datasets.py +292 -267
  80. mindspore/dataset/engine/datasets_audio.py +22 -8
  81. mindspore/dataset/engine/datasets_standard_format.py +46 -27
  82. mindspore/dataset/engine/datasets_text.py +78 -48
  83. mindspore/dataset/engine/datasets_user_defined.py +182 -116
  84. mindspore/dataset/engine/datasets_vision.py +120 -44
  85. mindspore/dataset/engine/iterators.py +283 -63
  86. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  87. mindspore/dataset/engine/obs/util.py +8 -0
  88. mindspore/dataset/engine/queue.py +40 -0
  89. mindspore/dataset/engine/samplers.py +289 -43
  90. mindspore/dataset/engine/serializer_deserializer.py +3 -2
  91. mindspore/dataset/engine/validators.py +53 -11
  92. mindspore/dataset/text/__init__.py +7 -6
  93. mindspore/dataset/text/transforms.py +6 -5
  94. mindspore/dataset/text/utils.py +3 -3
  95. mindspore/dataset/transforms/__init__.py +0 -9
  96. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  97. mindspore/dataset/transforms/transforms.py +31 -14
  98. mindspore/dataset/utils/browse_dataset.py +1 -1
  99. mindspore/dataset/vision/__init__.py +2 -9
  100. mindspore/dataset/vision/transforms.py +202 -158
  101. mindspore/dataset/vision/utils.py +7 -5
  102. mindspore/dataset/vision/validators.py +1 -2
  103. mindspore/device_context/__init__.py +21 -0
  104. mindspore/device_context/ascend/__init__.py +25 -0
  105. mindspore/device_context/ascend/device.py +72 -0
  106. mindspore/device_context/ascend/op_debug.py +153 -0
  107. mindspore/device_context/ascend/op_precision.py +193 -0
  108. mindspore/device_context/ascend/op_tuning.py +123 -0
  109. mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
  110. mindspore/device_context/cpu/device.py +62 -0
  111. mindspore/device_context/cpu/op_tuning.py +43 -0
  112. mindspore/device_context/gpu/__init__.py +21 -0
  113. mindspore/device_context/gpu/device.py +70 -0
  114. mindspore/device_context/gpu/op_precision.py +67 -0
  115. mindspore/device_context/gpu/op_tuning.py +175 -0
  116. mindspore/device_manager.py +170 -0
  117. mindspore/dnnl.dll +0 -0
  118. mindspore/dpcmi.dll +0 -0
  119. mindspore/experimental/es/embedding_service.py +35 -27
  120. mindspore/experimental/llm_boost/__init__.py +1 -0
  121. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  122. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  123. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  124. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  125. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  126. mindspore/experimental/llm_boost/register.py +1 -0
  127. mindspore/experimental/map_parameter.py +4 -4
  128. mindspore/experimental/optim/adadelta.py +6 -6
  129. mindspore/experimental/optim/adagrad.py +4 -4
  130. mindspore/experimental/optim/adam.py +7 -0
  131. mindspore/experimental/optim/adamax.py +4 -4
  132. mindspore/experimental/optim/adamw.py +4 -0
  133. mindspore/experimental/optim/asgd.py +1 -1
  134. mindspore/experimental/optim/lr_scheduler.py +73 -46
  135. mindspore/experimental/optim/radam.py +34 -31
  136. mindspore/experimental/optim/rprop.py +1 -1
  137. mindspore/experimental/optim/sgd.py +1 -1
  138. mindspore/hal/contiguous_tensors_handle.py +6 -10
  139. mindspore/hal/device.py +55 -53
  140. mindspore/hal/event.py +52 -52
  141. mindspore/hal/memory.py +157 -117
  142. mindspore/hal/stream.py +150 -109
  143. mindspore/include/api/context.h +0 -1
  144. mindspore/include/dataset/constants.h +7 -4
  145. mindspore/include/dataset/execute.h +2 -2
  146. mindspore/jpeg62.dll +0 -0
  147. mindspore/log.py +50 -0
  148. mindspore/mindrecord/__init__.py +21 -8
  149. mindspore/mindrecord/config.py +17 -316
  150. mindspore/mindrecord/filereader.py +1 -9
  151. mindspore/mindrecord/filewriter.py +5 -15
  152. mindspore/mindrecord/mindpage.py +1 -9
  153. mindspore/mindspore_backend_common.dll +0 -0
  154. mindspore/mindspore_backend_manager.dll +0 -0
  155. mindspore/mindspore_common.dll +0 -0
  156. mindspore/mindspore_core.dll +0 -0
  157. mindspore/mindspore_dump.dll +0 -0
  158. mindspore/mindspore_frontend.dll +0 -0
  159. mindspore/mindspore_glog.dll +0 -0
  160. mindspore/mindspore_memory_pool.dll +0 -0
  161. mindspore/mindspore_ms_backend.dll +0 -0
  162. mindspore/mindspore_ops.dll +0 -0
  163. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  164. mindspore/mindspore_ops_kernel_common.dll +0 -0
  165. mindspore/mindspore_profiler.dll +0 -0
  166. mindspore/mindspore_pyboost.dll +0 -0
  167. mindspore/mindspore_pynative.dll +0 -0
  168. mindspore/mindspore_res_manager.dll +0 -0
  169. mindspore/mindspore_runtime_pipeline.dll +0 -0
  170. mindspore/mint/__init__.py +796 -759
  171. mindspore/mint/distributed/__init__.py +70 -4
  172. mindspore/mint/distributed/distributed.py +2679 -44
  173. mindspore/mint/linalg/__init__.py +8 -0
  174. mindspore/mint/nn/__init__.py +743 -22
  175. mindspore/mint/nn/functional.py +716 -23
  176. mindspore/mint/nn/layer/__init__.py +21 -4
  177. mindspore/mint/nn/layer/_functions.py +334 -0
  178. mindspore/mint/nn/layer/activation.py +276 -1
  179. mindspore/mint/nn/layer/basic.py +123 -0
  180. mindspore/mint/nn/layer/conv.py +921 -0
  181. mindspore/mint/nn/layer/normalization.py +223 -28
  182. mindspore/mint/nn/layer/padding.py +797 -0
  183. mindspore/mint/nn/layer/pooling.py +235 -0
  184. mindspore/mint/optim/__init__.py +3 -1
  185. mindspore/mint/optim/adam.py +223 -0
  186. mindspore/mint/optim/adamw.py +26 -19
  187. mindspore/mint/optim/sgd.py +171 -0
  188. mindspore/mint/special/__init__.py +2 -1
  189. mindspore/msobj140.dll +0 -0
  190. mindspore/mspdb140.dll +0 -0
  191. mindspore/mspdbcore.dll +0 -0
  192. mindspore/mspdbst.dll +0 -0
  193. mindspore/mspft140.dll +0 -0
  194. mindspore/msvcdis140.dll +0 -0
  195. mindspore/msvcp140_1.dll +0 -0
  196. mindspore/msvcp140_2.dll +0 -0
  197. mindspore/msvcp140_atomic_wait.dll +0 -0
  198. mindspore/msvcp140_codecvt_ids.dll +0 -0
  199. mindspore/multiprocessing/__init__.py +5 -0
  200. mindspore/nn/__init__.py +4 -1
  201. mindspore/nn/cell.py +1370 -189
  202. mindspore/nn/dynamic_lr.py +2 -1
  203. mindspore/nn/layer/activation.py +29 -27
  204. mindspore/nn/layer/basic.py +51 -35
  205. mindspore/nn/layer/channel_shuffle.py +3 -3
  206. mindspore/nn/layer/container.py +1 -1
  207. mindspore/nn/layer/conv.py +22 -17
  208. mindspore/nn/layer/embedding.py +12 -11
  209. mindspore/nn/layer/normalization.py +56 -49
  210. mindspore/nn/layer/padding.py +4 -3
  211. mindspore/nn/layer/pooling.py +120 -42
  212. mindspore/nn/layer/rnn_cells.py +1 -1
  213. mindspore/nn/layer/rnns.py +2 -1
  214. mindspore/nn/layer/timedistributed.py +5 -5
  215. mindspore/nn/layer/transformer.py +59 -36
  216. mindspore/nn/learning_rate_schedule.py +8 -4
  217. mindspore/nn/loss/loss.py +58 -55
  218. mindspore/nn/optim/ada_grad.py +7 -5
  219. mindspore/nn/optim/adadelta.py +11 -9
  220. mindspore/nn/optim/adafactor.py +1 -1
  221. mindspore/nn/optim/adam.py +17 -13
  222. mindspore/nn/optim/adamax.py +8 -7
  223. mindspore/nn/optim/adasum.py +5 -5
  224. mindspore/nn/optim/asgd.py +1 -1
  225. mindspore/nn/optim/ftrl.py +11 -9
  226. mindspore/nn/optim/lamb.py +1 -1
  227. mindspore/nn/optim/lars.py +1 -4
  228. mindspore/nn/optim/lazyadam.py +12 -10
  229. mindspore/nn/optim/momentum.py +7 -6
  230. mindspore/nn/optim/optimizer.py +3 -3
  231. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  232. mindspore/nn/optim/rmsprop.py +13 -12
  233. mindspore/nn/optim/rprop.py +11 -9
  234. mindspore/nn/optim/sgd.py +9 -6
  235. mindspore/nn/optim/tft_wrapper.py +5 -2
  236. mindspore/nn/optim/thor.py +2 -1
  237. mindspore/nn/probability/bijector/bijector.py +17 -11
  238. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  239. mindspore/nn/probability/bijector/invert.py +2 -2
  240. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  241. mindspore/nn/probability/bijector/softplus.py +3 -2
  242. mindspore/nn/probability/distribution/beta.py +3 -3
  243. mindspore/nn/probability/distribution/categorical.py +1 -1
  244. mindspore/nn/probability/distribution/cauchy.py +4 -2
  245. mindspore/nn/probability/distribution/exponential.py +6 -7
  246. mindspore/nn/probability/distribution/gamma.py +2 -2
  247. mindspore/nn/probability/distribution/gumbel.py +2 -2
  248. mindspore/nn/probability/distribution/half_normal.py +5 -3
  249. mindspore/nn/probability/distribution/logistic.py +5 -3
  250. mindspore/nn/probability/distribution/poisson.py +1 -1
  251. mindspore/nn/probability/distribution/uniform.py +5 -3
  252. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  253. mindspore/nn/reinforcement/tensor_array.py +1 -1
  254. mindspore/nn/utils/init.py +13 -11
  255. mindspore/nn/wrap/__init__.py +6 -6
  256. mindspore/nn/wrap/cell_wrapper.py +181 -122
  257. mindspore/nn/wrap/grad_reducer.py +45 -36
  258. mindspore/nn/wrap/loss_scale.py +6 -7
  259. mindspore/numpy/array_creations.py +63 -65
  260. mindspore/numpy/array_ops.py +149 -144
  261. mindspore/numpy/logic_ops.py +41 -42
  262. mindspore/numpy/math_ops.py +365 -363
  263. mindspore/numpy/utils.py +17 -18
  264. mindspore/numpy/utils_const.py +5 -6
  265. mindspore/opencv_core452.dll +0 -0
  266. mindspore/opencv_imgcodecs452.dll +0 -0
  267. mindspore/opencv_imgproc452.dll +0 -0
  268. mindspore/ops/__init__.py +5 -3
  269. mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
  270. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
  271. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  272. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  273. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  274. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  275. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  276. mindspore/ops/_register_for_op.py +0 -11
  277. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  278. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
  279. mindspore/ops/_vmap/vmap_array_ops.py +27 -25
  280. mindspore/ops/_vmap/vmap_base.py +0 -2
  281. mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
  282. mindspore/ops/_vmap/vmap_math_ops.py +15 -16
  283. mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
  284. mindspore/ops/auto_generate/__init__.py +4 -3
  285. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
  286. mindspore/ops/auto_generate/gen_extend_func.py +764 -124
  287. mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
  288. mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
  289. mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
  290. mindspore/ops/composite/__init__.py +2 -1
  291. mindspore/ops/composite/base.py +20 -25
  292. mindspore/ops/composite/math_ops.py +6 -16
  293. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  294. mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
  295. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  296. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  297. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  298. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  299. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  300. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  301. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  302. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  303. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  304. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  305. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  306. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  307. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  308. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  309. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  310. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  311. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  312. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  313. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  314. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  315. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  316. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  317. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  318. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  319. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  320. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
  321. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  322. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  323. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  324. mindspore/ops/function/__init__.py +40 -2
  325. mindspore/ops/function/_add_attr_func.py +58 -0
  326. mindspore/ops/function/array_func.py +2089 -2403
  327. mindspore/ops/function/clip_func.py +80 -23
  328. mindspore/ops/function/debug_func.py +57 -57
  329. mindspore/ops/function/grad/__init__.py +1 -0
  330. mindspore/ops/function/grad/grad_func.py +104 -71
  331. mindspore/ops/function/image_func.py +2 -2
  332. mindspore/ops/function/linalg_func.py +47 -78
  333. mindspore/ops/function/math_func.py +4501 -3802
  334. mindspore/ops/function/nn_func.py +1726 -620
  335. mindspore/ops/function/other_func.py +159 -1
  336. mindspore/ops/function/parameter_func.py +18 -84
  337. mindspore/ops/function/random_func.py +440 -387
  338. mindspore/ops/function/reshard_func.py +4 -70
  339. mindspore/ops/function/sparse_func.py +3 -3
  340. mindspore/ops/function/sparse_unary_func.py +6 -6
  341. mindspore/ops/function/spectral_func.py +25 -58
  342. mindspore/ops/function/vmap_func.py +24 -17
  343. mindspore/ops/functional.py +22 -7
  344. mindspore/ops/functional_overload.py +1440 -0
  345. mindspore/ops/op_info_register.py +32 -244
  346. mindspore/ops/operations/__init__.py +13 -7
  347. mindspore/ops/operations/_custom_ops_utils.py +247 -0
  348. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  349. mindspore/ops/operations/_grad_ops.py +2 -43
  350. mindspore/ops/operations/_infer_ops.py +2 -1
  351. mindspore/ops/operations/_inner_ops.py +43 -84
  352. mindspore/ops/operations/_ms_kernel.py +4 -10
  353. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  354. mindspore/ops/operations/_scalar_ops.py +3 -2
  355. mindspore/ops/operations/_sequence_ops.py +1 -1
  356. mindspore/ops/operations/_tensor_array.py +1 -1
  357. mindspore/ops/operations/array_ops.py +81 -324
  358. mindspore/ops/operations/comm_ops.py +154 -108
  359. mindspore/ops/operations/custom_ops.py +232 -78
  360. mindspore/ops/operations/debug_ops.py +153 -59
  361. mindspore/ops/operations/inner_ops.py +7 -5
  362. mindspore/ops/operations/linalg_ops.py +1 -57
  363. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  364. mindspore/ops/operations/manually_defined/ops_def.py +928 -180
  365. mindspore/ops/operations/math_ops.py +32 -234
  366. mindspore/ops/operations/nn_ops.py +210 -498
  367. mindspore/ops/operations/other_ops.py +62 -9
  368. mindspore/ops/operations/random_ops.py +13 -7
  369. mindspore/ops/operations/reshard_ops.py +1 -1
  370. mindspore/ops/operations/sparse_ops.py +2 -2
  371. mindspore/ops/primitive.py +66 -53
  372. mindspore/ops/tensor_method.py +1888 -0
  373. mindspore/ops_generate/__init__.py +0 -5
  374. mindspore/ops_generate/aclnn/__init__.py +0 -0
  375. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
  376. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
  377. mindspore/ops_generate/api/__init__.py +0 -0
  378. mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
  379. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
  380. mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
  381. mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
  382. mindspore/ops_generate/api/functions_cc_generator.py +237 -0
  383. mindspore/ops_generate/api/gen_api.py +103 -0
  384. mindspore/ops_generate/api/op_api_proto.py +235 -0
  385. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
  386. mindspore/ops_generate/common/__init__.py +0 -0
  387. mindspore/ops_generate/common/base_generator.py +11 -0
  388. mindspore/ops_generate/common/gen_constants.py +91 -0
  389. mindspore/ops_generate/common/gen_utils.py +348 -0
  390. mindspore/ops_generate/common/op_proto.py +473 -0
  391. mindspore/ops_generate/common/template.py +523 -0
  392. mindspore/ops_generate/gen_ops.py +22 -1069
  393. mindspore/ops_generate/op_def/__init__.py +0 -0
  394. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  395. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
  396. mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
  397. mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
  398. mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
  399. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  400. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  401. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  402. mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
  403. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
  404. mindspore/ops_generate/pyboost/__init__.py +0 -0
  405. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
  406. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
  407. mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
  408. mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
  409. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
  410. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
  411. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
  412. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
  413. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
  414. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
  415. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
  416. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
  417. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
  418. mindspore/ops_generate/resources/__init__.py +0 -0
  419. mindspore/ops_generate/resources/resource_list.py +30 -0
  420. mindspore/ops_generate/resources/resource_loader.py +36 -0
  421. mindspore/ops_generate/resources/resource_manager.py +64 -0
  422. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  423. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  424. mindspore/parallel/__init__.py +7 -3
  425. mindspore/parallel/_auto_parallel_context.py +152 -34
  426. mindspore/parallel/_cell_wrapper.py +130 -15
  427. mindspore/parallel/_parallel_serialization.py +107 -5
  428. mindspore/parallel/_ps_context.py +1 -1
  429. mindspore/parallel/_recovery_context.py +7 -2
  430. mindspore/parallel/_tensor.py +142 -18
  431. mindspore/parallel/_utils.py +199 -23
  432. mindspore/parallel/algo_parameter_config.py +4 -4
  433. mindspore/parallel/auto_parallel.py +732 -0
  434. mindspore/parallel/checkpoint_convert.py +159 -0
  435. mindspore/parallel/checkpoint_transform.py +698 -35
  436. mindspore/parallel/cluster/process_entity/_api.py +276 -50
  437. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  438. mindspore/parallel/cluster/run.py +21 -4
  439. mindspore/parallel/function/__init__.py +24 -0
  440. mindspore/parallel/function/reshard_func.py +259 -0
  441. mindspore/parallel/nn/__init__.py +25 -0
  442. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  443. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  444. mindspore/parallel/parameter_broadcast.py +25 -14
  445. mindspore/parallel/shard.py +137 -58
  446. mindspore/parallel/transform_safetensors.py +363 -305
  447. mindspore/pgodb140.dll +0 -0
  448. mindspore/pgort140.dll +0 -0
  449. mindspore/profiler/__init__.py +22 -5
  450. mindspore/profiler/analysis/__init__.py +0 -0
  451. mindspore/profiler/analysis/parser/__init__.py +0 -0
  452. mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
  453. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  454. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  455. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  456. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  457. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  458. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
  459. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  460. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
  461. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  462. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  463. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  464. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  465. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  466. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  467. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  468. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  469. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  470. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  471. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
  472. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  473. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  474. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  475. mindspore/profiler/analysis/task_manager.py +131 -0
  476. mindspore/profiler/analysis/time_converter.py +84 -0
  477. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  478. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
  479. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  480. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
  481. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
  482. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
  483. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
  484. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  485. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  486. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
  487. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  488. mindspore/profiler/analysis/work_flow.py +73 -0
  489. mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
  490. mindspore/profiler/common/command_executor.py +90 -0
  491. mindspore/profiler/common/constant.py +186 -3
  492. mindspore/profiler/common/file_manager.py +208 -0
  493. mindspore/profiler/common/log.py +130 -0
  494. mindspore/profiler/common/msprof_cmd_tool.py +221 -0
  495. mindspore/profiler/common/path_manager.py +395 -0
  496. mindspore/profiler/common/process_bar.py +168 -0
  497. mindspore/profiler/common/process_pool.py +9 -3
  498. mindspore/profiler/common/profiler_context.py +500 -0
  499. mindspore/profiler/common/profiler_info.py +304 -0
  500. mindspore/profiler/common/profiler_meta_data.py +74 -0
  501. mindspore/profiler/common/profiler_output_path.py +284 -0
  502. mindspore/profiler/common/profiler_parameters.py +251 -0
  503. mindspore/profiler/common/profiler_path_manager.py +179 -0
  504. mindspore/profiler/common/record_function.py +76 -0
  505. mindspore/profiler/common/tlv_decoder.py +76 -0
  506. mindspore/profiler/common/util.py +75 -2
  507. mindspore/profiler/dynamic_profiler.py +341 -75
  508. mindspore/profiler/envprofiler.py +163 -0
  509. mindspore/profiler/experimental_config.py +197 -0
  510. mindspore/profiler/mstx.py +242 -0
  511. mindspore/profiler/platform/__init__.py +21 -0
  512. mindspore/profiler/platform/base_profiler.py +40 -0
  513. mindspore/profiler/platform/cpu_profiler.py +124 -0
  514. mindspore/profiler/platform/gpu_profiler.py +74 -0
  515. mindspore/profiler/platform/npu_profiler.py +335 -0
  516. mindspore/profiler/profiler.py +1073 -90
  517. mindspore/profiler/profiler_action_controller.py +187 -0
  518. mindspore/profiler/profiler_interface.py +118 -0
  519. mindspore/profiler/schedule.py +243 -0
  520. mindspore/rewrite/api/node.py +15 -13
  521. mindspore/rewrite/api/symbol_tree.py +2 -3
  522. mindspore/run_check/_check_version.py +27 -20
  523. mindspore/run_check/run_check.py +1 -1
  524. mindspore/runtime/__init__.py +37 -0
  525. mindspore/runtime/device.py +27 -0
  526. mindspore/runtime/event.py +209 -0
  527. mindspore/runtime/executor.py +177 -0
  528. mindspore/runtime/memory.py +409 -0
  529. mindspore/runtime/stream.py +460 -0
  530. mindspore/runtime/thread_bind_core.py +401 -0
  531. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  532. mindspore/swresample-4.dll +0 -0
  533. mindspore/swscale-6.dll +0 -0
  534. mindspore/tbbmalloc.dll +0 -0
  535. mindspore/tinyxml2.dll +0 -0
  536. mindspore/train/__init__.py +8 -8
  537. mindspore/train/_utils.py +88 -25
  538. mindspore/train/amp.py +9 -5
  539. mindspore/train/callback/__init__.py +2 -2
  540. mindspore/train/callback/_callback.py +2 -16
  541. mindspore/train/callback/_checkpoint.py +53 -55
  542. mindspore/train/callback/_cluster_monitor.py +14 -18
  543. mindspore/train/callback/_early_stop.py +1 -1
  544. mindspore/train/callback/_flops_collector.py +103 -68
  545. mindspore/train/callback/_history.py +8 -5
  546. mindspore/train/callback/_lambda_callback.py +2 -2
  547. mindspore/train/callback/_landscape.py +0 -3
  548. mindspore/train/callback/_loss_monitor.py +2 -1
  549. mindspore/train/callback/_on_request_exit.py +6 -5
  550. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  551. mindspore/train/callback/_summary_collector.py +52 -19
  552. mindspore/train/callback/_time_monitor.py +2 -1
  553. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
  554. mindspore/train/data_sink.py +25 -2
  555. mindspore/train/dataset_helper.py +15 -16
  556. mindspore/train/loss_scale_manager.py +8 -7
  557. mindspore/train/metrics/accuracy.py +3 -3
  558. mindspore/train/metrics/confusion_matrix.py +9 -9
  559. mindspore/train/metrics/error.py +3 -3
  560. mindspore/train/metrics/hausdorff_distance.py +4 -4
  561. mindspore/train/metrics/mean_surface_distance.py +3 -3
  562. mindspore/train/metrics/metric.py +0 -12
  563. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  564. mindspore/train/metrics/precision.py +11 -10
  565. mindspore/train/metrics/recall.py +9 -9
  566. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  567. mindspore/train/mind_ir_pb2.py +174 -46
  568. mindspore/train/model.py +184 -113
  569. mindspore/train/serialization.py +622 -978
  570. mindspore/train/summary/_summary_adapter.py +2 -2
  571. mindspore/train/summary/summary_record.py +2 -3
  572. mindspore/train/train_thor/model_thor.py +1 -1
  573. mindspore/turbojpeg.dll +0 -0
  574. mindspore/utils/__init__.py +6 -3
  575. mindspore/utils/dryrun.py +140 -0
  576. mindspore/utils/hooks.py +81 -0
  577. mindspore/utils/runtime_execution_order_check.py +550 -0
  578. mindspore/utils/utils.py +138 -4
  579. mindspore/vcmeta.dll +0 -0
  580. mindspore/vcruntime140.dll +0 -0
  581. mindspore/vcruntime140_1.dll +0 -0
  582. mindspore/version.py +1 -1
  583. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
  584. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +587 -418
  585. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
  586. mindspore/_install_custom.py +0 -43
  587. mindspore/common/_register_for_adapter.py +0 -74
  588. mindspore/common/_tensor_overload.py +0 -139
  589. mindspore/mindspore_np_dtype.dll +0 -0
  590. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  591. mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
  592. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  593. mindspore/ops_generate/gen_aclnn_implement.py +0 -263
  594. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  595. mindspore/ops_generate/gen_pyboost_func.py +0 -1052
  596. mindspore/ops_generate/gen_utils.py +0 -209
  597. mindspore/ops_generate/op_proto.py +0 -145
  598. mindspore/ops_generate/template.py +0 -261
  599. mindspore/profiler/envprofiling.py +0 -254
  600. mindspore/profiler/profiling.py +0 -1926
  601. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
  602. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,938 @@
1
+ # Copyright 2024 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+ """
16
+ This module defines several classes and functions for generating C++ code for PyBoost operations,
17
+ including function headers, source files, and registration code. It handles the generation of code
18
+ for different devices (Ascend, CPU, GPU) and manages residual files associated with operator prototypes.
19
+ """
20
+
21
+ import os
22
+ import re
23
+
24
+ import common.template as template
25
+ import common.gen_constants as K
26
+ from common.gen_utils import save_file
27
+ from common.op_proto import OpProto
28
+ from common.base_generator import BaseGenerator
29
+
30
+ from .pyboost_utils import is_cube, AclnnUtils, get_return_type, merge_strings_by_chunk_size, is_op_multi_output, \
31
+ chunk_list
32
+ from .op_template_parser import OpTemplateParser
33
+
34
+
35
class PyboostCommonOpHeaderGenerator(BaseGenerator):
    """
    Produces the device-independent C++ header of every dispatchable PyBoost operator.

    Each operator prototype that carries dispatch settings is rendered through
    ``template.PYBOOST_BASE_OP_DEFINE_TEMPLATE`` and handed to ``save_file`` as ``<op_name>.h``.
    """

    def __init__(self):
        self.pyboost_op_header_str = template.PYBOOST_BASE_OP_DEFINE_TEMPLATE

    def generate(self, work_path, op_protos):
        """
        Render and save one base-op header per operator prototype.

        Args:
            work_path (str): Root directory; the headers are saved below
                ``<work_path>/<K.MS_PYBOOST_BASE_PATH>/auto_generate/``.
            op_protos (list): Operator prototypes. Prototypes whose ``op_dispatch`` is None are skipped.

        Returns:
            None
        """
        dispatchable_protos = (proto for proto in op_protos if proto.op_dispatch is not None)
        for proto in dispatchable_protos:
            parser = OpTemplateParser(proto)
            class_name = proto.op_class.name
            typed_call_args = parser.parse_call_args_with_types()
            return_type = _generate_cpp_func_return(proto)

            # Multi-output operators additionally override output_is_tuple() in the generated class.
            if is_op_multi_output(proto.op_returns):
                tuple_override = "bool output_is_tuple() const override { return true; }"
            else:
                tuple_override = ''

            header_text = template.PYBOOST_BASE_OP_DEFINE_TEMPLATE.replace(
                op_name=class_name,
                op_name_upper=class_name.upper(),
                call_args=typed_call_args,
                return_type=return_type,
                output_is_tuple=tuple_override)

            target_dir = os.path.join(work_path, f"{K.MS_PYBOOST_BASE_PATH}/auto_generate/")
            save_file(target_dir, f"{proto.op_name}.h", header_text)
74
+
75
+
76
class PyboostOpHeaderGenerator(BaseGenerator):
    """
    Produces the per-device C++ header of every PyBoost operator.

    One generator instance serves one device (ascend, gpu or cpu). Ascend communication operators
    are saved under the hccl directory; every other operator is saved under the device's
    ``pyboost/auto_generate/`` kernel directory.
    """

    def __init__(self, device):
        """
        Pick the header template and output directories for ``device``.

        Args:
            device (str): The target device (ascend, gpu, or cpu).

        Raises:
            ValueError: If the device is not supported.
        """
        header_templates = {"ascend": template.PYBOOST_ASCEND_OP_HEADER_TEMPLATE,
                            "gpu": template.PYBOOST_GPU_OP_HEADER_TEMPLATE,
                            "cpu": template.PYBOOST_CPU_OP_HEADER_TEMPLATE}
        selected_template = header_templates.get(device)
        if selected_template is None:
            raise ValueError(
                f"Device must be ascend, gpu, or cpu, {device} is not supported")
        self.PYBOOST_OP_HEADER_TEMPLATE = selected_template
        self.code_generate_path = f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/"
        self.hccl_code_generate_path = "mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/"
        self.device = device

    def generate(self, work_path, op_protos):
        """
        Render and save the device header of each operator prototype.

        Prototypes without dispatch settings, or whose dispatch setting for this device is the
        string 'None', are skipped.

        Args:
            work_path (str): Root directory below which the headers are saved.
            op_protos (list): Operator prototypes to process.

        Returns:
            None
        """
        for proto in op_protos:
            dispatch = proto.op_dispatch
            if dispatch is None or getattr(dispatch, self.device) == 'None':
                continue

            goes_to_hccl = dispatch.is_comm_op and self.device == 'ascend'
            parser = OpTemplateParser(proto)
            class_name = proto.op_class.name
            typed_call_args = parser.parse_call_args_with_types()
            return_type = _generate_cpp_func_return(proto)

            header_text = self.PYBOOST_OP_HEADER_TEMPLATE.replace(
                op_name=class_name,
                op_name_upper=class_name.upper(),
                operator_name=proto.op_name,
                call_args_with_type=typed_call_args,
                return_type=return_type)

            # Ascend communication ops live in the hccl tree, everything else in the device tree.
            if goes_to_hccl:
                relative_dir = self.hccl_code_generate_path
            else:
                relative_dir = self.code_generate_path
            save_file(os.path.join(work_path, relative_dir), f"{proto.op_name}.h", header_text)
136
+
137
+
138
class PyboostOpCppGenerator:
    """
    Generates C++ source snippets for PyBoost operators that dispatch to a customized kernel.

    An operator is handled here when its dispatch setting for the target device is neither
    'default' nor 'None'; that setting plus the suffix "Customize" is used as the name of the
    function to call. The rendered include lines and function bodies are appended to lists
    supplied by the caller.
    """

    def __init__(self, device):
        """
        Initializes the PyboostOpCppGenerator with the appropriate templates for the specified device.

        Args:
            device (str): The target device (ascend, gpu, or cpu).

        Raises:
            ValueError: If the device is not supported.
        """
        if device == 'ascend':
            customize_call_template = template.PYBOOST_ASCEND_CUSTOMIZE_CALL_TEMPLATE
            single_op_header_template = template.PYBOOST_ASCEND_SINGLE_OP_HEADER_TEMPLATE
            single_op_source_template = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
        elif device == 'cpu':
            customize_call_template = template.PYBOOST_CPU_CUSTOMIZE_CALL_TEMPLATE
            single_op_header_template = template.PYBOOST_CPU_SINGLE_OP_HEADER_TEMPLATE
            single_op_source_template = template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE
        elif device == 'gpu':
            customize_call_template = template.PYBOOST_GPU_CUSTOMIZE_CALL_TEMPLATE
            single_op_header_template = template.PYBOOST_GPU_SINGLE_OP_HEADER_TEMPLATE
            single_op_source_template = template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE
        else:
            raise ValueError(
                f"Device must be ascend, gpu, or cpu, {device} is not supported")
        self.PYBOOST_CUSTOMIZE_CALL_TEMPLATE = customize_call_template
        self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = single_op_header_template
        self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = single_op_source_template
        self.PYBOOST_SINGLE_HCLL_OP_HEADER_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_HCLL_OP_HEADER_TEMPLATE
        self.gen_path = f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/"
        self.device = device

    def generate_customize_op_cpp_code(self, op_protos, merge_op_header, merge_op_function, merge_op_inc,
                                       merge_op_hccl_header=None, merge_op_hccl_function=None, merge_op_hccl_inc=None):
        """
        Generate C++ code for PyBoost operations that call a customized kernel function.

        Prototypes without dispatch settings, and prototypes whose dispatch setting for this device
        is 'default' or 'None', are skipped. For the remaining ones the include line, the function
        body and the operator class name are appended to the output lists. Ascend communication
        operators go to the ``merge_op_hccl_*`` lists, all other operators to the ``merge_op_*`` lists.

        Args:
            op_protos (list): A list of operation prototypes to process. Each prototype contains
                metadata about the operation, including dispatch settings and arguments.
            merge_op_header (list): Receives the generated C++ include code for operations.
            merge_op_function (list): Receives the generated C++ source code for operations.
            merge_op_inc (list): Receives the class name of every operation added to the lists above.
            merge_op_hccl_header (list, optional): Like ``merge_op_header``, for Ascend communication ops.
            merge_op_hccl_function (list, optional): Like ``merge_op_function``, for Ascend communication ops.
            merge_op_hccl_inc (list, optional): Like ``merge_op_inc``, for Ascend communication ops.

        Raises:
            ValueError: If an Ascend communication op is encountered and any of the three
                ``merge_op_hccl_*`` lists was not provided.
        """
        for op_proto in op_protos:
            if op_proto.op_dispatch is None:
                continue
            dispatch_target = getattr(op_proto.op_dispatch, self.device)
            if dispatch_target in ('default', 'None'):
                continue

            is_ascend_comm_op = op_proto.op_dispatch.is_comm_op and self.device == 'ascend'
            operator_name = op_proto.op_name
            # All three hccl lists are appended to below, so all three must be present; check
            # before doing any parsing work so a misconfigured call fails fast.
            if is_ascend_comm_op and (merge_op_hccl_header is None or merge_op_hccl_function is None
                                      or merge_op_hccl_inc is None):
                raise ValueError(
                    f"merge_op_hccl_header, merge_op_hccl_function and merge_op_hccl_inc must be provided "
                    f"for comm op {operator_name}")

            op_parser = OpTemplateParser(op_proto)
            call_args = op_parser.parse_original_call_args(op_proto.op_args)
            call_args_with_type = op_parser.parse_call_args_with_types()
            _, call_func_outputs = op_parser.generate_pyboost_outputs()
            op_name_str = op_proto.op_class.name

            # Only the first in-place return value gets an overlap check.
            check_inplace_func = ''
            for arg in op_proto.op_returns:
                if arg.inplace != '':
                    check_inplace_func = f'ThrowExpectionWhenInternalOverlap({arg.inplace}_tensor);'
                    break

            call_impl = self.PYBOOST_CUSTOMIZE_CALL_TEMPLATE.replace(
                call_args=call_args,
                return_values=call_func_outputs,
                customize_func=dispatch_target + "Customize",
                check_expression=check_inplace_func,
            )

            if is_ascend_comm_op:
                customize_include = \
                    f'#include "mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/{operator_name.lower()}.h"\n'
                header_template = self.PYBOOST_SINGLE_HCLL_OP_HEADER_TEMPLATE
                target_header, target_function, target_inc = \
                    merge_op_hccl_header, merge_op_hccl_function, merge_op_hccl_inc
            else:
                customize_include = \
                    f'#include "{K.MS_OPS_KERNEL_PATH}/{self.device}/pyboost/customize/{operator_name.lower()}.h"\n'
                header_template = self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE
                target_header, target_function, target_inc = merge_op_header, merge_op_function, merge_op_inc

            register_custom = self._get_register_custom_kernel(op_proto)
            cpp_func_return = _generate_cpp_func_return(op_proto)

            target_header.append(header_template.replace(operator_name=operator_name,
                                                         customize_include=customize_include))
            target_function.append(
                self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_name_str,
                                                               call_args_with_type=call_args_with_type,
                                                               return_type=cpp_func_return, call_impl=call_impl,
                                                               register_custom_kernel=register_custom))
            target_inc.append(op_name_str)

    def _get_register_custom_kernel(self, op_proto: OpProto):
        """
        Generates the registration code for custom kernels based on the device.

        Args:
            op_proto (OpProto): The operator prototype to generate registration for.

        Returns:
            str: The registration macro call for cpu and gpu; an empty string for ascend.

        Raises:
            ValueError: If ``self.device`` is not ascend, gpu, or cpu.
        """
        if self.device == 'ascend':
            return ''
        if self.device == 'cpu':
            return f"MS_REG_PYBOOST_CPU_CUSTOM_KERNEL({op_proto.op_class.name});"
        if self.device == 'gpu':
            return f"MS_REG_PYBOOST_GPU_CUSTOM_KERNEL({op_proto.op_class.name});"
        raise ValueError(
            f"Device must be ascend, gpu, or cpu, {self.device} is not supported")
272
+
273
+
274
class PyboostViewOpCppGenerator:
    """
    Generates C++ source files for view operations in PyBoost.

    This class handles the generation of source snippets for view operations, i.e. prototypes
    with ``op_view`` set whose dispatch setting for the target device is 'default'.
    """

    def __init__(self, device):
        """
        Initializes the PyboostViewOpCppGenerator with the appropriate templates for the specified device.

        Args:
            device (str): The target device (ascend, gpu, or cpu).

        Raises:
            ValueError: If the device is not supported.
        """
        if device == 'ascend':
            view_call_template = template.PYBOOST_ASCEND_VIEW_CALL_TEMPLATE
            single_op_header_template = template.PYBOOST_ASCEND_SINGLE_OP_HEADER_TEMPLATE
            single_op_source_template = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
            gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
        elif device == 'cpu':
            view_call_template = template.PYBOOST_CPU_VIEW_CALL_TEMPLATE
            single_op_header_template = template.PYBOOST_CPU_SINGLE_OP_HEADER_TEMPLATE
            single_op_source_template = template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE
            gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
        elif device == 'gpu':
            view_call_template = template.PYBOOST_GPU_VIEW_CALL_TEMPLATE
            single_op_header_template = template.PYBOOST_GPU_SINGLE_OP_HEADER_TEMPLATE
            single_op_source_template = template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE
            gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
        else:
            raise ValueError(
                f"Device must be ascend, gpu, or cpu, {device} is not supported")
        self.PYBOOST_VIEW_CALL_TEMPLATE = view_call_template
        self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = single_op_header_template
        self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = single_op_source_template
        self.gen_path = gen_path
        self.device = device

    def generate_view_op_cpp_code(self, op_protos, merge_op_header, merge_op_function, ascend_merge_op_inc):
        """
        Generate C++ code for view operations in PyBoost.

        This method processes a list of operation prototypes (`op_protos`) and generates C++ code
        for view operations where `op_view` is set to `True` and the dispatch setting for the target
        device is `'default'`. All other prototypes are skipped.

        Args:
            op_protos (list): A list of operation prototypes to process. Each prototype includes
                metadata such as dispatch settings, arguments, and view-specific attributes.
            merge_op_header (list): A list to store the generated C++ header code for view operations.
            merge_op_function (list): A list to store the generated C++ source code for view operations.
            ascend_merge_op_inc (list): A list that receives the class name of every generated view
                operation (used for all devices despite its name).
        """
        for op_proto in op_protos:
            if op_proto.op_dispatch is None:
                continue
            # Requiring 'default' also rules out the 'None' dispatch setting, so no separate
            # check for 'None' is needed.
            if getattr(op_proto.op_dispatch, self.device) != 'default':
                continue
            if not op_proto.op_view:
                continue

            op_parser = OpTemplateParser(op_proto)
            call_args_tensor = op_parser.get_call_args_tensor()
            call_args = op_parser.parse_original_call_args(op_proto.op_args)
            call_args_with_type = op_parser.parse_call_args_with_types()
            _, call_func_outputs = op_parser.generate_pyboost_outputs()
            # The first call argument is passed to the template as the view's input.
            call_impl = self.PYBOOST_VIEW_CALL_TEMPLATE.replace(op_name=op_proto.op_class.name,
                                                                call_args=call_args,
                                                                call_tensors=call_args_tensor,
                                                                return_values=call_func_outputs,
                                                                input=call_args[0])
            customize_include = f'#include "{K.MS_OPS_VIEW_PATH}/{op_proto.op_name}_strides_calc.h"\n'
            cpp_func_return = _generate_cpp_func_return(op_proto)
            merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=op_proto.op_name,
                                                                                  customize_include=customize_include))

            merge_op_function.append(
                self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_proto.op_class.name,
                                                               call_args_with_type=call_args_with_type,
                                                               return_type=cpp_func_return,
                                                               call_impl=call_impl,
                                                               register_custom_kernel=""))
            ascend_merge_op_inc.append(op_proto.op_class.name)
362
+
363
+
364
class AclnnOpCppCodeGenerator:
    """
    Generates C++ source files for ACLNN operations in PyBoost.

    This class handles the generation of source snippets for non-view operations whose dispatch
    setting for the target device is 'default', including the call implementation and the
    input preparation statements.

    Attributes:
        PYBOOST_CALL_TEMPLATE (Template): Template for generating the operation call implementation.
        PYBOOST_SINGLE_OP_HEADER_TEMPLATE (Template): Template for the per-operator include line.
        PYBOOST_SINGLE_OP_SOURCE_TEMPLATE (Template): Template for generating the operation source code.
        gen_path (str): Path for saving the generated C++ source files.
        device (str): The target device (ascend, cpu, or gpu).
    """

    def __init__(self, device):
        """
        Initializes the AclnnOpCppCodeGenerator with the appropriate templates for the specified device.

        Args:
            device (str): The target device (ascend, gpu, or cpu).

        Raises:
            ValueError: If the device is not supported.
        """
        if device == 'ascend':
            call_template = template.PYBOOST_ASCEND_CALL_TEMPLATE
            single_op_source_template = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
            gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
        elif device == 'cpu':
            call_template = template.PYBOOST_CPU_CALL_TEMPLATE
            single_op_source_template = template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE
            gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
        elif device == 'gpu':
            call_template = template.PYBOOST_GPU_CALL_TEMPLATE
            single_op_source_template = template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE
            gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
        else:
            raise ValueError(
                f"Device must be ascend, gpu, or cpu, {device} is not supported")
        self.PYBOOST_CALL_TEMPLATE = call_template
        self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.Template(
            '#include "kernel/${device}/pyboost/auto_generate/${operator_name}.h"\n'
        )

        self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = single_op_source_template
        self.gen_path = gen_path
        self.device = device

    def generate_aclnn_op_cpp_code(self, op_protos, merge_op_header, merge_op_function, ascend_merge_op_inc):
        """
        Generate C++ code for ACLNN operations in PyBoost.

        This method processes a list of operation prototypes (`op_protos`) and generates C++ code
        for every prototype that has dispatch settings, uses the 'default' dispatch on the target
        device and is not a view operation. All other prototypes are skipped.

        Args:
            op_protos (list): A list of operation prototypes. Each prototype includes metadata
                such as operation name, dispatch settings, view attributes, and arguments.
            merge_op_header (list): A list to store the generated C++ header code for ACLNN operations.
            merge_op_function (list): A list to store the generated C++ source code for ACLNN operations.
            ascend_merge_op_inc (list): A list that receives the class name of every generated
                operation (used for all devices despite its name).
        """
        for op_proto in op_protos:
            if op_proto.op_dispatch is None:
                continue
            # Requiring 'default' also rules out the 'None' dispatch setting, so no separate
            # check for 'None' is needed.
            if getattr(op_proto.op_dispatch, self.device) != 'default':
                continue
            if op_proto.op_view:
                continue

            op_parser = OpTemplateParser(op_proto)
            aclnn_name = AclnnUtils.get_aclnn_interface(op_proto.op_class.name)

            call_args_tensor = op_parser.get_call_args_tensor()
            create_input_address = self._generate_create_input_address(op_parser)
            malloc_inputs = self._generate_malloc_input(op_parser)
            op_outputs, call_func_outputs = op_parser.generate_pyboost_outputs()
            get_inputs_kernel_tensors = self._generate_get_inputs_kernel_tensors(op_parser)

            # Only Ascend ops reported by is_cube() pass an extra cube_math_type argument.
            cube_math_type, get_cube_math_type = '', ''
            if self.device == 'ascend' and is_cube(op_proto.op_class.name):
                get_cube_math_type = '// cubeMathType: 0 - KEEP_DTYPE, 1 - ALLOW_FP32_DOWN_PRECISION\n'
                get_cube_math_type += "auto cube_math_type = GetCubeMathType();"
                cube_math_type = ', cube_math_type'

            # Computed once and reused both for the outputs decision and for the template.
            inplace_process = _generate_inplace_process_cpp_code(op_proto)
            # The outputs are appended to the call only when there is no in-place processing code.
            real_output = ', ' + op_outputs if inplace_process == '' else ''

            cast_input_code, real_call_args_tensor = self._generate_tensor_cpu_cast_input_code(op_parser)
            cpp_func_return = _generate_cpp_func_return(op_proto)
            _, tensor_list_convert, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
            call_args_after_convert, value_tuple_convert, const_number_convert = op_parser.op_args_converter()
            call_args = op_parser.parse_original_call_args(op_proto.op_args)
            call_args_with_type = op_parser.parse_call_args_with_types()
            call_impl = self.PYBOOST_CALL_TEMPLATE.replace(aclnn_name=aclnn_name,
                                                           call_args=call_args,
                                                           call_tensors=call_args_tensor,
                                                           value_tuple_convert=value_tuple_convert,
                                                           const_number_convert=const_number_convert,
                                                           create_input_address=create_input_address,
                                                           tensor_list_convert=tensor_list_convert,
                                                           call_args_with_tensor=call_args_with_tensor,
                                                           malloc_inputs=malloc_inputs,
                                                           get_inputs_kernel_tensors=get_inputs_kernel_tensors,
                                                           get_cube_math_type=get_cube_math_type,
                                                           cube_math_type=cube_math_type,
                                                           real_call_args=call_args_after_convert,
                                                           return_values=call_func_outputs,
                                                           outputs=real_output,
                                                           inplace_process=inplace_process,
                                                           cast_input_code=cast_input_code,
                                                           real_call_args_tensor=real_call_args_tensor,
                                                           class_name=op_proto.op_class.name,
                                                           op_name_str=op_proto.op_class.name)

            merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=op_proto.op_name,
                                                                                  device=self.device))

            merge_op_function.append(
                self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_proto.op_class.name,
                                                               call_args_with_type=call_args_with_type,
                                                               return_type=cpp_func_return,
                                                               call_impl=call_impl,
                                                               register_custom_kernel=''))
            ascend_merge_op_inc.append(op_proto.op_class.name)

    def _generate_tensor_cpu_cast_input_code(self, op_parser: OpTemplateParser):
        """
        Generates the input casting code for CPU tensor operations.

        For every call argument that is a tensor (listed by ``get_call_args_tensor``) or whose name
        ends with "_vector", a ``PyBoostUtils::CastTensor`` statement is emitted and the argument is
        replaced by its ``real_``-prefixed counterpart ("_vector" arguments are additionally wrapped
        in ``PyBoostUtils::ConvertTensorVectorToTuple``).

        Args:
            op_parser (OpTemplateParser): The parser object for the operation prototype.

        Returns:
            tuple: A tuple containing the casting code and the updated tensor call arguments.
        """
        _, _, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
        call_tensors = op_parser.get_call_args_tensor()
        cast_statements = []
        real_call_args_tensor = call_args_with_tensor.copy()
        for i, tensor in enumerate(call_args_with_tensor):
            is_tuple_tensor = real_call_args_tensor[i].endswith("_vector")
            is_tensor = real_call_args_tensor[i] in call_tensors
            cast_statement = f'const auto &real_{tensor} = PyBoostUtils::CastTensor({tensor}, ' \
                             f'select_kernel.input_type()[{i}].dtype, "CPU");\n'
            if is_tensor:
                cast_statements.append(cast_statement)
                real_call_args_tensor[i] = "real_" + real_call_args_tensor[i]
            if is_tuple_tensor:
                cast_statements.append(cast_statement)
                real_call_args_tensor[i] = "PyBoostUtils::ConvertTensorVectorToTuple(real_" + real_call_args_tensor[
                    i] + ")"
        cast_input = "".join(cast_statements)
        if cast_input != "":
            # The cast statements read select_kernel, so declare it first.
            cast_input = "auto &select_kernel = kernel_attr_pair.second;\n" + cast_input
        return cast_input, real_call_args_tensor

    def _generate_create_input_address(self, op_parser: OpTemplateParser):
        """
        Generates the ``PyBoostUtils::PrepareOpInputs`` statement for tensors that need to be allocated.

        Args:
            op_parser (OpTemplateParser): The parser object for the operation prototype.

        Returns:
            str: The generated statement, or an empty string when no tensor needs allocation.
        """
        need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
        args_list = ', '.join(f'{item}' for item in need_malloc_tensors)
        if not args_list:
            return ''
        return f'PyBoostUtils::PrepareOpInputs(device_context_, op->stream_id(), {args_list});\n'

    def _generate_malloc_input(self, op_parser: OpTemplateParser):
        """
        Generates the ``PyBoostUtils::MallocOpInputs`` statement for tensors that need to be allocated.

        Args:
            op_parser (OpTemplateParser): The parser object for the operation prototype.

        Returns:
            str: The generated statement, or an empty string when no tensor needs allocation.
        """
        need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
        args_list = ', '.join(f'{item}' for item in need_malloc_tensors)
        if not args_list:
            return ''
        return f'PyBoostUtils::MallocOpInputs(device_context, {args_list});\n'

    def _generate_get_inputs_kernel_tensors(self, op_parser: OpTemplateParser):
        """
        Generates the code for retrieving input kernel tensors.

        Args:
            op_parser (OpTemplateParser): The parser object for the operation prototype.

        Returns:
            str: The generated ``PyBoostUtils::GetAddressInfo`` statement, or an empty string when
            there are no call arguments.
        """
        _, _, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
        args_list = ', '.join(f'{item}' for item in call_args_with_tensor)
        if not args_list:
            return ''
        return f'const auto &input_address_info = PyBoostUtils::GetAddressInfo(' \
               f'device_context, op->stream_id(), op->input_abs(), {args_list});\n'
578
+
579
+
580
+ class PyboostOpFunctionGenerator(BaseGenerator):
581
+ """
582
+ Generates C++ source files for ACLNN operations in PyBoost.
583
+
584
+ This class handles the generation of source files for operations that utilize the ACLNN framework,
585
+ including customized calls and tensor management.
586
+
587
+ Attributes:
588
+ PYBOOST_CALL_TEMPLATE (Template): Template for generating ACLNN operation calls.
589
+ PYBOOST_OP_SOURCE_TEMPLATE (Template): Template for generating operation source files.
590
+ gen_path (str): Path for saving the generated C++ source files.
591
+ device (str): The target device (ascend, cpu, or gpu).
592
+ """
593
+
594
def __init__(self):
    """
    Create the customize, view and aclnn snippet generators for each device and record the
    per-device source templates and output directories.
    """
    # One generator triple (customize / view / aclnn) per device.
    self.ascend_op_cpp_generator = PyboostOpCppGenerator('ascend')
    self.ascend_view_op_cpp_generator = PyboostViewOpCppGenerator('ascend')
    self.ascend_aclnn_cpp_generator = AclnnOpCppCodeGenerator('ascend')

    self.cpu_op_cpp_generator = PyboostOpCppGenerator('cpu')
    self.cpu_view_op_cpp_generator = PyboostViewOpCppGenerator('cpu')
    self.cpu_aclnn_cpp_generator = AclnnOpCppCodeGenerator('cpu')

    self.gpu_op_cpp_generator = PyboostOpCppGenerator('gpu')
    self.gpu_view_op_cpp_generator = PyboostViewOpCppGenerator('gpu')
    self.gpu_aclnn_cpp_generator = AclnnOpCppCodeGenerator('gpu')

    # Templates wrapping the merged snippets into a complete source file.
    self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE
    self.PYBOOST_CPU_OP_SOURCE_TEMPLATE = template.PYBOOST_CPU_OP_SOURCE_TEMPLATE
    self.PYBOOST_GPU_OP_SOURCE_TEMPLATE = template.PYBOOST_GPU_OP_SOURCE_TEMPLATE

    # Output directories, relative to the work path given to generate().
    kernel_root = K.MS_OPS_KERNEL_PATH
    self.ascend_gen_path = f"{kernel_root}/ascend/pyboost/auto_generate/"
    self.cpu_gen_path = f"{kernel_root}/cpu/pyboost/auto_generate/"
    self.gpu_gen_path = f"{kernel_root}/gpu/pyboost/auto_generate/"
    self.hccl_gen_path = "mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/"
614
+
615
def generate(self, work_path, op_protos):
    """
    Generate and save the merged PyBoost C++ sources for every supported device.

    The work is delegated, in this order, to the Ascend, CPU and GPU generation methods of
    this class; each receives the same ``work_path`` and ``op_protos``.

    Args:
        work_path (str): The base working directory where the generated files will be saved.
        op_protos (list): A list of operation prototypes containing metadata such as
            operation name, dispatch settings, arguments, and view attributes.

    Returns:
        None
    """
    per_device_writers = (
        self._generate_pyboost_ascend_ops,
        self._generate_pyboost_cpu_ops,
        self._generate_pyboost_gpu_ops,
    )
    for write_device_sources in per_device_writers:
        write_device_sources(work_path, op_protos)
636
+
637
def _generate_pyboost_ascend_ops(self, work_path, op_protos):
    """
    Generates Ascend PyBoost ops functions source files after being merged into specific chunk sizes.

    The customize, view and aclnn generators fill shared header/function/include lists. These are
    merged in chunks of 120 and each chunk is saved as ``pyboost_ascend_ops_<i>.cc``. The snippets
    of Ascend communication ops are collected separately and saved as one ``pyboost_hccl_ops.cc``.

    Args:
        work_path (str): The directory path where the generated C++ source files will be saved.
        op_protos (list): A list of operation prototypes that define the operations for which
            the C++ code will be generated.
    """
    headers, functions, op_incs = [], [], []
    hccl_headers, hccl_functions, hccl_op_incs = [], [], []

    self.ascend_op_cpp_generator.generate_customize_op_cpp_code(
        op_protos, headers, functions, op_incs, hccl_headers, hccl_functions, hccl_op_incs)
    self.ascend_view_op_cpp_generator.generate_view_op_cpp_code(op_protos, headers, functions, op_incs)
    self.ascend_aclnn_cpp_generator.generate_aclnn_op_cpp_code(op_protos, headers, functions, op_incs)

    header_chunks = merge_strings_by_chunk_size(headers, chunk_size=120)
    function_chunks = merge_strings_by_chunk_size(functions, chunk_size=120)
    inc_chunks = chunk_list(op_incs, n=120)

    # Clean up merged files from an earlier run before writing the new set.
    ascend_dir = os.path.join(work_path, self.ascend_gen_path)
    self._delete_residual_merged_ops_files(ascend_dir, len(header_chunks))

    for index, (header_chunk, function_chunk) in enumerate(zip(header_chunks, function_chunks)):
        # One include head per distinct lower-cased first letter of the op class names in this chunk.
        inc_heads = {template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=inc_name[0].lower())
                     for inc_name in inc_chunks[index]}
        source_text = self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE.replace(
            merge_op_header=header_chunk, merge_op_function=function_chunk, ops_inc=sorted(inc_heads))
        save_file(ascend_dir, f"pyboost_ascend_ops_{index}.cc", source_text)

    hccl_inc_heads = {template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=inc_name[0].lower())
                      for inc_name in hccl_op_incs}
    hccl_source_text = self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE.replace(
        merge_op_header='\n'.join(hccl_headers),
        merge_op_function='\n'.join(hccl_functions),
        ops_inc=sorted(hccl_inc_heads))
    save_file(os.path.join(work_path, self.hccl_gen_path), "pyboost_hccl_ops.cc", hccl_source_text)
693
+
694
def _generate_pyboost_cpu_ops(self, work_path, op_protos):
    """
    Write the merged CPU PyBoost op sources as ``pyboost_cpu_ops_<i>.cc`` files.

    The customize, view and aclnn CPU generators are invoked in that order with
    the same three accumulator lists (headers, function bodies, include names).
    The accumulated content is then split with a chunk size of 120 and one
    source file is saved per chunk.

    Args:
        work_path (str): The directory path where the generated C++ source files will be saved.
        op_protos (list): A list of operation prototypes that define the operations for which
            the C++ code will be generated.
    """
    headers = []
    functions = []
    inc_names = []
    # All three generators receive the very same list objects; presumably each one
    # appends its own entries to them -- verify against the generator classes.
    self.cpu_op_cpp_generator.generate_customize_op_cpp_code(op_protos, headers, functions, inc_names)
    self.cpu_view_op_cpp_generator.generate_view_op_cpp_code(op_protos, headers, functions, inc_names)
    self.cpu_aclnn_cpp_generator.generate_aclnn_op_cpp_code(op_protos, headers, functions, inc_names)

    header_chunks = merge_strings_by_chunk_size(headers, chunk_size=120)
    function_chunks = merge_strings_by_chunk_size(functions, chunk_size=120)
    inc_chunks = chunk_list(inc_names, n=120)

    output_dir = os.path.join(work_path, self.cpu_gen_path)
    # Clear out previously merged files first when the number of chunks has changed.
    self._delete_residual_merged_ops_files(output_dir, len(header_chunks))

    for i, (header_chunk, function_chunk) in enumerate(zip(header_chunks, function_chunks)):
        # One include line per distinct lower-cased first character of the op names in this chunk.
        include_heads = {
            template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=op_name_inc[0].lower())
            for op_name_inc in inc_chunks[i]
        }
        merged_source = self.PYBOOST_CPU_OP_SOURCE_TEMPLATE.replace(
            merge_op_header=header_chunk,
            merge_op_function=function_chunk,
            ops_inc=sorted(include_heads))
        save_file(output_dir, f"pyboost_cpu_ops_{i}.cc", merged_source)
732
+
733
def _generate_pyboost_gpu_ops(self, work_path, op_protos):
    """
    Write the merged GPU PyBoost op sources as ``pyboost_gpu_ops_<i>.cc`` files.

    Headers, function bodies and include names are collected from the customize,
    view and aclnn GPU generators (called in that order), split with a chunk
    size of 120, and saved as one source file per chunk.

    Args:
        work_path (str): The directory path where the generated C++ source files will be saved.
        op_protos (list): A list of operation prototypes that define the operations for which
            the C++ code will be generated.
    """
    collected_headers = []
    collected_functions = []
    collected_inc_names = []
    shared_args = (op_protos, collected_headers, collected_functions, collected_inc_names)
    # The same accumulator lists are handed to every generator.
    self.gpu_op_cpp_generator.generate_customize_op_cpp_code(*shared_args)
    self.gpu_view_op_cpp_generator.generate_view_op_cpp_code(*shared_args)
    self.gpu_aclnn_cpp_generator.generate_aclnn_op_cpp_code(*shared_args)

    chunked_headers = merge_strings_by_chunk_size(collected_headers, chunk_size=120)
    chunked_functions = merge_strings_by_chunk_size(collected_functions, chunk_size=120)
    chunked_inc_names = chunk_list(collected_inc_names, n=120)

    gpu_dir = os.path.join(work_path, self.gpu_gen_path)
    # Drop previously merged files up front if the chunk count differs from what is on disk.
    self._delete_residual_merged_ops_files(gpu_dir, len(chunked_headers))

    for i, (op_header, op_function) in enumerate(zip(chunked_headers, chunked_functions)):
        # Collect the distinct include lines, keyed by the lower-cased first character of each name.
        inc_heads = set()
        for op_name_inc in chunked_inc_names[i]:
            prefix = op_name_inc[0].lower()
            inc_heads.add(template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=prefix))
        source_text = self.PYBOOST_GPU_OP_SOURCE_TEMPLATE.replace(
            merge_op_header=op_header,
            merge_op_function=op_function,
            ops_inc=sorted(inc_heads))
        save_file(gpu_dir, f"pyboost_gpu_ops_{i}.cc", source_text)
771
+
772
def _delete_residual_merged_ops_files(self, files_path, new_gen_num):
    """
    Delete previously merged PyBoost op sources when their count is about to change.

    Entries of ``files_path`` whose names match ``pyboost_.*_ops_.*\\.cc`` (matched
    from the start of the name) are counted. If that count differs from
    ``new_gen_num``, all of those files are removed so that no stale chunk file
    survives next to the newly generated ones. If the counts are equal, nothing
    is deleted.

    A ``files_path`` that does not exist (or is not a directory) is treated as
    containing no residual files, so the method returns without doing anything
    instead of letting ``os.listdir`` raise ``FileNotFoundError``.

    Args:
        files_path (str): The path to the directory containing the files to be checked and deleted.
        new_gen_num (int): The number of newly generated pyboost ops files expected to be in the directory.

    Returns:
        None
    """
    # Nothing can be left over in a directory that has not been created yet; this
    # mirrors the existence check done by delete_residual_files.
    if not os.path.isdir(files_path):
        return

    old_pyboost_ops_files = [
        file_name for file_name in os.listdir(files_path)
        if re.match(r'pyboost_.*_ops_.*\.cc', file_name)
    ]
    # Same number of chunk files as before: the new files will overwrite the old ones by name.
    if len(old_pyboost_ops_files) == new_gen_num:
        return

    for file_name in old_pyboost_ops_files:
        os.remove(os.path.join(files_path, file_name))
797
+
798
+
799
def _generate_cpp_func_return(op_proto):
    """Builds the C++ return type string for an operator prototype.

    Each entry of ``op_proto.op_returns`` is converted with ``get_return_type`` and
    then mapped from a ``TensorPtr``-based type to its ``BaseTensorPtr`` counterpart.
    A single return yields that type directly; several returns are wrapped in a
    ``std::tuple<...>`` with the types joined by commas.

    Args:
        op_proto (OpProto): The operator prototype containing return information.

    Returns:
        str: The C++ return type for the function based on the operator prototype.

    Raises:
        Exception: If a return type has no base-type mapping, or if the prototype
            declares no returns at all.
    """
    base_type_of = {
        'std::vector<mindspore::tensor::TensorPtr>': 'std::vector<mindspore::tensor::BaseTensorPtr>',
        'mindspore::tensor::TensorPtr': 'mindspore::tensor::BaseTensorPtr'
    }
    converted = []
    for return_obj in op_proto.op_returns:
        declared_type = get_return_type(return_obj.arg_dtype)
        # Only the two tensor types listed in the mapping are accepted.
        if declared_type not in base_type_of:
            raise Exception("Not return found")
        converted.append(base_type_of[declared_type])

    if not converted:
        raise Exception("Not return found")
    if len(converted) == 1:
        return converted[0]
    return "std::tuple<" + ','.join(converted) + ">"
831
+
832
+
833
def _generate_inplace_process_cpp_code(op_proto):
    """Builds the C++ snippet that points an output at the device address of an input tensor.

    Only the first entry of ``op_proto.op_returns`` whose ``inplace`` attribute is a
    non-empty string is used; later inplace returns are ignored.

    Args:
        op_proto (OpProto): The operator prototype containing return information.

    Returns:
        str: A comment line followed by one ``set_device_address`` statement, or an
            empty string if no return is marked inplace.
    """
    # Locate the first inplace return together with its position in the outputs.
    first_inplace = next(
        ((index, return_obj)
         for index, return_obj in enumerate(op_proto.op_returns)
         if return_obj.inplace != ''),
        None)
    if first_inplace is None:
        return ''

    index, return_obj = first_inplace
    comment_line = f'// RefOps update output by input tensor\n'
    statement = f'outputs_[{index}]->set_device_address(' \
                f'{return_obj.inplace}_tensor->device_address()); '
    return comment_line + statement
853
+
854
+
855
def delete_residual_files(work_path, op_protos):
    """
    Removes generated files whose operator no longer appears in ``op_protos``.

    The ascend, gpu and cpu ``pyboost/auto_generate/`` directories and the common
    pyboost ``auto_generate/`` directory under ``work_path`` are scanned. A file is
    removed when the part of its name before the first ``.`` is not the ``op_name``
    of any prototype. Files matching ``pyboost_.*_ops_.*\\.cc`` and ``op_register.cc``
    are never touched here.

    Args:
        work_path (str): The base directory path where generated files are located.
        op_protos (list): A list of operator prototypes that are currently valid.

    Returns:
        None
    """
    valid_op_names = {op_proto.op_name for op_proto in op_protos}

    relative_dirs = [f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/" for device in
                     ["ascend", "gpu", "cpu"]]
    relative_dirs.append(f"{K.MS_COMMON_PYBOOST_KERNEL_PATH}/auto_generate/")

    for relative_dir in relative_dirs:
        generated_dir = os.path.join(work_path, relative_dir)
        if not os.path.exists(generated_dir):
            continue
        for entry in os.listdir(generated_dir):
            # No need to delete pyboost_.*_ops_.*.cc files and op_register.cc.
            # These residual files will be deleted before new files generate.
            if re.match(r'pyboost_.*_ops_.*\.cc', entry) or entry == "op_register.cc":
                continue
            # The text before the first dot is taken as the operator name.
            if entry.split(".")[0] in valid_op_names:
                continue
            residual_path = os.path.join(generated_dir, entry)
            if os.path.exists(residual_path):
                os.remove(residual_path)
891
+
892
+
893
class PyboostOpRegisterCppCodeGenerator:
    """
    Produces the ``op_register.cc`` source that registers PyBoost operations.

    The generated file combines one ``#include`` line per dispatched operator with
    one explicit ``OpFactory`` template instantiation per operator class.

    Attributes:
        PYBOOST_OP_REGISTER_TEMPLATE (Template): Template for generating the operation registration code.
    """

    def __init__(self):
        self.PYBOOST_OP_REGISTER_TEMPLATE = template.PYBOOST_OP_REGISTER_TEMPLATE

    def generate(self, work_path, op_protos):
        """
        Writes ``op_register.cc`` covering every prototype that has a dispatch setting.

        Prototypes whose ``op_dispatch`` is ``None`` are skipped. For the rest, the
        ``op_name`` is used for the header include and ``op_class.name`` for the
        ``OpFactory`` instantiation.

        Args:
            work_path (str): The directory path where the registration file will be saved.
            op_protos (list): A list of operator prototypes containing information about the operations.

        Returns:
            None
        """
        # (functional name, class name) for each operator that is dispatched.
        dispatched = [
            (op_proto.op_name, op_proto.op_class.name)
            for op_proto in op_protos
            if op_proto.op_dispatch is not None
        ]

        factory_str = ''.join(
            "template class OpFactory<{0}>;\n".format(op_name) for _, op_name in dispatched)
        include_str = ''.join(
            f'#include "{K.MS_PYBOOST_BASE_PATH}/auto_generate/{operator_name}.h"\n'
            for operator_name, _ in dispatched)

        op_register_file_str = self.PYBOOST_OP_REGISTER_TEMPLATE.replace(
            op_includes=include_str,
            op_factory_templates=factory_str)
        save_file(os.path.join(work_path, f"{K.MS_PYBOOST_BASE_PATH}/auto_generate/"),
                  "op_register.cc",
                  op_register_file_str)