mindspore 2.4.10-cp311-cp311-win_amd64.whl → 2.5.0-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic.

Files changed (366)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +8 -3
  3. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +0 -5
  7. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  8. mindspore/_extends/parse/compile_config.py +64 -0
  9. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  10. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
  11. mindspore/_extends/parse/parser.py +23 -5
  12. mindspore/_extends/parse/standard_method.py +123 -27
  13. mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
  14. mindspore/amp.py +7 -1
  15. mindspore/avcodec-59.dll +0 -0
  16. mindspore/avdevice-59.dll +0 -0
  17. mindspore/avfilter-8.dll +0 -0
  18. mindspore/avformat-59.dll +0 -0
  19. mindspore/avutil-57.dll +0 -0
  20. mindspore/boost/boost_cell_wrapper.py +136 -41
  21. mindspore/common/__init__.py +3 -1
  22. mindspore/common/_register_for_tensor.py +0 -1
  23. mindspore/common/_stub_tensor.py +25 -4
  24. mindspore/common/_tensor_cpp_method.py +17 -0
  25. mindspore/common/_tensor_docs.py +6132 -0
  26. mindspore/common/api.py +98 -21
  27. mindspore/common/dtype.py +34 -34
  28. mindspore/common/dump.py +2 -1
  29. mindspore/common/file_system.py +8 -3
  30. mindspore/common/generator.py +2 -0
  31. mindspore/common/hook_handle.py +3 -1
  32. mindspore/common/initializer.py +3 -4
  33. mindspore/common/lazy_inline.py +8 -2
  34. mindspore/common/mindir_util.py +10 -2
  35. mindspore/common/parameter.py +31 -15
  36. mindspore/common/tensor.py +713 -1337
  37. mindspore/communication/__init__.py +1 -1
  38. mindspore/communication/_comm_helper.py +5 -0
  39. mindspore/communication/comm_func.py +215 -173
  40. mindspore/communication/management.py +23 -20
  41. mindspore/context.py +285 -191
  42. mindspore/dataset/__init__.py +23 -19
  43. mindspore/dataset/callback/ds_callback.py +2 -1
  44. mindspore/dataset/core/config.py +84 -3
  45. mindspore/dataset/engine/cache_admin.py +3 -3
  46. mindspore/dataset/engine/cache_client.py +5 -4
  47. mindspore/dataset/engine/datasets.py +192 -149
  48. mindspore/dataset/engine/datasets_audio.py +14 -0
  49. mindspore/dataset/engine/datasets_standard_format.py +11 -11
  50. mindspore/dataset/engine/datasets_text.py +38 -1
  51. mindspore/dataset/engine/datasets_user_defined.py +100 -66
  52. mindspore/dataset/engine/datasets_vision.py +81 -8
  53. mindspore/dataset/engine/iterators.py +281 -63
  54. mindspore/dataset/engine/obs/util.py +8 -0
  55. mindspore/dataset/engine/queue.py +40 -0
  56. mindspore/dataset/engine/samplers.py +26 -2
  57. mindspore/dataset/engine/serializer_deserializer.py +1 -1
  58. mindspore/dataset/engine/validators.py +43 -11
  59. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  60. mindspore/dataset/transforms/transforms.py +29 -12
  61. mindspore/dataset/vision/validators.py +1 -2
  62. mindspore/device_context/__init__.py +21 -0
  63. mindspore/device_context/ascend/__init__.py +25 -0
  64. mindspore/device_context/ascend/device.py +72 -0
  65. mindspore/device_context/ascend/op_debug.py +94 -0
  66. mindspore/device_context/ascend/op_precision.py +193 -0
  67. mindspore/device_context/ascend/op_tuning.py +127 -0
  68. mindspore/device_context/cpu/__init__.py +25 -0
  69. mindspore/device_context/cpu/device.py +62 -0
  70. mindspore/device_context/cpu/op_tuning.py +43 -0
  71. mindspore/device_context/gpu/__init__.py +21 -0
  72. mindspore/device_context/gpu/device.py +70 -0
  73. mindspore/device_context/gpu/op_precision.py +67 -0
  74. mindspore/device_context/gpu/op_tuning.py +175 -0
  75. mindspore/device_manager.py +134 -0
  76. mindspore/dnnl.dll +0 -0
  77. mindspore/experimental/llm_boost/__init__.py +1 -0
  78. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  79. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  80. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  81. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  82. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  83. mindspore/experimental/llm_boost/register.py +1 -0
  84. mindspore/experimental/optim/adadelta.py +26 -22
  85. mindspore/experimental/optim/adam.py +3 -0
  86. mindspore/experimental/optim/lr_scheduler.py +33 -24
  87. mindspore/experimental/optim/radam.py +33 -30
  88. mindspore/hal/device.py +28 -0
  89. mindspore/hal/event.py +17 -0
  90. mindspore/hal/memory.py +94 -3
  91. mindspore/hal/stream.py +91 -6
  92. mindspore/include/api/context.h +0 -1
  93. mindspore/jpeg62.dll +0 -0
  94. mindspore/log.py +12 -0
  95. mindspore/mindrecord/__init__.py +1 -1
  96. mindspore/mindrecord/config.py +17 -316
  97. mindspore/mindrecord/filereader.py +1 -9
  98. mindspore/mindrecord/filewriter.py +5 -15
  99. mindspore/mindrecord/mindpage.py +1 -9
  100. mindspore/mindspore_backend.dll +0 -0
  101. mindspore/mindspore_common.dll +0 -0
  102. mindspore/mindspore_core.dll +0 -0
  103. mindspore/mindspore_glog.dll +0 -0
  104. mindspore/mindspore_ops.dll +0 -0
  105. mindspore/mint/__init__.py +824 -218
  106. mindspore/mint/distributed/__init__.py +66 -4
  107. mindspore/mint/distributed/distributed.py +2594 -44
  108. mindspore/mint/linalg/__init__.py +6 -0
  109. mindspore/mint/nn/__init__.py +473 -14
  110. mindspore/mint/nn/functional.py +486 -11
  111. mindspore/mint/nn/layer/__init__.py +17 -4
  112. mindspore/mint/nn/layer/_functions.py +330 -0
  113. mindspore/mint/nn/layer/activation.py +169 -1
  114. mindspore/mint/nn/layer/basic.py +123 -0
  115. mindspore/mint/nn/layer/conv.py +727 -0
  116. mindspore/mint/nn/layer/normalization.py +215 -19
  117. mindspore/mint/nn/layer/padding.py +797 -0
  118. mindspore/mint/nn/layer/pooling.py +170 -0
  119. mindspore/mint/optim/__init__.py +2 -1
  120. mindspore/mint/optim/adam.py +223 -0
  121. mindspore/mint/optim/adamw.py +26 -19
  122. mindspore/mint/special/__init__.py +2 -1
  123. mindspore/multiprocessing/__init__.py +5 -0
  124. mindspore/nn/cell.py +126 -19
  125. mindspore/nn/dynamic_lr.py +2 -1
  126. mindspore/nn/layer/activation.py +6 -6
  127. mindspore/nn/layer/basic.py +35 -25
  128. mindspore/nn/layer/channel_shuffle.py +3 -3
  129. mindspore/nn/layer/embedding.py +3 -3
  130. mindspore/nn/layer/normalization.py +8 -7
  131. mindspore/nn/layer/padding.py +4 -3
  132. mindspore/nn/layer/pooling.py +47 -13
  133. mindspore/nn/layer/rnn_cells.py +1 -1
  134. mindspore/nn/layer/rnns.py +2 -1
  135. mindspore/nn/layer/timedistributed.py +5 -5
  136. mindspore/nn/layer/transformer.py +48 -26
  137. mindspore/nn/learning_rate_schedule.py +5 -3
  138. mindspore/nn/loss/loss.py +31 -36
  139. mindspore/nn/optim/ada_grad.py +1 -0
  140. mindspore/nn/optim/adadelta.py +2 -2
  141. mindspore/nn/optim/adam.py +1 -1
  142. mindspore/nn/optim/lars.py +1 -4
  143. mindspore/nn/optim/optimizer.py +1 -1
  144. mindspore/nn/optim/rprop.py +2 -2
  145. mindspore/nn/optim/thor.py +2 -1
  146. mindspore/nn/utils/init.py +13 -11
  147. mindspore/nn/wrap/cell_wrapper.py +4 -6
  148. mindspore/nn/wrap/loss_scale.py +3 -4
  149. mindspore/numpy/array_creations.py +60 -62
  150. mindspore/numpy/array_ops.py +148 -143
  151. mindspore/numpy/logic_ops.py +41 -42
  152. mindspore/numpy/math_ops.py +361 -359
  153. mindspore/numpy/utils.py +16 -16
  154. mindspore/numpy/utils_const.py +4 -4
  155. mindspore/opencv_core452.dll +0 -0
  156. mindspore/opencv_imgcodecs452.dll +0 -0
  157. mindspore/opencv_imgproc452.dll +0 -0
  158. mindspore/ops/__init__.py +2 -1
  159. mindspore/ops/_grad_experimental/grad_comm_ops.py +94 -13
  160. mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
  161. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  162. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  163. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  164. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  165. mindspore/ops/_vmap/vmap_array_ops.py +20 -19
  166. mindspore/ops/_vmap/vmap_base.py +0 -2
  167. mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
  168. mindspore/ops/_vmap/vmap_math_ops.py +11 -9
  169. mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
  170. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
  171. mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
  172. mindspore/ops/auto_generate/gen_extend_func.py +554 -60
  173. mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
  174. mindspore/ops/auto_generate/gen_ops_prim.py +8024 -3409
  175. mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
  176. mindspore/ops/composite/base.py +1 -1
  177. mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
  178. mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
  179. mindspore/ops/function/__init__.py +12 -0
  180. mindspore/ops/function/array_func.py +561 -159
  181. mindspore/ops/function/clip_func.py +64 -0
  182. mindspore/ops/function/debug_func.py +28 -20
  183. mindspore/ops/function/image_func.py +1 -1
  184. mindspore/ops/function/linalg_func.py +5 -4
  185. mindspore/ops/function/math_func.py +1659 -290
  186. mindspore/ops/function/nn_func.py +988 -317
  187. mindspore/ops/function/parameter_func.py +3 -56
  188. mindspore/ops/function/random_func.py +243 -33
  189. mindspore/ops/function/sparse_unary_func.py +1 -1
  190. mindspore/ops/functional.py +18 -5
  191. mindspore/ops/functional_overload.py +897 -0
  192. mindspore/ops/operations/__init__.py +3 -2
  193. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  194. mindspore/ops/operations/_grad_ops.py +2 -34
  195. mindspore/ops/operations/_infer_ops.py +2 -1
  196. mindspore/ops/operations/_inner_ops.py +38 -8
  197. mindspore/ops/operations/array_ops.py +45 -303
  198. mindspore/ops/operations/comm_ops.py +19 -16
  199. mindspore/ops/operations/custom_ops.py +11 -55
  200. mindspore/ops/operations/debug_ops.py +42 -47
  201. mindspore/ops/operations/inner_ops.py +6 -4
  202. mindspore/ops/operations/linalg_ops.py +3 -2
  203. mindspore/ops/operations/manually_defined/ops_def.py +185 -104
  204. mindspore/ops/operations/math_ops.py +11 -216
  205. mindspore/ops/operations/nn_ops.py +146 -308
  206. mindspore/ops/primitive.py +23 -21
  207. mindspore/ops/tensor_method.py +1669 -0
  208. mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
  209. mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
  210. mindspore/ops_generate/arg_handler.py +0 -61
  211. mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
  212. mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
  213. mindspore/ops_generate/base_generator.py +11 -0
  214. mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
  215. mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
  216. mindspore/ops_generate/functional_overload_py_generator.py +110 -0
  217. mindspore/ops_generate/functions_cc_generator.py +233 -0
  218. mindspore/ops_generate/gen_aclnn_implement.py +110 -114
  219. mindspore/ops_generate/gen_constants.py +157 -3
  220. mindspore/ops_generate/gen_ops.py +245 -990
  221. mindspore/ops_generate/gen_pyboost_func.py +97 -998
  222. mindspore/ops_generate/gen_utils.py +119 -33
  223. mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
  224. mindspore/ops_generate/op_api_proto.py +206 -0
  225. mindspore/ops_generate/op_def_py_generator.py +131 -0
  226. mindspore/ops_generate/op_prim_py_generator.py +480 -0
  227. mindspore/ops_generate/op_proto.py +373 -108
  228. mindspore/ops_generate/op_template_parser.py +436 -0
  229. mindspore/ops_generate/ops_def_cc_generator.py +288 -0
  230. mindspore/ops_generate/ops_def_h_generator.py +74 -0
  231. mindspore/ops_generate/ops_name_h_generator.py +68 -0
  232. mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
  233. mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
  234. mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
  235. mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
  236. mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
  237. mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
  238. mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
  239. mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
  240. mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
  241. mindspore/ops_generate/pyboost_utils.py +92 -33
  242. mindspore/ops_generate/template.py +294 -44
  243. mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
  244. mindspore/parallel/__init__.py +3 -3
  245. mindspore/parallel/_auto_parallel_context.py +24 -33
  246. mindspore/parallel/_parallel_serialization.py +13 -2
  247. mindspore/parallel/_utils.py +4 -1
  248. mindspore/parallel/algo_parameter_config.py +1 -1
  249. mindspore/parallel/checkpoint_transform.py +44 -0
  250. mindspore/parallel/cluster/process_entity/_api.py +131 -37
  251. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  252. mindspore/parallel/cluster/run.py +20 -3
  253. mindspore/parallel/parameter_broadcast.py +1 -1
  254. mindspore/parallel/shard.py +3 -0
  255. mindspore/parallel/transform_safetensors.py +119 -253
  256. mindspore/profiler/__init__.py +17 -4
  257. mindspore/profiler/analysis/__init__.py +0 -0
  258. mindspore/profiler/analysis/parser/__init__.py +0 -0
  259. mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
  260. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  261. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  262. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  263. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  264. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  265. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
  266. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  267. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
  268. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  269. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  270. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  271. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  272. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  273. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  274. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  275. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  276. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  277. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  278. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
  279. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  280. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  281. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  282. mindspore/profiler/analysis/task_manager.py +131 -0
  283. mindspore/profiler/analysis/time_converter.py +84 -0
  284. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  285. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
  286. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  287. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
  288. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
  289. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
  290. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
  291. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  292. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  293. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
  294. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  295. mindspore/profiler/analysis/work_flow.py +73 -0
  296. mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
  297. mindspore/profiler/common/command_executor.py +90 -0
  298. mindspore/profiler/common/constant.py +174 -3
  299. mindspore/profiler/common/file_manager.py +208 -0
  300. mindspore/profiler/common/log.py +130 -0
  301. mindspore/profiler/common/msprof_cmd_tool.py +202 -0
  302. mindspore/profiler/common/path_manager.py +371 -0
  303. mindspore/profiler/common/process_bar.py +168 -0
  304. mindspore/profiler/common/process_pool.py +9 -3
  305. mindspore/profiler/common/profiler_context.py +476 -0
  306. mindspore/profiler/common/profiler_info.py +304 -0
  307. mindspore/profiler/common/profiler_output_path.py +284 -0
  308. mindspore/profiler/common/profiler_parameters.py +210 -0
  309. mindspore/profiler/common/profiler_path_manager.py +120 -0
  310. mindspore/profiler/common/record_function.py +76 -0
  311. mindspore/profiler/common/tlv_decoder.py +76 -0
  312. mindspore/profiler/common/util.py +75 -2
  313. mindspore/profiler/dynamic_profiler.py +270 -37
  314. mindspore/profiler/envprofiler.py +138 -0
  315. mindspore/profiler/mstx.py +199 -0
  316. mindspore/profiler/platform/__init__.py +21 -0
  317. mindspore/profiler/platform/base_profiler.py +40 -0
  318. mindspore/profiler/platform/cpu_profiler.py +124 -0
  319. mindspore/profiler/platform/gpu_profiler.py +74 -0
  320. mindspore/profiler/platform/npu_profiler.py +309 -0
  321. mindspore/profiler/profiler.py +580 -93
  322. mindspore/profiler/profiler_action_controller.py +187 -0
  323. mindspore/profiler/profiler_interface.py +114 -0
  324. mindspore/profiler/schedule.py +208 -0
  325. mindspore/rewrite/api/symbol_tree.py +1 -2
  326. mindspore/run_check/_check_version.py +2 -6
  327. mindspore/runtime/__init__.py +37 -0
  328. mindspore/runtime/device.py +27 -0
  329. mindspore/runtime/event.py +209 -0
  330. mindspore/runtime/executor.py +148 -0
  331. mindspore/runtime/memory.py +392 -0
  332. mindspore/runtime/stream.py +460 -0
  333. mindspore/runtime/thread_bind_core.py +401 -0
  334. mindspore/swresample-4.dll +0 -0
  335. mindspore/swscale-6.dll +0 -0
  336. mindspore/tinyxml2.dll +0 -0
  337. mindspore/train/__init__.py +2 -2
  338. mindspore/train/_utils.py +53 -18
  339. mindspore/train/amp.py +8 -4
  340. mindspore/train/callback/_checkpoint.py +32 -18
  341. mindspore/train/callback/_early_stop.py +1 -1
  342. mindspore/train/callback/_flops_collector.py +105 -69
  343. mindspore/train/callback/_history.py +1 -1
  344. mindspore/train/callback/_summary_collector.py +44 -6
  345. mindspore/train/callback/_tft_register.py +31 -10
  346. mindspore/train/dataset_helper.py +11 -11
  347. mindspore/train/metrics/precision.py +4 -5
  348. mindspore/train/mind_ir_pb2.py +167 -46
  349. mindspore/train/model.py +13 -15
  350. mindspore/train/serialization.py +462 -76
  351. mindspore/train/summary/summary_record.py +1 -2
  352. mindspore/train/train_thor/model_thor.py +1 -1
  353. mindspore/turbojpeg.dll +0 -0
  354. mindspore/utils/__init__.py +4 -2
  355. mindspore/utils/dryrun.py +138 -0
  356. mindspore/utils/runtime_execution_order_check.py +550 -0
  357. mindspore/version.py +1 -1
  358. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/METADATA +2 -3
  359. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/RECORD +362 -238
  360. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
  361. mindspore/common/_tensor_overload.py +0 -139
  362. mindspore/mindspore_np_dtype.dll +0 -0
  363. mindspore/profiler/envprofiling.py +0 -254
  364. mindspore/profiler/profiling.py +0 -1926
  365. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
  366. {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
mindspore/ops_generate/pyboost_op_cpp_code_generator.py (new file, +851 -0)
@@ -0,0 +1,851 @@
1
+ # Copyright 2024 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+ """
16
+ This module defines several classes and functions for generating C++ code for PyBoost operations,
17
+ including function headers, source files, and registration code. It handles the generation of code
18
+ for different devices (Ascend, CPU, GPU) and manages residual files associated with operator prototypes.
19
+ """
20
+
21
+ import os
22
+ import re
23
+
24
+ from pyboost_utils import is_cube, AclnnUtils, get_return_type, merge_strings_by_chunk_size, is_op_multi_output
25
+ import template
26
+ import gen_constants as K
27
+ from gen_utils import save_file
28
+ from op_proto import OpProto
29
+ from op_template_parser import OpTemplateParser
30
+ from base_generator import BaseGenerator
31
+
32
+
33
+ class PyboostCommonOpHeaderGenerator(BaseGenerator):
34
+ """
35
+ Generates common C++ headers for PyBoost operations.
36
+
37
+ This class processes operator prototypes and generates header files containing function definitions
38
+ based on templates provided. It specifically generates the headers that define operations for PyBoost.
39
+ """
40
+
41
+ def __init__(self):
42
+ self.pyboost_op_header_str = template.PYBOOST_BASE_OP_DEFINE_TEMPLATE
43
+
44
+ def generate(self, work_path, op_protos):
45
+ """
46
+ Generates header files for the provided operator prototypes.
47
+
48
+ Args:
49
+ work_path (str): The directory path where the header files will be saved.
50
+ op_protos (list): A list of operator prototypes containing information about the operators.
51
+
52
+ Returns:
53
+ None
54
+ """
55
+ for op_proto in op_protos:
56
+ if op_proto.op_dispatch is None:
57
+ continue
58
+ op_parser = OpTemplateParser(op_proto)
59
+ op_name_str = op_proto.op_class.name
60
+ call_args_with_type = op_parser.parse_call_args_with_types()
61
+ cpp_func_return = _generate_cpp_func_return(op_proto)
62
+ output_is_tuple = "bool output_is_tuple() const override { return true; }" \
63
+ if is_op_multi_output(op_proto.op_returns) else ''
64
+ pyboost_op_header_str = template.PYBOOST_BASE_OP_DEFINE_TEMPLATE.replace(op_name=op_name_str,
65
+ op_name_upper=op_name_str.upper(),
66
+ call_args=call_args_with_type,
67
+ return_type=cpp_func_return,
68
+ output_is_tuple=output_is_tuple)
69
+ save_path = os.path.join(work_path, f"{K.MS_COMMON_PYBOOST_KERNEL_PATH}/auto_generate/")
70
+ file_name = f"{op_proto.op_name}.h"
71
+ save_file(save_path, file_name, pyboost_op_header_str)
72
+
73
+
74
+ class PyboostOpHeaderGenerator(BaseGenerator):
75
+ """
76
+ Generates device-specific C++ headers for PyBoost operations.
77
+
78
+ This class generates header files for different devices (Ascend, CPU, GPU) and defines
79
+ the operation functions accordingly.
80
+ """
81
+
82
+ def __init__(self, device):
83
+ """
84
+ Initializes the PyboostOpHeaderGenerator with the appropriate templates for the specified device.
85
+
86
+ Args:
87
+ device (str): The target device (ascend, gpu, or cpu).
88
+
89
+ Raises:
90
+ ValueError: If the device is not supported.
91
+ """
92
+ template_dict = {"ascend": template.PYBOOST_ASCEND_OP_HEADER_TEMPLATE,
93
+ "gpu": template.PYBOOST_GPU_OP_HEADER_TEMPLATE,
94
+ "cpu": template.PYBOOST_CPU_OP_HEADER_TEMPLATE}
95
+ if device not in template_dict:
96
+ raise ValueError(f"Device must be ascend, gpu, or cpu, {device} is not supported")
97
+ self.PYBOOST_OP_HEADER_TEMPLATE = template_dict[device]
98
+ self.code_generate_path = f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/"
99
+ self.device = device
100
+
101
+ def generate(self, work_path, op_protos):
102
+ """
103
+ Generates header files for the provided operator prototypes based on the device.
104
+
105
+ Args:
106
+ work_path (str): The directory path where the header files will be saved.
107
+ op_protos (list): A list of operator prototypes containing information about the operators.
108
+
109
+ Returns:
110
+ None
111
+ """
112
+ for op_proto in op_protos:
113
+ if op_proto.op_dispatch is None:
114
+ continue
115
+ if getattr(op_proto.op_dispatch, self.device) == 'None':
116
+ continue
117
+ op_parser = OpTemplateParser(op_proto)
118
+ op_name_str = op_proto.op_class.name
119
+ call_args_with_type = op_parser.parse_call_args_with_types()
120
+ cpp_func_return = _generate_cpp_func_return(op_proto)
121
+
122
+ pyboost_op_str = self.PYBOOST_OP_HEADER_TEMPLATE.replace(op_name=op_name_str,
123
+ op_name_upper=op_name_str.upper(),
124
+ operator_name=op_proto.op_name,
125
+ call_args_with_type=call_args_with_type,
126
+ return_type=cpp_func_return)
127
+
128
+ save_path = os.path.join(work_path, self.code_generate_path)
129
+ file_name = f"{op_proto.op_name}.h"
130
+ save_file(save_path, file_name, pyboost_op_str)
131
+
132
+
133
+ class PyboostOpCppGenerator:
134
+ """
135
+ Generates C++ source files for PyBoost operations.
136
+
137
+ This class generates the implementation of operations for different devices, handling function calls
138
+ and registering custom kernels as necessary.
139
+ """
140
+
141
+ def __init__(self, device):
142
+ """
143
+ Initializes the PyboostOpCppGenerator with the appropriate templates for the specified device.
144
+
145
+ Args:
146
+ device (str): The target device (ascend, gpu, or cpu).
147
+
148
+ Raises:
149
+ ValueError: If the device is not supported.
150
+ """
151
+ if device == 'ascend':
152
+ PYBOOST_CUSTOMIZE_CALL_TEMPLATE = template.PYBOOST_ASCEND_CUSTOMIZE_CALL_TEMPLATE
153
+ PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_HEADER_TEMPLATE
154
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
155
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
156
+ elif device == 'cpu':
157
+ PYBOOST_CUSTOMIZE_CALL_TEMPLATE = template.PYBOOST_CPU_CUSTOMIZE_CALL_TEMPLATE
158
+ PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_CPU_SINGLE_OP_HEADER_TEMPLATE
159
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE
160
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
161
+ elif device == 'gpu':
162
+ PYBOOST_CUSTOMIZE_CALL_TEMPLATE = template.PYBOOST_GPU_CUSTOMIZE_CALL_TEMPLATE
163
+ PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_GPU_SINGLE_OP_HEADER_TEMPLATE
164
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE
165
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
166
+ else:
167
+ raise ValueError(f"Device must be ascend, gpu, or cpu, {device} is not supported")
168
+ self.PYBOOST_CUSTOMIZE_CALL_TEMPLATE = PYBOOST_CUSTOMIZE_CALL_TEMPLATE
169
+ self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = PYBOOST_SINGLE_OP_HEADER_TEMPLATE
170
+ self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = PYBOOST_SINGLE_OP_SOURCE_TEMPLATE
171
+ self.gen_path = gen_path
172
+ self.device = device
173
+
174
+ def generate_customize_op_cpp_code(self, op_protos, merge_op_header, merge_op_function):
175
+ """
176
+ Generate C++ code for PyBoost operations using the provided operation prototypes.
177
+
178
+ This method processes a list of operation prototypes, generates customized function call
179
+ implementations, and updates the merged headers and functions for the specified device.
180
+
181
+ Args:
182
+ op_protos (list): A list of operation prototypes to process. Each prototype contains
183
+ metadata about the operation, including dispatch settings and arguments.
184
+ merge_op_header (list): A list to store the generated C++ header code for operations.
185
+ merge_op_function (list): A list to store the generated C++ source code for operations.
186
+ """
187
+ for op_proto in op_protos:
188
+ if op_proto.op_dispatch is None:
189
+ continue
190
+ if getattr(op_proto.op_dispatch, self.device) == 'default':
191
+ continue
192
+ if getattr(op_proto.op_dispatch, self.device) == 'None':
193
+ continue
194
+ op_parser = OpTemplateParser(op_proto)
195
+ call_args = op_parser.parse_original_call_args(op_proto.op_args)
196
+ call_args_with_type = op_parser.parse_call_args_with_types()
197
+ _, call_func_outputs = op_parser.generate_pyboost_outputs()
198
+ operator_name = op_proto.op_name
199
+ op_name_str = op_proto.op_class.name
200
+ check_inplace_func = ''
201
+ for arg in op_proto.op_returns:
202
+ if arg.inplace != '':
203
+ check_inplace_func = f'ThrowExpectionWhenInternalOverlap({arg.inplace}_tensor);'
204
+ break
205
+ call_impl = self.PYBOOST_CUSTOMIZE_CALL_TEMPLATE.replace(
206
+ call_args=call_args,
207
+ return_values=call_func_outputs,
208
+ customize_func=getattr(op_proto.op_dispatch, self.device) + "Customize",
209
+ check_expression=check_inplace_func,
210
+ )
211
+ customize_include = \
212
+ f'#include "{K.MS_OPS_KERNEL_PATH}/{self.device}/pyboost/customize/{operator_name.lower()}.h"\n'
213
+ register_custom = self._get_register_custom_kernel(op_proto)
214
+ cpp_func_return = _generate_cpp_func_return(op_proto)
215
+ merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=operator_name,
216
+ customize_include=customize_include))
217
+ merge_op_function.append(
218
+ self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_name_str,
219
+ call_args_with_type=call_args_with_type,
220
+ return_type=cpp_func_return, call_impl=call_impl,
221
+ register_custom_kernel=register_custom))
222
+
223
+ def _get_register_custom_kernel(self, op_proto: OpProto):
224
+ """
225
+ Generates the registration code for custom kernels based on the device.
226
+
227
+ Args:
228
+ op_proto (OpProto): The operator prototype to generate registration for.
229
+
230
+ Returns:
231
+ str: The registration code for the custom kernel.
232
+ """
233
+ if self.device == 'ascend':
234
+ register_custom_kernel = ''
235
+ elif self.device == 'cpu':
236
+ register_custom_kernel = f"MS_REG_PYBOOST_CPU_CUSTOM_KERNEL({op_proto.op_class.name});"
237
+ elif self.device == 'gpu':
238
+ register_custom_kernel = f"MS_REG_PYBOOST_GPU_CUSTOM_KERNEL({op_proto.op_class.name});"
239
+ else:
240
+ raise ValueError(f"Device must be ascend, gpu, or cpu, {self.device} is not supported")
241
+ return register_custom_kernel
242
+
243
+
244
+ class PyboostViewOpCppGenerator:
245
+ """
246
+ Generates C++ source files for view operations in PyBoost.
247
+
248
+ This class handles the generation of source files for view operations, which have special handling
249
+ compared to regular operations.
250
+ """
251
+
252
+ def __init__(self, device):
253
+ """
254
+ Initializes the PyboostViewOpCppGenerator with the appropriate templates for the specified device.
255
+
256
+ Args:
257
+ device (str): The target device (ascend, gpu, or cpu).
258
+
259
+ Raises:
260
+ ValueError: If the device is not supported.
261
+ """
262
+ if device == 'ascend':
263
+ PYBOOST_VIEW_CALL_TEMPLATE = template.PYBOOST_ASCEND_VIEW_CALL_TEMPLATE
264
+ PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_HEADER_TEMPLATE
265
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
266
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
267
+ elif device == 'cpu':
268
+ PYBOOST_VIEW_CALL_TEMPLATE = template.PYBOOST_CPU_VIEW_CALL_TEMPLATE
269
+ PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_CPU_SINGLE_OP_HEADER_TEMPLATE
270
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE
271
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
272
+ elif device == 'gpu':
273
+ PYBOOST_VIEW_CALL_TEMPLATE = template.PYBOOST_GPU_VIEW_CALL_TEMPLATE
274
+ PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_GPU_SINGLE_OP_HEADER_TEMPLATE
275
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE
276
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
277
+ else:
278
+ raise ValueError(f"Device must be ascend, gpu, or cpu, {device} is not supported")
279
+ self.PYBOOST_VIEW_CALL_TEMPLATE = PYBOOST_VIEW_CALL_TEMPLATE
280
+ self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = PYBOOST_SINGLE_OP_HEADER_TEMPLATE
281
+ self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = PYBOOST_SINGLE_OP_SOURCE_TEMPLATE
282
+ self.gen_path = gen_path
283
+ self.device = device
284
+
285
+ def generate_view_op_cpp_code(self, op_protos, merge_op_header, merge_op_function):
286
+ """
287
+ Generate C++ code for view operations in PyBoost.
288
+
289
+ This method processes a list of operation prototypes (`op_protos`) and generates C++ code
290
+ for view operations where `op_view` is set to `True` and the dispatch setting for the target
291
+ device is `'default'`.
292
+
293
+ Args:
294
+ op_protos (list): A list of operation prototypes to process. Each prototype includes
295
+ metadata such as dispatch settings, arguments, and view-specific attributes.
296
+ merge_op_header (list): A list to store the generated C++ header code for view operations.
297
+ merge_op_function (list): A list to store the generated C++ source code for view operations.
298
+ """
299
+ for op_proto in op_protos:
300
+ if op_proto.op_dispatch is None:
301
+ continue
302
+ if getattr(op_proto.op_dispatch, self.device) != 'default':
303
+ continue
304
+ if getattr(op_proto.op_dispatch, self.device) == 'None':
305
+ continue
306
+ if not op_proto.op_view:
307
+ continue
308
+
309
+ op_parser = OpTemplateParser(op_proto)
310
+ call_args_tensor = op_parser.get_call_args_tensor()
311
+ call_args = op_parser.parse_original_call_args(op_proto.op_args)
312
+ call_args_with_type = op_parser.parse_call_args_with_types()
313
+ _, call_func_outputs = op_parser.generate_pyboost_outputs()
314
+ call_impl = self.PYBOOST_VIEW_CALL_TEMPLATE.replace(op_name=op_proto.op_class.name,
315
+ call_args=call_args,
316
+ call_tensors=call_args_tensor,
317
+ return_values=call_func_outputs,
318
+ input=call_args[0])
319
+ customize_include = f'#include "{K.MS_OPS_VIEW_PATH}/{op_proto.op_name}_strides_calc.h"\n'
320
+ cpp_func_return = _generate_cpp_func_return(op_proto)
321
+ merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=op_proto.op_name,
322
+ customize_include=customize_include))
323
+
324
+ merge_op_function.append(
325
+ self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_proto.op_class.name,
326
+ call_args_with_type=call_args_with_type,
327
+ return_type=cpp_func_return,
328
+ call_impl=call_impl,
329
+ register_custom_kernel=""))
330
+
331
+
332
+ class AclnnOpCppCodeGenerator:
333
+ """
334
+ Generates C++ source files for ACLNN operations in PyBoost.
335
+
336
+ This class handles the generation of source files for operations that utilize the ACLNN framework,
337
+ including customized calls and tensor management.
338
+
339
+ Attributes:
340
+ PYBOOST_CALL_TEMPLATE (Template): Template for generating ACLNN operation calls.
341
+ PYBOOST_OP_SOURCE_TEMPLATE (Template): Template for generating operation source files.
342
+ gen_path (str): Path for saving the generated C++ source files.
343
+ device (str): The target device (ascend, cpu, or gpu).
344
+ """
345
+
346
+ def __init__(self, device):
347
+ """
348
+ Initializes the AclnnOpCppCodeGenerator with the appropriate templates for the specified device.
349
+
350
+ Args:
351
+ device (str): The target device (ascend, gpu, or cpu).
352
+
353
+ Raises:
354
+ ValueError: If the device is not supported.
355
+ """
356
+ if device == 'ascend':
357
+ PYBOOST_CALL_TEMPLATE = template.PYBOOST_ASCEND_CALL_TEMPLATE
358
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
359
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
360
+ elif device == 'cpu':
361
+ PYBOOST_CALL_TEMPLATE = template.PYBOOST_CPU_CALL_TEMPLATE
362
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE
363
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
364
+ elif device == 'gpu':
365
+ PYBOOST_CALL_TEMPLATE = template.PYBOOST_GPU_CALL_TEMPLATE
366
+ PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE
367
+ gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
368
+ else:
369
+ raise ValueError(f"Device must be ascend, gpu, or cpu, {device} is not supported")
370
+ self.PYBOOST_CALL_TEMPLATE = PYBOOST_CALL_TEMPLATE
371
+ self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.Template(
372
+ '#include "kernel/${device}/pyboost/auto_generate/${operator_name}.h"\n'
373
+ )
374
+ self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = PYBOOST_SINGLE_OP_SOURCE_TEMPLATE
375
+ self.gen_path = gen_path
376
+ self.device = device
377
+
378
+ def generate_aclnn_op_cpp_code(self, op_protos, merge_op_header, merge_op_function):
379
+ """
380
+ Generate C++ code for ACLNN operations in PyBoost.
381
+
382
+ This method processes a list of operation prototypes (`op_protos`) and generates C++ code
383
+ for aclnn operations. The method filters the operation
384
+ prototypes based on their dispatch and view settings, and then uses templates and metadata
385
+ to generate the necessary implementation and header files.
386
+
387
+ Args:
388
+ op_protos (list): A list of operation prototypes. Each prototype includes metadata
389
+ such as operation name, dispatch settings, view attributes, and arguments.
390
+ merge_op_header (list): A list to store the generated C++ header code for ACLNN operations.
391
+ merge_op_function (list): A list to store the generated C++ source code for ACLNN operations.
392
+ """
393
+ for op_proto in op_protos:
394
+ if op_proto.op_dispatch is None:
395
+ continue
396
+ if getattr(op_proto.op_dispatch, self.device) != 'default':
397
+ continue
398
+ if getattr(op_proto.op_dispatch, self.device) == 'None':
399
+ continue
400
+ if op_proto.op_view:
401
+ continue
402
+
403
+ op_parser = OpTemplateParser(op_proto)
404
+ aclnn_name = AclnnUtils.get_aclnn_interface(op_proto.op_class.name)
405
+
406
+ call_args_tensor = op_parser.get_call_args_tensor()
407
+ create_input_address = self._generate_create_input_address(op_parser)
408
+ malloc_inputs = self._generate_malloc_input(op_parser)
409
+ op_outputs, call_func_outputs = op_parser.generate_pyboost_outputs()
410
+ get_inputs_kernel_tensors = self._generate_get_inputs_kernel_tensors(op_parser)
411
+
412
+ cube_math_type, get_cube_math_type = '', ''
413
+ if self.device == 'ascend' and is_cube(op_proto.op_class.name):
414
+ get_cube_math_type = f'// cubeMathType: 0 - KEEP_DTYPE, 1 - ALLOW_FP32_DOWN_PRECISION\n'
415
+ get_cube_math_type += "auto cube_math_type = GetCubeMathType();"
416
+ cube_math_type = ', cube_math_type'
417
+
418
+ real_output = ', ' + op_outputs \
419
+ if _generate_inplace_process_cpp_code(op_proto) == '' else ''
420
+
421
+ cast_input_code, real_call_args_tensor = self._generate_tensor_cpu_cast_input_code(
422
+ op_parser)
423
+ cpp_func_return = _generate_cpp_func_return(op_proto)
424
+ _, tensor_list_convert, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
425
+ call_args_after_convert, value_tuple_convert, const_number_convert = op_parser.op_args_converter()
426
+ call_args = op_parser.parse_original_call_args(op_proto.op_args)
427
+ call_args_with_type = op_parser.parse_call_args_with_types()
428
+ inplace_process = _generate_inplace_process_cpp_code(op_proto)
429
+ call_impl = self.PYBOOST_CALL_TEMPLATE.replace(aclnn_name=aclnn_name,
430
+ call_args=call_args,
431
+ call_tensors=call_args_tensor,
432
+ value_tuple_convert=value_tuple_convert,
433
+ const_number_convert=const_number_convert,
434
+ create_input_address=create_input_address,
435
+ tensor_list_convert=tensor_list_convert,
436
+ call_args_with_tensor=call_args_with_tensor,
437
+ malloc_inputs=malloc_inputs,
438
+ get_inputs_kernel_tensors=get_inputs_kernel_tensors,
439
+ get_cube_math_type=get_cube_math_type,
440
+ cube_math_type=cube_math_type,
441
+ real_call_args=call_args_after_convert,
442
+ return_values=call_func_outputs,
443
+ outputs=real_output,
444
+ inplace_process=inplace_process,
445
+ cast_input_code=cast_input_code,
446
+ real_call_args_tensor=real_call_args_tensor,
447
+ class_name=op_proto.op_class.name,
448
+ op_name_str=op_proto.op_class.name)
449
+
450
+ merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=op_proto.op_name,
451
+ device=self.device))
452
+
453
+ merge_op_function.append(
454
+ self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_proto.op_class.name,
455
+ call_args_with_type=call_args_with_type,
456
+ return_type=cpp_func_return,
457
+ call_impl=call_impl,
458
+ register_custom_kernel=''))
459
+
460
+ def _generate_tensor_cpu_cast_input_code(self, op_parser: OpTemplateParser):
461
+ """
462
+ Generates the input casting code for CPU tensor operations.
463
+
464
+ Args:
465
+ op_parser (OpTemplateParser): The parser object for the operation prototype.
466
+
467
+ Returns:
468
+ tuple: A tuple containing the casting code and the updated tensor call arguments.
469
+ """
470
+ _, _, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
471
+ call_tensors = op_parser.get_call_args_tensor()
472
+ cast_input = ""
473
+ real_call_args_tensor = call_args_with_tensor.copy()
474
+ for i, tensor in enumerate(call_args_with_tensor):
475
+ is_tuple_tensor = real_call_args_tensor[i].endswith("_vector")
476
+ is_tensor = real_call_args_tensor[i] in call_tensors
477
+ if is_tensor:
478
+ cast_input += f'const auto &real_{tensor} = PyBoostUtils::CastTensor({tensor}, ' \
479
+ f'select_kernel.input_type()[{i}].dtype, "CPU");\n'
480
+ real_call_args_tensor[i] = "real_" + real_call_args_tensor[i]
481
+ if is_tuple_tensor:
482
+ cast_input += f'const auto &real_{tensor} = PyBoostUtils::CastTensor({tensor}, ' \
483
+ f'select_kernel.input_type()[{i}].dtype, "CPU");\n'
484
+ real_call_args_tensor[i] = "PyBoostUtils::ConvertTensorVectorToTuple(real_" + real_call_args_tensor[
485
+ i] + ")"
486
+ if cast_input != "":
487
+ cast_input = "auto &select_kernel = kernel_attr_pair.second;\n" + cast_input
488
+ return cast_input, real_call_args_tensor
489
+
490
+ def _generate_create_input_address(self, op_parser: OpTemplateParser):
491
+ need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
492
+ create_input_address = ''
493
+ args_list = ''
494
+ for item in need_malloc_tensors:
495
+ args_list += f'{item}, '
496
+ args_list = args_list[:-2]
497
+ if args_list:
498
+ create_input_address = f'PyBoostUtils::PrepareOpInputs(device_context_, op->stream_id(), {args_list});\n'
499
+ return create_input_address
500
+
501
+ def _generate_malloc_input(self, op_parser: OpTemplateParser):
502
+ """
503
+ Generates the code for creating input addresses for tensors that need to be allocated.
504
+
505
+ Args:
506
+ op_parser (OpTemplateParser): The parser object for the operation prototype.
507
+
508
+ Returns:
509
+ str: The generated code for creating input addresses.
510
+ """
511
+ need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
512
+ malloc_inputs = ''
513
+ args_list = ''
514
+ for item in need_malloc_tensors:
515
+ args_list += f'{item}, '
516
+ args_list = args_list[:-2]
517
+ if args_list:
518
+ malloc_inputs += f'PyBoostUtils::MallocOpInputs(device_context, {args_list});\n'
519
+ return malloc_inputs
520
+
521
+ def _generate_get_inputs_kernel_tensors(self, op_parser: OpTemplateParser):
522
+ """
523
+ Generates the code for retrieving input kernel tensors.
524
+
525
+ Args:
526
+ op_parser (OpTemplateParser): The parser object for the operation prototype.
527
+
528
+ Returns:
529
+ str: The generated code for retrieving input kernel tensors.
530
+ """
531
+ _, _, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
532
+ inputs_kernel_tensors = ''
533
+ args_list = ''
534
+ for item in call_args_with_tensor:
535
+ args_list += f'{item}, '
536
+ args_list = args_list[:-2]
537
+ if args_list:
538
+ inputs_kernel_tensors += f'const auto &input_address_info = PyBoostUtils::GetAddressInfo(' \
539
+ f'device_context, op->stream_id(), op->input_abs(), {args_list});\n'
540
+ return inputs_kernel_tensors
541
+
542
+
543
+ class PyboostOpFunctionGenerator(BaseGenerator):
544
+ """
545
+ Generates C++ source files for ACLNN operations in PyBoost.
546
+
547
+ This class handles the generation of source files for operations that utilize the ACLNN framework,
548
+ including customized calls and tensor management.
549
+
550
+ Attributes:
551
+ PYBOOST_CALL_TEMPLATE (Template): Template for generating ACLNN operation calls.
552
+ PYBOOST_OP_SOURCE_TEMPLATE (Template): Template for generating operation source files.
553
+ gen_path (str): Path for saving the generated C++ source files.
554
+ device (str): The target device (ascend, cpu, or gpu).
555
+ """
556
+
557
+ def __init__(self):
558
+ self.ascend_op_cpp_generator = PyboostOpCppGenerator('ascend')
559
+ self.ascend_view_op_cpp_generator = PyboostViewOpCppGenerator('ascend')
560
+ self.ascend_aclnn_cpp_generator = AclnnOpCppCodeGenerator('ascend')
561
+
562
+ self.cpu_op_cpp_generator = PyboostOpCppGenerator('cpu')
563
+ self.cpu_view_op_cpp_generator = PyboostViewOpCppGenerator('cpu')
564
+ self.cpu_aclnn_cpp_generator = AclnnOpCppCodeGenerator('cpu')
565
+
566
+ self.gpu_op_cpp_generator = PyboostOpCppGenerator('gpu')
567
+ self.gpu_view_op_cpp_generator = PyboostViewOpCppGenerator('gpu')
568
+ self.gpu_aclnn_cpp_generator = AclnnOpCppCodeGenerator('gpu')
569
+
570
+ self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE
571
+ self.PYBOOST_CPU_OP_SOURCE_TEMPLATE = template.PYBOOST_CPU_OP_SOURCE_TEMPLATE
572
+ self.PYBOOST_GPU_OP_SOURCE_TEMPLATE = template.PYBOOST_GPU_OP_SOURCE_TEMPLATE
573
+ self.ascend_gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
574
+ self.cpu_gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
575
+ self.gpu_gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
576
+
577
+ def generate(self, work_path, op_protos):
578
+ """
579
+ Generate and save C++ source code for PyBoost operations across different devices.
580
+
581
+ This method generates C++ source files for operations (`op_protos`) tailored to Ascend, CPU,
582
+ and GPU devices. It combines headers and function implementations for each device, and then
583
+ saves the final source files to the appropriate paths.
584
+
585
+ Args:
586
+ op_protos (list): A list of operation prototypes containing metadata such as
587
+ operation name, dispatch settings, arguments, and view attributes.
588
+ work_path (str): The base working directory where the generated files will be saved.
589
+
590
+ Generated Files:
591
+ - Ascend: `pyboost_ascend_ops.cc`
592
+ - CPU: `pyboost_cpu_ops.cc`
593
+ - GPU: `pyboost_gpu_ops.cc`
594
+ """
595
+ self._generate_pyboost_ascend_ops(work_path, op_protos)
596
+ self._generate_pyboost_cpu_ops(work_path, op_protos)
597
+ self._generate_pyboost_gpu_ops(work_path, op_protos)
598
+
599
+ def _generate_pyboost_ascend_ops(self, work_path, op_protos):
600
+ """
601
+ Generates Ascend PyBoost ops functions source files after being merged into specific chunk sizes.
602
+
603
+ Args:
604
+ work_path (str): The directory path where the generated C++ source files will be saved.
605
+ op_protos (list): A list of operation prototypes that define the operations for which
606
+ the C++ code will be generated.
607
+ """
608
+ ascend_merge_op_header = []
609
+ ascend_merge_op_function = []
610
+ self.ascend_op_cpp_generator.generate_customize_op_cpp_code(op_protos, ascend_merge_op_header,
611
+ ascend_merge_op_function)
612
+ self.ascend_view_op_cpp_generator.generate_view_op_cpp_code(op_protos, ascend_merge_op_header,
613
+ ascend_merge_op_function)
614
+ self.ascend_aclnn_cpp_generator.generate_aclnn_op_cpp_code(op_protos, ascend_merge_op_header,
615
+ ascend_merge_op_function)
616
+
617
+ ascend_op_header_merge_by_chunk_size = merge_strings_by_chunk_size(ascend_merge_op_header, chunk_size=120)
618
+ ascend_op_function_merge_by_chunk_size = merge_strings_by_chunk_size(ascend_merge_op_function, chunk_size=120)
619
+
620
+ new_gen_num = len(ascend_op_header_merge_by_chunk_size)
621
+ self._delete_residual_merged_ops_files(os.path.join(work_path, self.ascend_gen_path), new_gen_num)
622
+
623
+ for i, op_header, op_function in zip(range(len(ascend_op_header_merge_by_chunk_size)),
624
+ ascend_op_header_merge_by_chunk_size,
625
+ ascend_op_function_merge_by_chunk_size):
626
+ ascend_pyboost_op_source = self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE.replace(
627
+ merge_op_header=op_header, merge_op_function=op_function)
628
+ save_file(os.path.join(work_path, self.ascend_gen_path), f"pyboost_ascend_ops_{i}.cc",
629
+ ascend_pyboost_op_source)
630
+
631
+ def _generate_pyboost_cpu_ops(self, work_path, op_protos):
632
+ """
633
+ Generates CPU PyBoost ops functions source files after being merged into specific chunk sizes.
634
+
635
+ Args:
636
+ work_path (str): The directory path where the generated C++ source files will be saved.
637
+ op_protos (list): A list of operation prototypes that define the operations for which
638
+ the C++ code will be generated.
639
+ """
640
+ cpu_merge_op_header = []
641
+ cpu_merge_op_function = []
642
+ self.cpu_op_cpp_generator.generate_customize_op_cpp_code(op_protos, cpu_merge_op_header, cpu_merge_op_function)
643
+ self.cpu_view_op_cpp_generator.generate_view_op_cpp_code(op_protos, cpu_merge_op_header, cpu_merge_op_function)
644
+ self.cpu_aclnn_cpp_generator.generate_aclnn_op_cpp_code(op_protos, cpu_merge_op_header, cpu_merge_op_function)
645
+ cpu_op_header_merge_by_chunk_size = merge_strings_by_chunk_size(cpu_merge_op_header, chunk_size=120)
646
+ cpu_op_function_merge_by_chunk_size = merge_strings_by_chunk_size(cpu_merge_op_function, chunk_size=120)
647
+
648
+ new_gen_num = len(cpu_op_header_merge_by_chunk_size)
649
+ self._delete_residual_merged_ops_files(os.path.join(work_path, self.cpu_gen_path), new_gen_num)
650
+
651
+ for i, op_header, op_function in zip(range(len(cpu_op_header_merge_by_chunk_size)),
652
+ cpu_op_header_merge_by_chunk_size,
653
+ cpu_op_function_merge_by_chunk_size):
654
+ cpu_pyboost_op_source = self.PYBOOST_CPU_OP_SOURCE_TEMPLATE.replace(
655
+ merge_op_header=op_header, merge_op_function=op_function)
656
+ save_file(os.path.join(work_path, self.cpu_gen_path), f"pyboost_cpu_ops_{i}.cc",
657
+ cpu_pyboost_op_source)
658
+
659
+ def _generate_pyboost_gpu_ops(self, work_path, op_protos):
660
+ """
661
+ Generates GPU PyBoost ops functions source files after being merged into specific chunk sizes.
662
+
663
+ Args:
664
+ work_path (str): The directory path where the generated C++ source files will be saved.
665
+ op_protos (list): A list of operation prototypes that define the operations for which
666
+ the C++ code will be generated.
667
+ """
668
+ gpu_merge_op_header = []
669
+ gpu_merge_op_function = []
670
+ self.gpu_op_cpp_generator.generate_customize_op_cpp_code(op_protos, gpu_merge_op_header, gpu_merge_op_function)
671
+ self.gpu_view_op_cpp_generator.generate_view_op_cpp_code(op_protos, gpu_merge_op_header, gpu_merge_op_function)
672
+ self.gpu_aclnn_cpp_generator.generate_aclnn_op_cpp_code(op_protos, gpu_merge_op_header, gpu_merge_op_function)
673
+ gpu_op_header_merge_by_chunk_size = merge_strings_by_chunk_size(gpu_merge_op_header, chunk_size=120)
674
+ gpu_op_function_merge_by_chunk_size = merge_strings_by_chunk_size(gpu_merge_op_function, chunk_size=120)
675
+
676
+ new_gen_num = len(gpu_op_header_merge_by_chunk_size)
677
+ self._delete_residual_merged_ops_files(os.path.join(work_path, self.gpu_gen_path), new_gen_num)
678
+
679
+ for i, op_header, op_function in zip(range(len(gpu_op_header_merge_by_chunk_size)),
680
+ gpu_op_header_merge_by_chunk_size,
681
+ gpu_op_function_merge_by_chunk_size):
682
+ gpu_pyboost_op_source = self.PYBOOST_GPU_OP_SOURCE_TEMPLATE.replace(
683
+ merge_op_header=op_header, merge_op_function=op_function)
684
+ save_file(os.path.join(work_path, self.gpu_gen_path), f"pyboost_gpu_ops_{i}.cc",
685
+ gpu_pyboost_op_source)
686
+
687
+ def _delete_residual_merged_ops_files(self, files_path, new_gen_num):
688
+ """
689
+ Deletes residual merged operation files in the specified directory if the number of
690
+ newly generated files does not match the number of existing ones.
691
+
692
+ This method first lists all files in the specified directory, then filters out the files
693
+ that match the pattern `pyboost_.*_ops_.*.cc` (i.e., files related to pyboost ops). It compares
694
+ the number of such files (`old_files_num`) with the `new_gen_num` argument, which represents
695
+ the expected number of new pyboost ops files. If the counts do not match, the method will
696
+ delete all the existing pyboost ops files in the directory before any new ones can be generated.
697
+
698
+ Args:
699
+ files_path (str): The path to the directory containing the files to be checked and deleted.
700
+ new_gen_num (int): The number of newly generated pyboost ops files expected to be in the directory.
701
+
702
+ Returns:
703
+ None
704
+ """
705
+ all_files = os.listdir(files_path)
706
+ old_pyboost_ops_files = [file for file in all_files if re.match(r'pyboost_.*_ops_.*\.cc', file)]
707
+ old_files_num = len(old_pyboost_ops_files)
708
+ if new_gen_num != old_files_num:
709
+ for file in old_pyboost_ops_files:
710
+ os.remove(os.path.join(files_path, file))
711
+
712
+
713
+ def _generate_cpp_func_return(op_proto):
714
+ """Generates the C++ return type for the given operator prototype.
715
+
716
+ Args:
717
+ op_proto (OpProto): The operator prototype containing return information.
718
+
719
+ Returns:
720
+ str: The C++ return type for the function based on the operator prototype.
721
+
722
+ Raises:
723
+ Exception: If no return type is found.
724
+ """
725
+ returns_type = []
726
+ type_convert_to_base = {
727
+ 'std::vector<mindspore::tensor::TensorPtr>': 'std::vector<mindspore::tensor::BaseTensorPtr>',
728
+ 'mindspore::tensor::TensorPtr': 'mindspore::tensor::BaseTensorPtr'
729
+ }
730
+ for return_obj in op_proto.op_returns:
731
+ temp_return = get_return_type(return_obj.arg_dtype)
732
+ if temp_return in type_convert_to_base:
733
+ returns_type.append(type_convert_to_base[temp_return])
734
+ else:
735
+ raise Exception("Not return found")
736
+ if len(returns_type) == 1:
737
+ cpp_func_return = returns_type[0]
738
+ elif len(returns_type) > 1:
739
+ cpp_func_return = "std::tuple<"
740
+ cpp_func_return += ','.join(s for s in returns_type)
741
+ cpp_func_return += ">"
742
+ else:
743
+ raise Exception("Not return found")
744
+ return cpp_func_return
745
+
746
+
747
+ def _generate_inplace_process_cpp_code(op_proto):
748
+ """Generates C++ code for updating outputs by input tensors for inplace processing.
749
+
750
+ Args:
751
+ op_proto (OpProto): The operator prototype containing return information.
752
+
753
+ Returns:
754
+ str: The C++ code for inplace processing, or an empty string if no inplace processing is needed.
755
+ """
756
+ inplace_process = f'// RefOps update output by input tensor\n'
757
+ has_ref = False
758
+ for index, return_obj in enumerate(op_proto.op_returns):
759
+ if return_obj.inplace != '':
760
+ inplace_process += f'outputs_[{index}]->set_device_address(' \
761
+ f'{return_obj.inplace}_tensor->device_address()); '
762
+ has_ref = True
763
+ break
764
+ if has_ref:
765
+ return inplace_process
766
+ return ''
767
+
768
+
769
+ def delete_residual_files(work_path, op_protos):
770
+ """
771
+ Deletes residual files generated for operator prototypes that are no longer needed.
772
+
773
+ Args:
774
+ work_path (str): The base directory path where generated files are located.
775
+ op_protos (list): A list of operator prototypes that are currently valid.
776
+
777
+ Returns:
778
+ None
779
+ """
780
+ all_operator_name = []
781
+ for op_proto in op_protos:
782
+ all_operator_name.append(op_proto.op_name)
783
+ code_generate_path_list = [f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/" for device in
784
+ ["ascend", "gpu", "cpu"]]
785
+ code_generate_path_list.append(f"{K.MS_COMMON_PYBOOST_KERNEL_PATH}/auto_generate/")
786
+ for code_generate_path in code_generate_path_list:
787
+ filter_files = []
788
+ code_generate_path = os.path.join(work_path, code_generate_path)
789
+ if os.path.exists(code_generate_path):
790
+ all_files = os.listdir(code_generate_path)
791
+ # No need to delete pyboost_.*_ops_.*.cc files and op_register.cc.
792
+ # These residual files will be deleted before new files generate.
793
+ filter_files = [file for file in all_files if
794
+ not re.match(r'pyboost_.*_ops_.*\.cc', file) and file != "op_register.cc"]
795
+ registered_op_name = set(item.split(".")[0] for item in filter_files)
796
+ need_clean_op = registered_op_name - set(all_operator_name)
797
+
798
+ for file in filter_files:
799
+ file_name = file.split(".")[0]
800
+ if file_name in need_clean_op:
801
+ file_path = os.path.join(code_generate_path, file)
802
+ if os.path.exists(file_path):
803
+ os.remove(file_path)
804
+
805
+
806
+ class PyboostOpRegisterCppCodeGenerator:
807
+ """
808
+ Generates registration C++ code for PyBoost operations.
809
+
810
+ This class is responsible for creating a registration source file that includes
811
+ all the necessary headers and template instantiations for the registered operations.
812
+
813
+ Attributes:
814
+ PYBOOST_OP_REGISTER_TEMPLATE (Template): Template for generating the operation registration code.
815
+ """
816
+
817
+ def __init__(self):
818
+ self.PYBOOST_OP_REGISTER_TEMPLATE = template.PYBOOST_OP_REGISTER_TEMPLATE
819
+
820
+ def generate(self, work_path, op_protos):
821
+ """
822
+ Generates a C++ source file for registering all PyBoost operations.
823
+
824
+ Args:
825
+ work_path (str): The directory path where the registration file will be saved.
826
+ op_protos (list): A list of operator prototypes containing information about the operations.
827
+
828
+ Returns:
829
+ None
830
+ """
831
+ all_op_names = []
832
+ all_functional_names = []
833
+ for op_proto in op_protos:
834
+ if op_proto.op_dispatch is None:
835
+ continue
836
+ functional_name = op_proto.op_name
837
+ op_name_str = op_proto.op_class.name
838
+ all_op_names.append(op_name_str)
839
+ all_functional_names.append(functional_name)
840
+
841
+ include_str = ''
842
+ factory_str = ''
843
+ for op_name in all_op_names:
844
+ factory_str += "template class OpFactory<{0}>;\n".format(op_name)
845
+ for operator_name in all_functional_names:
846
+ include_str += f'#include "{K.MS_COMMON_PYBOOST_KERNEL_PATH}/auto_generate/{operator_name}.h"\n'
847
+ op_register_file_str = self.PYBOOST_OP_REGISTER_TEMPLATE.replace(op_includes=include_str,
848
+ op_factory_templates=factory_str)
849
+ save_path = os.path.join(work_path, f"{K.MS_COMMON_PYBOOST_KERNEL_PATH}/auto_generate/")
850
+ file_name = "op_register.cc"
851
+ save_file(save_path, file_name, op_register_file_str)
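For orientation, the following is a minimal usage sketch of the generator entry points added in this file. The class names and the (work_path, op_protos) signatures are taken from the code above; the module import path, the output directory, and the way operator prototypes are obtained are placeholders assumed for illustration and are not part of the package contents shown in this diff.

# Illustrative sketch only; not shipped in mindspore 2.5.0.
# Signatures come from pyboost_op_cpp_code_generator.py above; work_path and
# op_protos are hypothetical placeholders (op_protos would normally be a list
# of parsed OpProto objects built elsewhere in mindspore/ops_generate).
from pyboost_op_cpp_code_generator import (
    PyboostCommonOpHeaderGenerator,
    PyboostOpHeaderGenerator,
    PyboostOpFunctionGenerator,
    PyboostOpRegisterCppCodeGenerator,
    delete_residual_files,
)

work_path = "./build/ops_generate_out"  # assumed output root
op_protos = []                          # assumed: parsed OpProto objects

# Common per-op headers plus one device-specific header per backend.
PyboostCommonOpHeaderGenerator().generate(work_path, op_protos)
for device in ("ascend", "cpu", "gpu"):
    PyboostOpHeaderGenerator(device).generate(work_path, op_protos)

# Merged pyboost_<device>_ops_<i>.cc sources (customize, view and aclnn ops).
PyboostOpFunctionGenerator().generate(work_path, op_protos)

# op_register.cc with the OpFactory template instantiations, then cleanup of
# generated files belonging to operators that no longer exist.
PyboostOpRegisterCppCodeGenerator().generate(work_path, op_protos)
delete_residual_files(work_path, op_protos)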