mindspore 2.4.10__cp39-cp39-win_amd64.whl → 2.6.0rc1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +13 -6
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -0
- mindspore/_checkparam.py +3 -38
- mindspore/_deprecated/__init__.py +17 -0
- mindspore/_deprecated/jit.py +198 -0
- mindspore/_extends/builtin_operations.py +1 -1
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +6 -7
- mindspore/_extends/parse/compile_config.py +83 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
- mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
- mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
- mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
- mindspore/_extends/parse/parser.py +46 -197
- mindspore/_extends/parse/resources.py +1 -5
- mindspore/_extends/parse/standard_method.py +217 -98
- mindspore/_extends/pijit/__init__.py +2 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
- mindspore/_extends/pijit/tensor_func_list.py +27 -0
- mindspore/_extends/utils.py +1 -1
- mindspore/amp.py +11 -5
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/__init__.py +2 -2
- mindspore/boost/base.py +3 -7
- mindspore/boost/boost_cell_wrapper.py +138 -43
- mindspore/common/__init__.py +6 -3
- mindspore/common/_grad_function.py +56 -0
- mindspore/common/_pijit_context.py +14 -5
- mindspore/common/_register_for_tensor.py +1 -2
- mindspore/common/_stub_tensor.py +30 -14
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +4760 -0
- mindspore/common/api.py +435 -371
- mindspore/common/auto_dynamic_shape.py +41 -44
- mindspore/common/dtype.py +39 -36
- mindspore/common/dump.py +9 -6
- mindspore/common/file_system.py +9 -1
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +6 -2
- mindspore/common/initializer.py +13 -10
- mindspore/common/jit_begin_end.py +94 -0
- mindspore/common/jit_config.py +6 -1
- mindspore/common/jit_context.py +76 -0
- mindspore/common/jit_trace.py +378 -0
- mindspore/common/lazy_inline.py +9 -3
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/mutable.py +5 -4
- mindspore/common/parameter.py +135 -52
- mindspore/common/seed.py +2 -2
- mindspore/common/sparse_tensor.py +23 -17
- mindspore/common/tensor.py +951 -1992
- mindspore/communication/__init__.py +7 -5
- mindspore/communication/_comm_helper.py +52 -2
- mindspore/communication/comm_func.py +240 -181
- mindspore/communication/management.py +95 -26
- mindspore/context.py +314 -566
- mindspore/dataset/__init__.py +65 -37
- mindspore/dataset/audio/__init__.py +2 -8
- mindspore/dataset/audio/transforms.py +3 -17
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +87 -6
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +6 -5
- mindspore/dataset/engine/datasets.py +292 -267
- mindspore/dataset/engine/datasets_audio.py +22 -8
- mindspore/dataset/engine/datasets_standard_format.py +46 -27
- mindspore/dataset/engine/datasets_text.py +78 -48
- mindspore/dataset/engine/datasets_user_defined.py +182 -116
- mindspore/dataset/engine/datasets_vision.py +120 -44
- mindspore/dataset/engine/iterators.py +283 -63
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +289 -43
- mindspore/dataset/engine/serializer_deserializer.py +3 -2
- mindspore/dataset/engine/validators.py +53 -11
- mindspore/dataset/text/__init__.py +7 -6
- mindspore/dataset/text/transforms.py +6 -5
- mindspore/dataset/text/utils.py +3 -3
- mindspore/dataset/transforms/__init__.py +0 -9
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +31 -14
- mindspore/dataset/utils/browse_dataset.py +1 -1
- mindspore/dataset/vision/__init__.py +2 -9
- mindspore/dataset/vision/transforms.py +202 -158
- mindspore/dataset/vision/utils.py +7 -5
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +153 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +123 -0
- mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +170 -0
- mindspore/experimental/es/embedding_service.py +35 -27
- mindspore/experimental/llm_boost/__init__.py +1 -0
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/map_parameter.py +4 -4
- mindspore/experimental/optim/adadelta.py +6 -6
- mindspore/experimental/optim/adagrad.py +4 -4
- mindspore/experimental/optim/adam.py +7 -0
- mindspore/experimental/optim/adamax.py +4 -4
- mindspore/experimental/optim/adamw.py +4 -0
- mindspore/experimental/optim/asgd.py +1 -1
- mindspore/experimental/optim/lr_scheduler.py +73 -46
- mindspore/experimental/optim/radam.py +34 -31
- mindspore/experimental/optim/rprop.py +1 -1
- mindspore/experimental/optim/sgd.py +1 -1
- mindspore/hal/contiguous_tensors_handle.py +6 -10
- mindspore/hal/device.py +55 -53
- mindspore/hal/event.py +52 -52
- mindspore/hal/memory.py +157 -117
- mindspore/hal/stream.py +150 -109
- mindspore/include/api/context.h +0 -1
- mindspore/include/dataset/constants.h +7 -4
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +50 -0
- mindspore/mindrecord/__init__.py +21 -8
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +796 -759
- mindspore/mint/distributed/__init__.py +70 -4
- mindspore/mint/distributed/distributed.py +2679 -44
- mindspore/mint/linalg/__init__.py +8 -0
- mindspore/mint/nn/__init__.py +743 -22
- mindspore/mint/nn/functional.py +716 -23
- mindspore/mint/nn/layer/__init__.py +21 -4
- mindspore/mint/nn/layer/_functions.py +334 -0
- mindspore/mint/nn/layer/activation.py +276 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +921 -0
- mindspore/mint/nn/layer/normalization.py +223 -28
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +235 -0
- mindspore/mint/optim/__init__.py +3 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/optim/sgd.py +171 -0
- mindspore/mint/special/__init__.py +2 -1
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/__init__.py +4 -1
- mindspore/nn/cell.py +1370 -189
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +29 -27
- mindspore/nn/layer/basic.py +51 -35
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/container.py +1 -1
- mindspore/nn/layer/conv.py +22 -17
- mindspore/nn/layer/embedding.py +12 -11
- mindspore/nn/layer/normalization.py +56 -49
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +120 -42
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +59 -36
- mindspore/nn/learning_rate_schedule.py +8 -4
- mindspore/nn/loss/loss.py +58 -55
- mindspore/nn/optim/ada_grad.py +7 -5
- mindspore/nn/optim/adadelta.py +11 -9
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +17 -13
- mindspore/nn/optim/adamax.py +8 -7
- mindspore/nn/optim/adasum.py +5 -5
- mindspore/nn/optim/asgd.py +1 -1
- mindspore/nn/optim/ftrl.py +11 -9
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/lazyadam.py +12 -10
- mindspore/nn/optim/momentum.py +7 -6
- mindspore/nn/optim/optimizer.py +3 -3
- mindspore/nn/optim/proximal_ada_grad.py +12 -10
- mindspore/nn/optim/rmsprop.py +13 -12
- mindspore/nn/optim/rprop.py +11 -9
- mindspore/nn/optim/sgd.py +9 -6
- mindspore/nn/optim/tft_wrapper.py +5 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/probability/bijector/bijector.py +17 -11
- mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
- mindspore/nn/probability/bijector/invert.py +2 -2
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +3 -2
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +1 -1
- mindspore/nn/probability/distribution/cauchy.py +4 -2
- mindspore/nn/probability/distribution/exponential.py +6 -7
- mindspore/nn/probability/distribution/gamma.py +2 -2
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/half_normal.py +5 -3
- mindspore/nn/probability/distribution/logistic.py +5 -3
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/uniform.py +5 -3
- mindspore/nn/reinforcement/_tensors_queue.py +1 -1
- mindspore/nn/reinforcement/tensor_array.py +1 -1
- mindspore/nn/utils/init.py +13 -11
- mindspore/nn/wrap/__init__.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +181 -122
- mindspore/nn/wrap/grad_reducer.py +45 -36
- mindspore/nn/wrap/loss_scale.py +6 -7
- mindspore/numpy/array_creations.py +63 -65
- mindspore/numpy/array_ops.py +149 -144
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +365 -363
- mindspore/numpy/utils.py +17 -18
- mindspore/numpy/utils_const.py +5 -6
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +5 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_register_for_op.py +0 -11
- mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
- mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
- mindspore/ops/_vmap/vmap_array_ops.py +27 -25
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
- mindspore/ops/_vmap/vmap_math_ops.py +15 -16
- mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
- mindspore/ops/auto_generate/__init__.py +4 -3
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
- mindspore/ops/auto_generate/gen_extend_func.py +764 -124
- mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
- mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
- mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
- mindspore/ops/composite/__init__.py +2 -1
- mindspore/ops/composite/base.py +20 -25
- mindspore/ops/composite/math_ops.py +6 -16
- mindspore/ops/composite/multitype_ops/__init__.py +5 -2
- mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
- mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
- mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
- mindspore/ops/function/__init__.py +40 -2
- mindspore/ops/function/_add_attr_func.py +58 -0
- mindspore/ops/function/array_func.py +2089 -2403
- mindspore/ops/function/clip_func.py +80 -23
- mindspore/ops/function/debug_func.py +57 -57
- mindspore/ops/function/grad/__init__.py +1 -0
- mindspore/ops/function/grad/grad_func.py +104 -71
- mindspore/ops/function/image_func.py +2 -2
- mindspore/ops/function/linalg_func.py +47 -78
- mindspore/ops/function/math_func.py +4501 -3802
- mindspore/ops/function/nn_func.py +1726 -620
- mindspore/ops/function/other_func.py +159 -1
- mindspore/ops/function/parameter_func.py +18 -84
- mindspore/ops/function/random_func.py +440 -387
- mindspore/ops/function/reshard_func.py +4 -70
- mindspore/ops/function/sparse_func.py +3 -3
- mindspore/ops/function/sparse_unary_func.py +6 -6
- mindspore/ops/function/spectral_func.py +25 -58
- mindspore/ops/function/vmap_func.py +24 -17
- mindspore/ops/functional.py +22 -7
- mindspore/ops/functional_overload.py +1440 -0
- mindspore/ops/op_info_register.py +32 -244
- mindspore/ops/operations/__init__.py +13 -7
- mindspore/ops/operations/_custom_ops_utils.py +247 -0
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -43
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +43 -84
- mindspore/ops/operations/_ms_kernel.py +4 -10
- mindspore/ops/operations/_rl_inner_ops.py +1 -1
- mindspore/ops/operations/_scalar_ops.py +3 -2
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/array_ops.py +81 -324
- mindspore/ops/operations/comm_ops.py +154 -108
- mindspore/ops/operations/custom_ops.py +232 -78
- mindspore/ops/operations/debug_ops.py +153 -59
- mindspore/ops/operations/inner_ops.py +7 -5
- mindspore/ops/operations/linalg_ops.py +1 -57
- mindspore/ops/operations/manually_defined/_inner.py +1 -1
- mindspore/ops/operations/manually_defined/ops_def.py +928 -180
- mindspore/ops/operations/math_ops.py +32 -234
- mindspore/ops/operations/nn_ops.py +210 -498
- mindspore/ops/operations/other_ops.py +62 -9
- mindspore/ops/operations/random_ops.py +13 -7
- mindspore/ops/operations/reshard_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +2 -2
- mindspore/ops/primitive.py +66 -53
- mindspore/ops/tensor_method.py +1888 -0
- mindspore/ops_generate/__init__.py +0 -5
- mindspore/ops_generate/aclnn/__init__.py +0 -0
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
- mindspore/ops_generate/api/__init__.py +0 -0
- mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
- mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
- mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
- mindspore/ops_generate/api/functions_cc_generator.py +237 -0
- mindspore/ops_generate/api/gen_api.py +103 -0
- mindspore/ops_generate/api/op_api_proto.py +235 -0
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
- mindspore/ops_generate/common/__init__.py +0 -0
- mindspore/ops_generate/common/base_generator.py +11 -0
- mindspore/ops_generate/common/gen_constants.py +91 -0
- mindspore/ops_generate/common/gen_utils.py +348 -0
- mindspore/ops_generate/common/op_proto.py +473 -0
- mindspore/ops_generate/common/template.py +523 -0
- mindspore/ops_generate/gen_ops.py +22 -1069
- mindspore/ops_generate/op_def/__init__.py +0 -0
- mindspore/ops_generate/op_def/gen_op_def.py +90 -0
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
- mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
- mindspore/ops_generate/op_def_py/__init__.py +0 -0
- mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
- mindspore/ops_generate/pyboost/__init__.py +0 -0
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
- mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
- mindspore/ops_generate/resources/__init__.py +0 -0
- mindspore/ops_generate/resources/resource_list.py +30 -0
- mindspore/ops_generate/resources/resource_loader.py +36 -0
- mindspore/ops_generate/resources/resource_manager.py +64 -0
- mindspore/ops_generate/resources/yaml_loader.py +88 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
- mindspore/parallel/__init__.py +7 -3
- mindspore/parallel/_auto_parallel_context.py +152 -34
- mindspore/parallel/_cell_wrapper.py +130 -15
- mindspore/parallel/_parallel_serialization.py +107 -5
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +7 -2
- mindspore/parallel/_tensor.py +142 -18
- mindspore/parallel/_utils.py +199 -23
- mindspore/parallel/algo_parameter_config.py +4 -4
- mindspore/parallel/auto_parallel.py +732 -0
- mindspore/parallel/checkpoint_convert.py +159 -0
- mindspore/parallel/checkpoint_transform.py +698 -35
- mindspore/parallel/cluster/process_entity/_api.py +276 -50
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +21 -4
- mindspore/parallel/function/__init__.py +24 -0
- mindspore/parallel/function/reshard_func.py +259 -0
- mindspore/parallel/nn/__init__.py +25 -0
- mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
- mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
- mindspore/parallel/parameter_broadcast.py +25 -14
- mindspore/parallel/shard.py +137 -58
- mindspore/parallel/transform_safetensors.py +363 -305
- mindspore/profiler/__init__.py +22 -5
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +186 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +221 -0
- mindspore/profiler/common/path_manager.py +395 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +500 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_meta_data.py +74 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +251 -0
- mindspore/profiler/common/profiler_path_manager.py +179 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +341 -75
- mindspore/profiler/envprofiler.py +163 -0
- mindspore/profiler/experimental_config.py +197 -0
- mindspore/profiler/mstx.py +242 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +335 -0
- mindspore/profiler/profiler.py +1073 -90
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +118 -0
- mindspore/profiler/schedule.py +243 -0
- mindspore/rewrite/api/node.py +15 -13
- mindspore/rewrite/api/symbol_tree.py +2 -3
- mindspore/run_check/_check_version.py +27 -20
- mindspore/run_check/run_check.py +1 -1
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +177 -0
- mindspore/runtime/memory.py +409 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/safeguard/rewrite_obfuscation.py +12 -9
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +8 -8
- mindspore/train/_utils.py +88 -25
- mindspore/train/amp.py +9 -5
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +2 -16
- mindspore/train/callback/_checkpoint.py +53 -55
- mindspore/train/callback/_cluster_monitor.py +14 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +103 -68
- mindspore/train/callback/_history.py +8 -5
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +0 -3
- mindspore/train/callback/_loss_monitor.py +2 -1
- mindspore/train/callback/_on_request_exit.py +6 -5
- mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
- mindspore/train/callback/_summary_collector.py +52 -19
- mindspore/train/callback/_time_monitor.py +2 -1
- mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
- mindspore/train/data_sink.py +25 -2
- mindspore/train/dataset_helper.py +15 -16
- mindspore/train/loss_scale_manager.py +8 -7
- mindspore/train/metrics/accuracy.py +3 -3
- mindspore/train/metrics/confusion_matrix.py +9 -9
- mindspore/train/metrics/error.py +3 -3
- mindspore/train/metrics/hausdorff_distance.py +4 -4
- mindspore/train/metrics/mean_surface_distance.py +3 -3
- mindspore/train/metrics/metric.py +0 -12
- mindspore/train/metrics/occlusion_sensitivity.py +4 -2
- mindspore/train/metrics/precision.py +11 -10
- mindspore/train/metrics/recall.py +9 -9
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +174 -46
- mindspore/train/model.py +184 -113
- mindspore/train/serialization.py +622 -978
- mindspore/train/summary/_summary_adapter.py +2 -2
- mindspore/train/summary/summary_record.py +2 -3
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dryrun.py +140 -0
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/utils/utils.py +138 -4
- mindspore/version.py +1 -1
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +562 -393
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
- mindspore/_install_custom.py +0 -43
- mindspore/common/_register_for_adapter.py +0 -74
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
- mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
- mindspore/ops_generate/gen_aclnn_implement.py +0 -263
- mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
- mindspore/ops_generate/gen_pyboost_func.py +0 -1052
- mindspore/ops_generate/gen_utils.py +0 -209
- mindspore/ops_generate/op_proto.py +0 -145
- mindspore/ops_generate/template.py +0 -261
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,938 @@
|
|
|
1
|
+
# Copyright 2024 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""
|
|
16
|
+
This module defines several classes and functions for generating C++ code for PyBoost operations,
|
|
17
|
+
including function headers, source files, and registration code. It handles the generation of code
|
|
18
|
+
for different devices (Ascend, CPU, GPU) and manages residual files associated with operator prototypes.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import re
|
|
23
|
+
|
|
24
|
+
import common.template as template
|
|
25
|
+
import common.gen_constants as K
|
|
26
|
+
from common.gen_utils import save_file
|
|
27
|
+
from common.op_proto import OpProto
|
|
28
|
+
from common.base_generator import BaseGenerator
|
|
29
|
+
|
|
30
|
+
from .pyboost_utils import is_cube, AclnnUtils, get_return_type, merge_strings_by_chunk_size, is_op_multi_output, \
|
|
31
|
+
chunk_list
|
|
32
|
+
from .op_template_parser import OpTemplateParser
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class PyboostCommonOpHeaderGenerator(BaseGenerator):
    """
    Generates the common C++ header of every dispatch-enabled PyBoost operation.

    One ``<op_name>.h`` file is rendered per operator prototype from
    ``template.PYBOOST_BASE_OP_DEFINE_TEMPLATE`` and saved under the
    ``auto_generate`` directory of ``K.MS_PYBOOST_BASE_PATH``.
    """

    def __init__(self):
        self.pyboost_op_header_str = template.PYBOOST_BASE_OP_DEFINE_TEMPLATE

    def generate(self, work_path, op_protos):
        """
        Render and save one base-op header per operator prototype.

        Prototypes whose ``op_dispatch`` is ``None`` are skipped.

        Args:
            work_path (str): The directory path the output directory is joined onto.
            op_protos (list): A list of operator prototypes containing information about the operators.

        Returns:
            None
        """
        tuple_override = "bool output_is_tuple() const override { return true; }"
        for proto in op_protos:
            if proto.op_dispatch is None:
                continue
            parser = OpTemplateParser(proto)
            class_name = proto.op_class.name
            typed_args = parser.parse_call_args_with_types()
            return_type = _generate_cpp_func_return(proto)
            # Only multi-output operators override output_is_tuple() in the generated class.
            if is_op_multi_output(proto.op_returns):
                output_is_tuple = tuple_override
            else:
                output_is_tuple = ''
            header_text = template.PYBOOST_BASE_OP_DEFINE_TEMPLATE.replace(
                op_name=class_name,
                op_name_upper=class_name.upper(),
                call_args=typed_args,
                return_type=return_type,
                output_is_tuple=output_is_tuple,
            )
            target_dir = os.path.join(work_path, f"{K.MS_PYBOOST_BASE_PATH}/auto_generate/")
            save_file(target_dir, f"{proto.op_name}.h", header_text)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class PyboostOpHeaderGenerator(BaseGenerator):
    """
    Generates device-specific C++ headers for PyBoost operations.

    The header template is chosen once, at construction time, from the target
    device (ascend, gpu or cpu); ``generate`` then writes one header per operator.
    """

    def __init__(self, device):
        """
        Select the header template and the output directories for ``device``.

        Args:
            device (str): The target device (ascend, gpu, or cpu).

        Raises:
            ValueError: If the device is not supported.
        """
        header_templates = {
            "ascend": template.PYBOOST_ASCEND_OP_HEADER_TEMPLATE,
            "gpu": template.PYBOOST_GPU_OP_HEADER_TEMPLATE,
            "cpu": template.PYBOOST_CPU_OP_HEADER_TEMPLATE,
        }
        if device not in header_templates:
            raise ValueError(
                f"Device must be ascend, gpu, or cpu, {device} is not supported")
        self.PYBOOST_OP_HEADER_TEMPLATE = header_templates[device]
        self.code_generate_path = f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/"
        self.hccl_code_generate_path = "mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/"
        self.device = device

    def generate(self, work_path, op_protos):
        """
        Render and save one device header per operator prototype.

        Prototypes without dispatch information, or whose dispatch entry for this
        device is the string ``'None'``, are skipped.

        Args:
            work_path (str): The directory path the output directory is joined onto.
            op_protos (list): A list of operator prototypes containing information about the operators.

        Returns:
            None
        """
        for proto in op_protos:
            dispatch = proto.op_dispatch
            if dispatch is None:
                continue
            if getattr(dispatch, self.device) == 'None':
                continue
            is_ascend_comm_op = dispatch.is_comm_op and self.device == 'ascend'
            parser = OpTemplateParser(proto)
            class_name = proto.op_class.name
            typed_args = parser.parse_call_args_with_types()
            return_type = _generate_cpp_func_return(proto)

            header_text = self.PYBOOST_OP_HEADER_TEMPLATE.replace(
                op_name=class_name,
                op_name_upper=class_name.upper(),
                operator_name=proto.op_name,
                call_args_with_type=typed_args,
                return_type=return_type,
            )

            # Ascend communication operators are written to the HCCL directory instead.
            if is_ascend_comm_op:
                relative_dir = self.hccl_code_generate_path
            else:
                relative_dir = self.code_generate_path
            save_file(os.path.join(work_path, relative_dir), f"{proto.op_name}.h", header_text)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class PyboostOpCppGenerator:
    """
    Generates C++ source files for PyBoost operations.

    This class generates the implementation of operations that dispatch to a customized
    kernel on the target device, handling function calls and registering custom kernels
    as necessary.
    """

    _SUPPORTED_DEVICES = ('ascend', 'cpu', 'gpu')

    def __init__(self, device):
        """
        Initializes the PyboostOpCppGenerator with the appropriate templates for the specified device.

        Args:
            device (str): The target device (ascend, gpu, or cpu).

        Raises:
            ValueError: If the device is not supported.
        """
        # Validate first so an unsupported device fails before any template is looked up.
        if device not in self._SUPPORTED_DEVICES:
            raise ValueError(
                f"Device must be ascend, gpu, or cpu, {device} is not supported")
        # (customize call, single-op header, single-op source) templates per device.
        templates_by_device = {
            'ascend': (template.PYBOOST_ASCEND_CUSTOMIZE_CALL_TEMPLATE,
                       template.PYBOOST_ASCEND_SINGLE_OP_HEADER_TEMPLATE,
                       template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE),
            'cpu': (template.PYBOOST_CPU_CUSTOMIZE_CALL_TEMPLATE,
                    template.PYBOOST_CPU_SINGLE_OP_HEADER_TEMPLATE,
                    template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE),
            'gpu': (template.PYBOOST_GPU_CUSTOMIZE_CALL_TEMPLATE,
                    template.PYBOOST_GPU_SINGLE_OP_HEADER_TEMPLATE,
                    template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE),
        }
        (self.PYBOOST_CUSTOMIZE_CALL_TEMPLATE,
         self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE,
         self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE) = templates_by_device[device]
        self.PYBOOST_SINGLE_HCLL_OP_HEADER_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_HCLL_OP_HEADER_TEMPLATE
        self.gen_path = f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/"
        self.device = device

    def generate_customize_op_cpp_code(self, op_protos, merge_op_header, merge_op_function, merge_op_inc,
                                       merge_op_hccl_header=None, merge_op_hccl_function=None, merge_op_hccl_inc=None):
        """
        Generate C++ code for PyBoost operations using the provided operation prototypes.

        This method processes a list of operation prototypes, generates customized function call
        implementations, and appends the results to the supplied lists. Prototypes without
        dispatch information, or whose dispatch entry for this device is ``'default'`` or
        ``'None'``, are skipped.

        Args:
            op_protos (list): A list of operation prototypes to process. Each prototype contains
                metadata about the operation, including dispatch settings and arguments.
            merge_op_header (list): A list to store the generated C++ header code for operations.
            merge_op_function (list): A list to store the generated C++ source code for operations.
            merge_op_inc (list): A list to store the class name of every operation appended to
                ``merge_op_header`` / ``merge_op_function``.
            merge_op_hccl_header (list, optional): Counterpart of ``merge_op_header`` used for
                Ascend communication operations.
            merge_op_hccl_function (list, optional): Counterpart of ``merge_op_function`` used for
                Ascend communication operations.
            merge_op_hccl_inc (list, optional): Counterpart of ``merge_op_inc`` used for
                Ascend communication operations.

        Raises:
            ValueError: If an Ascend communication operation is encountered and any of the three
                ``merge_op_hccl_*`` lists was not provided.
        """
        for op_proto in op_protos:
            op_dispatch = op_proto.op_dispatch
            if op_dispatch is None:
                continue
            dispatch_target = getattr(op_dispatch, self.device)
            if dispatch_target in ('default', 'None'):
                continue
            is_ascend_comm_op = op_dispatch.is_comm_op and self.device == 'ascend'
            operator_name = op_proto.op_name

            # Check every HCCL list up front: all three are appended to below, and failing here
            # keeps the caller's lists untouched for this operator.
            if is_ascend_comm_op and (merge_op_hccl_header is None or merge_op_hccl_function is None
                                      or merge_op_hccl_inc is None):
                raise ValueError(
                    f"merge_op_hccl_header, merge_op_hccl_function and merge_op_hccl_inc must be provided "
                    f"for comm op {operator_name}")

            op_parser = OpTemplateParser(op_proto)
            call_args = op_parser.parse_original_call_args(op_proto.op_args)
            call_args_with_type = op_parser.parse_call_args_with_types()
            _, call_func_outputs = op_parser.generate_pyboost_outputs()
            op_name_str = op_proto.op_class.name

            # Only the first return value marked inplace contributes an overlap check.
            check_inplace_func = ''
            for arg in op_proto.op_returns:
                if arg.inplace != '':
                    check_inplace_func = f'ThrowExpectionWhenInternalOverlap({arg.inplace}_tensor);'
                    break

            call_impl = self.PYBOOST_CUSTOMIZE_CALL_TEMPLATE.replace(
                call_args=call_args,
                return_values=call_func_outputs,
                customize_func=dispatch_target + "Customize",
                check_expression=check_inplace_func,
            )

            # Communication operators on Ascend use their own include path, header template
            # and output lists; everything else goes to the regular ones.
            if is_ascend_comm_op:
                customize_include = \
                    f'#include "mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/{operator_name.lower()}.h"\n'
                header_template = self.PYBOOST_SINGLE_HCLL_OP_HEADER_TEMPLATE
                headers, functions, includes = merge_op_hccl_header, merge_op_hccl_function, merge_op_hccl_inc
            else:
                customize_include = \
                    f'#include "{K.MS_OPS_KERNEL_PATH}/{self.device}/pyboost/customize/{operator_name.lower()}.h"\n'
                header_template = self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE
                headers, functions, includes = merge_op_header, merge_op_function, merge_op_inc

            register_custom = self._get_register_custom_kernel(op_proto)
            cpp_func_return = _generate_cpp_func_return(op_proto)

            headers.append(header_template.replace(operator_name=operator_name,
                                                   customize_include=customize_include))
            functions.append(
                self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_name_str,
                                                               call_args_with_type=call_args_with_type,
                                                               return_type=cpp_func_return, call_impl=call_impl,
                                                               register_custom_kernel=register_custom))
            includes.append(op_name_str)

    def _get_register_custom_kernel(self, op_proto: "OpProto"):
        """
        Generates the registration code for custom kernels based on the device.

        Args:
            op_proto (OpProto): The operator prototype to generate registration for.

        Returns:
            str: The registration code for the custom kernel; an empty string on ascend.

        Raises:
            ValueError: If ``self.device`` is not ascend, gpu, or cpu.
        """
        if self.device == 'ascend':
            return ''
        if self.device == 'cpu':
            return f"MS_REG_PYBOOST_CPU_CUSTOM_KERNEL({op_proto.op_class.name});"
        if self.device == 'gpu':
            return f"MS_REG_PYBOOST_GPU_CUSTOM_KERNEL({op_proto.op_class.name});"
        raise ValueError(
            f"Device must be ascend, gpu, or cpu, {self.device} is not supported")
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class PyboostViewOpCppGenerator:
    """
    Generates C++ source files for view operations in PyBoost.

    This class handles the generation of source files for view operations, which have special handling
    compared to regular operations.
    """

    _SUPPORTED_DEVICES = ('ascend', 'cpu', 'gpu')

    def __init__(self, device):
        """
        Initializes the PyboostViewOpCppGenerator with the appropriate templates for the specified device.

        Args:
            device (str): The target device (ascend, gpu, or cpu).

        Raises:
            ValueError: If the device is not supported.
        """
        # Validate first so an unsupported device fails before any template is looked up.
        if device not in self._SUPPORTED_DEVICES:
            raise ValueError(
                f"Device must be ascend, gpu, or cpu, {device} is not supported")
        # (view call, single-op header, single-op source) templates per device.
        templates_by_device = {
            'ascend': (template.PYBOOST_ASCEND_VIEW_CALL_TEMPLATE,
                       template.PYBOOST_ASCEND_SINGLE_OP_HEADER_TEMPLATE,
                       template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE),
            'cpu': (template.PYBOOST_CPU_VIEW_CALL_TEMPLATE,
                    template.PYBOOST_CPU_SINGLE_OP_HEADER_TEMPLATE,
                    template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE),
            'gpu': (template.PYBOOST_GPU_VIEW_CALL_TEMPLATE,
                    template.PYBOOST_GPU_SINGLE_OP_HEADER_TEMPLATE,
                    template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE),
        }
        (self.PYBOOST_VIEW_CALL_TEMPLATE,
         self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE,
         self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE) = templates_by_device[device]
        self.gen_path = f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/"
        self.device = device

    def generate_view_op_cpp_code(self, op_protos, merge_op_header, merge_op_function, ascend_merge_op_inc):
        """
        Generate C++ code for view operations in PyBoost.

        This method processes a list of operation prototypes (`op_protos`) and generates C++ code
        for view operations where `op_view` is set to `True` and the dispatch setting for the target
        device is `'default'`. All other prototypes are skipped.

        Args:
            op_protos (list): A list of operation prototypes to process. Each prototype includes
                metadata such as dispatch settings, arguments, and view-specific attributes.
            merge_op_header (list): A list to store the generated C++ header code for view operations.
            merge_op_function (list): A list to store the generated C++ source code for view operations.
            ascend_merge_op_inc (list): A list to store the class name of every generated view
                operation (used for this generator's device, whatever that device is).
        """
        for op_proto in op_protos:
            op_dispatch = op_proto.op_dispatch
            if op_dispatch is None:
                continue
            # Anything other than 'default' is skipped, which already covers the 'None' entry.
            if getattr(op_dispatch, self.device) != 'default':
                continue
            if not op_proto.op_view:
                continue

            op_parser = OpTemplateParser(op_proto)
            call_args_tensor = op_parser.get_call_args_tensor()
            call_args = op_parser.parse_original_call_args(op_proto.op_args)
            call_args_with_type = op_parser.parse_call_args_with_types()
            _, call_func_outputs = op_parser.generate_pyboost_outputs()
            # The first call argument is passed to the template as the view's input.
            call_impl = self.PYBOOST_VIEW_CALL_TEMPLATE.replace(op_name=op_proto.op_class.name,
                                                                call_args=call_args,
                                                                call_tensors=call_args_tensor,
                                                                return_values=call_func_outputs,
                                                                input=call_args[0])
            customize_include = f'#include "{K.MS_OPS_VIEW_PATH}/{op_proto.op_name}_strides_calc.h"\n'
            cpp_func_return = _generate_cpp_func_return(op_proto)
            merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=op_proto.op_name,
                                                                                  customize_include=customize_include))

            merge_op_function.append(
                self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_proto.op_class.name,
                                                               call_args_with_type=call_args_with_type,
                                                               return_type=cpp_func_return,
                                                               call_impl=call_impl,
                                                               register_custom_kernel=""))
            ascend_merge_op_inc.append(op_proto.op_class.name)
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
class AclnnOpCppCodeGenerator:
    """
    Generates C++ source files for ACLNN operations in PyBoost.

    This class handles the generation of source code for non-view operations whose dispatch
    setting on the target device is `'default'`, including the input preparation and
    tensor-casting snippets that are substituted into the call template.

    Attributes:
        PYBOOST_CALL_TEMPLATE (Template): Template for generating ACLNN operation calls.
        PYBOOST_SINGLE_OP_HEADER_TEMPLATE (Template): Template for the per-operation `#include` line.
        PYBOOST_SINGLE_OP_SOURCE_TEMPLATE (Template): Template for generating operation source code.
        gen_path (str): Path for saving the generated C++ source files.
        device (str): The target device (ascend, cpu, or gpu).
    """

    _SUPPORTED_DEVICES = ('ascend', 'cpu', 'gpu')

    def __init__(self, device):
        """
        Initializes the AclnnOpCppCodeGenerator with the appropriate templates for the specified device.

        Args:
            device (str): The target device (ascend, gpu, or cpu).

        Raises:
            ValueError: If the device is not supported.
        """
        # Validate first so an unsupported device fails before any template is looked up.
        if device not in self._SUPPORTED_DEVICES:
            raise ValueError(
                f"Device must be ascend, gpu, or cpu, {device} is not supported")
        # (call, single-op source) templates per device.
        templates_by_device = {
            'ascend': (template.PYBOOST_ASCEND_CALL_TEMPLATE,
                       template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE),
            'cpu': (template.PYBOOST_CPU_CALL_TEMPLATE,
                    template.PYBOOST_CPU_SINGLE_OP_SOURCE_TEMPLATE),
            'gpu': (template.PYBOOST_GPU_CALL_TEMPLATE,
                    template.PYBOOST_GPU_SINGLE_OP_SOURCE_TEMPLATE),
        }
        self.PYBOOST_CALL_TEMPLATE, self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = templates_by_device[device]
        self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.Template(
            '#include "kernel/${device}/pyboost/auto_generate/${operator_name}.h"\n'
        )
        self.gen_path = f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/"
        self.device = device

    def generate_aclnn_op_cpp_code(self, op_protos, merge_op_header, merge_op_function, ascend_merge_op_inc):
        """
        Generate C++ code for ACLNN operations in PyBoost.

        This method processes a list of operation prototypes (`op_protos`) and generates C++ code
        for aclnn operations. Only prototypes that have dispatch information, whose dispatch
        setting for the target device is `'default'` and that are not view operations are
        processed; all others are skipped.

        Args:
            op_protos (list): A list of operation prototypes. Each prototype includes metadata
                such as operation name, dispatch settings, view attributes, and arguments.
            merge_op_header (list): A list to store the generated C++ header code for ACLNN operations.
            merge_op_function (list): A list to store the generated C++ source code for ACLNN operations.
            ascend_merge_op_inc (list): A list to store the class name of every generated
                operation (used for this generator's device, whatever that device is).
        """
        for op_proto in op_protos:
            op_dispatch = op_proto.op_dispatch
            if op_dispatch is None:
                continue
            # Anything other than 'default' is skipped, which already covers the 'None' entry.
            if getattr(op_dispatch, self.device) != 'default':
                continue
            if op_proto.op_view:
                continue

            op_parser = OpTemplateParser(op_proto)
            op_class_name = op_proto.op_class.name
            aclnn_name = AclnnUtils.get_aclnn_interface(op_class_name)

            call_args_tensor = op_parser.get_call_args_tensor()
            create_input_address = self._generate_create_input_address(op_parser)
            malloc_inputs = self._generate_malloc_input(op_parser)
            op_outputs, call_func_outputs = op_parser.generate_pyboost_outputs()
            get_inputs_kernel_tensors = self._generate_get_inputs_kernel_tensors(op_parser)

            cube_math_type, get_cube_math_type = '', ''
            if self.device == 'ascend' and is_cube(op_class_name):
                get_cube_math_type = ('// cubeMathType: 0 - KEEP_DTYPE, 1 - ALLOW_FP32_DOWN_PRECISION\n'
                                      'auto cube_math_type = GetCubeMathType();')
                cube_math_type = ', cube_math_type'

            # Computed once: it is substituted into the template and also decides whether the
            # outputs are passed as extra call arguments (only when there is no inplace code).
            inplace_process = _generate_inplace_process_cpp_code(op_proto)
            real_output = ', ' + op_outputs if inplace_process == '' else ''

            cast_input_code, real_call_args_tensor = self._generate_tensor_cpu_cast_input_code(op_parser)
            cpp_func_return = _generate_cpp_func_return(op_proto)
            _, tensor_list_convert, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
            call_args_after_convert, value_tuple_convert, const_number_convert = op_parser.op_args_converter()
            call_args = op_parser.parse_original_call_args(op_proto.op_args)
            call_args_with_type = op_parser.parse_call_args_with_types()
            call_impl = self.PYBOOST_CALL_TEMPLATE.replace(aclnn_name=aclnn_name,
                                                           call_args=call_args,
                                                           call_tensors=call_args_tensor,
                                                           value_tuple_convert=value_tuple_convert,
                                                           const_number_convert=const_number_convert,
                                                           create_input_address=create_input_address,
                                                           tensor_list_convert=tensor_list_convert,
                                                           call_args_with_tensor=call_args_with_tensor,
                                                           malloc_inputs=malloc_inputs,
                                                           get_inputs_kernel_tensors=get_inputs_kernel_tensors,
                                                           get_cube_math_type=get_cube_math_type,
                                                           cube_math_type=cube_math_type,
                                                           real_call_args=call_args_after_convert,
                                                           return_values=call_func_outputs,
                                                           outputs=real_output,
                                                           inplace_process=inplace_process,
                                                           cast_input_code=cast_input_code,
                                                           real_call_args_tensor=real_call_args_tensor,
                                                           class_name=op_class_name,
                                                           op_name_str=op_class_name)

            merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=op_proto.op_name,
                                                                                  device=self.device))

            merge_op_function.append(
                self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_class_name,
                                                               call_args_with_type=call_args_with_type,
                                                               return_type=cpp_func_return,
                                                               call_impl=call_impl,
                                                               register_custom_kernel=''))
            ascend_merge_op_inc.append(op_class_name)

    @staticmethod
    def _join_args(items):
        """Return the formatted ``items`` joined with ``', '`` (an empty string for no items)."""
        return ', '.join(f'{item}' for item in items)

    def _generate_tensor_cpu_cast_input_code(self, op_parser: "OpTemplateParser"):
        """
        Generates the input casting code for CPU tensor operations.

        Args:
            op_parser (OpTemplateParser): The parser object for the operation prototype.

        Returns:
            tuple: A tuple containing the casting code (an empty string when nothing is cast)
                and the list of call arguments with the cast ones replaced by their `real_` names.
        """
        _, _, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
        call_tensors = op_parser.get_call_args_tensor()
        cast_input = ""
        real_call_args_tensor = call_args_with_tensor.copy()
        for i, tensor in enumerate(call_args_with_tensor):
            # Both flags are taken from the original name, before any rewrite below.
            is_tuple_tensor = real_call_args_tensor[i].endswith("_vector")
            is_tensor = real_call_args_tensor[i] in call_tensors
            if is_tensor:
                # Tensor argument: cast it and use the cast result in the call.
                cast_input += f'const auto &real_{tensor} = PyBoostUtils::CastTensor({tensor}, ' \
                              f'select_kernel.input_type()[{i}].dtype, "CPU");\n'
                real_call_args_tensor[i] = "real_" + real_call_args_tensor[i]
            if is_tuple_tensor:
                # Argument named `*_vector`: cast it and wrap the cast result as a tuple.
                cast_input += f'const auto &real_{tensor} = PyBoostUtils::CastTensor({tensor}, ' \
                              f'select_kernel.input_type()[{i}].dtype, "CPU");\n'
                real_call_args_tensor[i] = "PyBoostUtils::ConvertTensorVectorToTuple(real_" + real_call_args_tensor[
                    i] + ")"
        if cast_input != "":
            # The cast statements read `select_kernel`, so declare it ahead of them.
            cast_input = "auto &select_kernel = kernel_attr_pair.second;\n" + cast_input
        return cast_input, real_call_args_tensor

    def _generate_create_input_address(self, op_parser: "OpTemplateParser"):
        """
        Generates the `PyBoostUtils::PrepareOpInputs` call for the tensors that need to be allocated.

        Args:
            op_parser (OpTemplateParser): The parser object for the operation prototype.

        Returns:
            str: The generated statement, or an empty string when no tensor needs allocation.
        """
        need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
        args_list = self._join_args(need_malloc_tensors)
        if not args_list:
            return ''
        return f'PyBoostUtils::PrepareOpInputs(device_context_, op->stream_id(), {args_list});\n'

    def _generate_malloc_input(self, op_parser: "OpTemplateParser"):
        """
        Generates the `PyBoostUtils::MallocOpInputs` call for the tensors that need to be allocated.

        Args:
            op_parser (OpTemplateParser): The parser object for the operation prototype.

        Returns:
            str: The generated statement, or an empty string when no tensor needs allocation.
        """
        need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
        args_list = self._join_args(need_malloc_tensors)
        if not args_list:
            return ''
        return f'PyBoostUtils::MallocOpInputs(device_context, {args_list});\n'

    def _generate_get_inputs_kernel_tensors(self, op_parser: "OpTemplateParser"):
        """
        Generates the code for retrieving input kernel tensors.

        Args:
            op_parser (OpTemplateParser): The parser object for the operation prototype.

        Returns:
            str: The generated `PyBoostUtils::GetAddressInfo` statement, or an empty string
                when the operation has no call arguments.
        """
        _, _, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
        args_list = self._join_args(call_args_with_tensor)
        if not args_list:
            return ''
        return f'const auto &input_address_info = PyBoostUtils::GetAddressInfo(' \
               f'device_context, op->stream_id(), op->input_abs(), {args_list});\n'
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
class PyboostOpFunctionGenerator(BaseGenerator):
|
|
581
|
+
"""
|
|
582
|
+
Generates the merged C++ source files of PyBoost operations for the Ascend, CPU and GPU devices.

This class owns one customize-op, one view-op and one ACLNN code generator per device and
delegates code generation to them.

Attributes:
    <device>_op_cpp_generator (PyboostOpCppGenerator): Per-device generator for customized operations.
    <device>_view_op_cpp_generator (PyboostViewOpCppGenerator): Per-device generator for view operations.
    <device>_aclnn_cpp_generator (AclnnOpCppCodeGenerator): Per-device generator for ACLNN operations.
    PYBOOST_<DEVICE>_OP_SOURCE_TEMPLATE (Template): Per-device template for operation source files.
    <device>_gen_path (str): Per-device path for saving the generated C++ source files.
    hccl_gen_path (str): Path for saving the generated HCCL C++ source files.
|
|
592
|
+
"""
|
|
593
|
+
|
|
594
|
+
def __init__(self):
    """Create the per-device code generators and record each device's source template and output path."""
    # Generators for operations backed by a customized kernel.
    self.ascend_op_cpp_generator = PyboostOpCppGenerator('ascend')
    self.cpu_op_cpp_generator = PyboostOpCppGenerator('cpu')
    self.gpu_op_cpp_generator = PyboostOpCppGenerator('gpu')

    # Generators for view operations.
    self.ascend_view_op_cpp_generator = PyboostViewOpCppGenerator('ascend')
    self.cpu_view_op_cpp_generator = PyboostViewOpCppGenerator('cpu')
    self.gpu_view_op_cpp_generator = PyboostViewOpCppGenerator('gpu')

    # Generators for ACLNN operations.
    self.ascend_aclnn_cpp_generator = AclnnOpCppCodeGenerator('ascend')
    self.cpu_aclnn_cpp_generator = AclnnOpCppCodeGenerator('cpu')
    self.gpu_aclnn_cpp_generator = AclnnOpCppCodeGenerator('gpu')

    # Whole-file source templates, one per device.
    self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE
    self.PYBOOST_CPU_OP_SOURCE_TEMPLATE = template.PYBOOST_CPU_OP_SOURCE_TEMPLATE
    self.PYBOOST_GPU_OP_SOURCE_TEMPLATE = template.PYBOOST_GPU_OP_SOURCE_TEMPLATE

    # Output directories, relative to the work path handed to generate().
    kernel_root = K.MS_OPS_KERNEL_PATH
    self.ascend_gen_path = f"{kernel_root}/ascend/pyboost/auto_generate/"
    self.cpu_gen_path = f"{kernel_root}/cpu/pyboost/auto_generate/"
    self.gpu_gen_path = f"{kernel_root}/gpu/pyboost/auto_generate/"
    self.hccl_gen_path = "mindspore/ccsrc/plugin/device/ascend/kernel/hccl/pyboost/auto_generate/"
|
|
614
|
+
|
|
615
|
+
def generate(self, work_path, op_protos):
    """
    Generate and save C++ source code for PyBoost operations across different devices.

    The work is delegated, in this order, to the Ascend, CPU and GPU helper methods;
    each one receives the same `work_path` and `op_protos`.

    Args:
        work_path (str): The base working directory where the generated files will be saved.
        op_protos (list): A list of operation prototypes containing metadata such as
            operation name, dispatch settings, arguments, and view attributes.
    """
    per_device_steps = (
        self._generate_pyboost_ascend_ops,
        self._generate_pyboost_cpu_ops,
        self._generate_pyboost_gpu_ops,
    )
    for emit_device_ops in per_device_steps:
        emit_device_ops(work_path, op_protos)
|
|
636
|
+
|
|
637
|
+
def _generate_pyboost_ascend_ops(self, work_path, op_protos):
    """
    Write the chunked Ascend PyBoost source files and the single HCCL source file.

    The customize, view and aclnn generators each append to the same header, function and
    include lists; the customize generator additionally fills the HCCL lists. Headers and
    functions are merged with a chunk size of 120 and one `pyboost_ascend_ops_<i>.cc` file is
    saved per merged chunk. All HCCL entries go into one `pyboost_hccl_ops.cc` file.

    Args:
        work_path (str): The directory path under which the generated C++ source files are saved.
        op_protos (list): A list of operation prototypes passed to the three Ascend generators.
    """
    headers, functions, includes = [], [], []
    hccl_headers, hccl_functions, hccl_includes = [], [], []

    self.ascend_op_cpp_generator.generate_customize_op_cpp_code(
        op_protos, headers, functions, includes,
        hccl_headers, hccl_functions, hccl_includes)
    self.ascend_view_op_cpp_generator.generate_view_op_cpp_code(
        op_protos, headers, functions, includes)
    self.ascend_aclnn_cpp_generator.generate_aclnn_op_cpp_code(
        op_protos, headers, functions, includes)

    header_chunks = merge_strings_by_chunk_size(headers, chunk_size=120)
    function_chunks = merge_strings_by_chunk_size(functions, chunk_size=120)
    include_chunks = chunk_list(includes, n=120)

    # Clear out previously merged files first when their count differs from the new count.
    chunk_count = len(header_chunks)
    ascend_dir = os.path.join(work_path, self.ascend_gen_path)
    self._delete_residual_merged_ops_files(ascend_dir, chunk_count)

    for index, (header_text, function_text) in enumerate(zip(header_chunks, function_chunks)):
        # A set collapses operators that share the same first letter into one include entry.
        include_heads = {
            template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=inc_name[0].lower())
            for inc_name in include_chunks[index]
        }
        source_text = self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE.replace(
            merge_op_header=header_text, merge_op_function=function_text,
            ops_inc=sorted(include_heads))
        save_file(ascend_dir, f"pyboost_ascend_ops_{index}.cc", source_text)

    # HCCL operators are not chunked: everything is written to a single file.
    hccl_include_heads = {
        template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=inc_name[0].lower())
        for inc_name in hccl_includes
    }
    hccl_source_text = self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE.replace(
        merge_op_header='\n'.join(hccl_headers), merge_op_function='\n'.join(hccl_functions),
        ops_inc=sorted(hccl_include_heads))
    save_file(os.path.join(work_path, self.hccl_gen_path), "pyboost_hccl_ops.cc",
              hccl_source_text)
|
|
693
|
+
|
|
694
|
+
def _generate_pyboost_cpu_ops(self, work_path, op_protos):
    """
    Write the chunked CPU PyBoost source files.

    The customize, view and aclnn generators each append to the same header, function and
    include lists. Headers and functions are merged with a chunk size of 120 and one
    `pyboost_cpu_ops_<i>.cc` file is saved per merged chunk.

    Args:
        work_path (str): The directory path under which the generated C++ source files are saved.
        op_protos (list): A list of operation prototypes passed to the three CPU generators.
    """
    headers, functions, includes = [], [], []
    collectors = (
        self.cpu_op_cpp_generator.generate_customize_op_cpp_code,
        self.cpu_view_op_cpp_generator.generate_view_op_cpp_code,
        self.cpu_aclnn_cpp_generator.generate_aclnn_op_cpp_code,
    )
    for collect in collectors:
        collect(op_protos, headers, functions, includes)

    header_chunks = merge_strings_by_chunk_size(headers, chunk_size=120)
    function_chunks = merge_strings_by_chunk_size(functions, chunk_size=120)
    include_chunks = chunk_list(includes, n=120)

    # Clear out previously merged files first when their count differs from the new count.
    chunk_count = len(header_chunks)
    cpu_dir = os.path.join(work_path, self.cpu_gen_path)
    self._delete_residual_merged_ops_files(cpu_dir, chunk_count)

    for index, (header_text, function_text) in enumerate(zip(header_chunks, function_chunks)):
        # A set collapses operators that share the same first letter into one include entry.
        include_heads = {
            template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=inc_name[0].lower())
            for inc_name in include_chunks[index]
        }
        source_text = self.PYBOOST_CPU_OP_SOURCE_TEMPLATE.replace(
            merge_op_header=header_text, merge_op_function=function_text,
            ops_inc=sorted(include_heads))
        save_file(cpu_dir, f"pyboost_cpu_ops_{index}.cc", source_text)
|
|
732
|
+
|
|
733
|
+
def _generate_pyboost_gpu_ops(self, work_path, op_protos):
    """
    Write the chunked GPU PyBoost source files.

    The customize, view and aclnn generators each append to the same header, function and
    include lists. Headers and functions are merged with a chunk size of 120 and one
    `pyboost_gpu_ops_<i>.cc` file is saved per merged chunk.

    Args:
        work_path (str): The directory path under which the generated C++ source files are saved.
        op_protos (list): A list of operation prototypes passed to the three GPU generators.
    """
    headers, functions, includes = [], [], []
    collectors = (
        self.gpu_op_cpp_generator.generate_customize_op_cpp_code,
        self.gpu_view_op_cpp_generator.generate_view_op_cpp_code,
        self.gpu_aclnn_cpp_generator.generate_aclnn_op_cpp_code,
    )
    for collect in collectors:
        collect(op_protos, headers, functions, includes)

    header_chunks = merge_strings_by_chunk_size(headers, chunk_size=120)
    function_chunks = merge_strings_by_chunk_size(functions, chunk_size=120)
    include_chunks = chunk_list(includes, n=120)

    # Clear out previously merged files first when their count differs from the new count.
    chunk_count = len(header_chunks)
    gpu_dir = os.path.join(work_path, self.gpu_gen_path)
    self._delete_residual_merged_ops_files(gpu_dir, chunk_count)

    for index, (header_text, function_text) in enumerate(zip(header_chunks, function_chunks)):
        # A set collapses operators that share the same first letter into one include entry.
        include_heads = {
            template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=inc_name[0].lower())
            for inc_name in include_chunks[index]
        }
        source_text = self.PYBOOST_GPU_OP_SOURCE_TEMPLATE.replace(
            merge_op_header=header_text, merge_op_function=function_text,
            ops_inc=sorted(include_heads))
        save_file(gpu_dir, f"pyboost_gpu_ops_{index}.cc", source_text)
|
|
771
|
+
|
|
772
|
+
def _delete_residual_merged_ops_files(self, files_path, new_gen_num):
|
|
773
|
+
"""
|
|
774
|
+
Deletes residual merged operation files in the specified directory if the number of
|
|
775
|
+
newly generated files does not match the number of existing ones.
|
|
776
|
+
|
|
777
|
+
This method first lists all files in the specified directory, then filters out the files
|
|
778
|
+
that match the pattern `pyboost_.*_ops_.*.cc` (i.e., files related to pyboost ops). It compares
|
|
779
|
+
the number of such files (`old_files_num`) with the `new_gen_num` argument, which represents
|
|
780
|
+
the expected number of new pyboost ops files. If the counts do not match, the method will
|
|
781
|
+
delete all the existing pyboost ops files in the directory before any new ones can be generated.
|
|
782
|
+
|
|
783
|
+
Args:
|
|
784
|
+
files_path (str): The path to the directory containing the files to be checked and deleted.
|
|
785
|
+
new_gen_num (int): The number of newly generated pyboost ops files expected to be in the directory.
|
|
786
|
+
|
|
787
|
+
Returns:
|
|
788
|
+
None
|
|
789
|
+
"""
|
|
790
|
+
all_files = os.listdir(files_path)
|
|
791
|
+
old_pyboost_ops_files = [file for file in all_files if re.match(
|
|
792
|
+
r'pyboost_.*_ops_.*\.cc', file)]
|
|
793
|
+
old_files_num = len(old_pyboost_ops_files)
|
|
794
|
+
if new_gen_num != old_files_num:
|
|
795
|
+
for file in old_pyboost_ops_files:
|
|
796
|
+
os.remove(os.path.join(files_path, file))
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
def _generate_cpp_func_return(op_proto):
    """Builds the C++ return type string for the given operator prototype.

    Each entry of `op_proto.op_returns` is converted with `get_return_type` and then mapped
    from its `TensorPtr` form to the corresponding `BaseTensorPtr` form. A single return yields
    that type directly; several returns are wrapped in `std::tuple<...>`, joined by commas.

    Args:
        op_proto (OpProto): The operator prototype containing return information.

    Returns:
        str: The C++ return type for the function based on the operator prototype.

    Raises:
        Exception: If a return type has no `BaseTensorPtr` mapping, or if the prototype
            declares no returns at all.
    """
    base_type_of = {
        'std::vector<mindspore::tensor::TensorPtr>': 'std::vector<mindspore::tensor::BaseTensorPtr>',
        'mindspore::tensor::TensorPtr': 'mindspore::tensor::BaseTensorPtr'
    }
    base_types = []
    for ret in op_proto.op_returns:
        raw_type = get_return_type(ret.arg_dtype)
        if raw_type not in base_type_of:
            raise Exception("Not return found")
        base_types.append(base_type_of[raw_type])

    if not base_types:
        raise Exception("Not return found")
    if len(base_types) == 1:
        return base_types[0]
    return "std::tuple<" + ','.join(base_types) + ">"
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
def _generate_inplace_process_cpp_code(op_proto):
|
|
834
|
+
"""Generates C++ code for updating outputs by input tensors for inplace processing.
|
|
835
|
+
|
|
836
|
+
Args:
|
|
837
|
+
op_proto (OpProto): The operator prototype containing return information.
|
|
838
|
+
|
|
839
|
+
Returns:
|
|
840
|
+
str: The C++ code for inplace processing, or an empty string if no inplace processing is needed.
|
|
841
|
+
"""
|
|
842
|
+
inplace_process = f'// RefOps update output by input tensor\n'
|
|
843
|
+
has_ref = False
|
|
844
|
+
for index, return_obj in enumerate(op_proto.op_returns):
|
|
845
|
+
if return_obj.inplace != '':
|
|
846
|
+
inplace_process += f'outputs_[{index}]->set_device_address(' \
|
|
847
|
+
f'{return_obj.inplace}_tensor->device_address()); '
|
|
848
|
+
has_ref = True
|
|
849
|
+
break
|
|
850
|
+
if has_ref:
|
|
851
|
+
return inplace_process
|
|
852
|
+
return ''
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
def delete_residual_files(work_path, op_protos):
    """
    Remove generated files whose operator is no longer among the given prototypes.

    The Ascend, GPU and CPU `pyboost/auto_generate/` directories and the common pyboost
    `auto_generate/` directory are scanned, skipping any that do not exist. A file is deleted
    when the part of its name before the first `.` is not the `op_name` of any prototype.
    Files matching `pyboost_.*_ops_.*\\.cc` and the file `op_register.cc` are always kept.

    Args:
        work_path (str): The base directory path where generated files are located.
        op_protos (list): A list of operator prototypes that are currently valid.

    Returns:
        None
    """
    valid_op_names = {proto.op_name for proto in op_protos}

    relative_dirs = [f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/"
                     for device in ("ascend", "gpu", "cpu")]
    relative_dirs.append(f"{K.MS_COMMON_PYBOOST_KERNEL_PATH}/auto_generate/")

    for relative_dir in relative_dirs:
        target_dir = os.path.join(work_path, relative_dir)
        if not os.path.exists(target_dir):
            continue
        for entry in os.listdir(target_dir):
            # Merged pyboost_.*_ops_.*.cc files and op_register.cc are left alone here; per the
            # original note they are cleaned up right before their replacements are generated.
            if re.match(r'pyboost_.*_ops_.*\.cc', entry) or entry == "op_register.cc":
                continue
            if entry.split(".")[0] in valid_op_names:
                continue
            stale_path = os.path.join(target_dir, entry)
            if os.path.exists(stale_path):
                os.remove(stale_path)
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
class PyboostOpRegisterCppCodeGenerator:
    """
    Produces the C++ registration source file for PyBoost operations.

    The generated file combines one `#include` line per dispatched operator with one explicit
    `OpFactory` template instantiation per dispatched operator class.

    Attributes:
        PYBOOST_OP_REGISTER_TEMPLATE (Template): Template for generating the operation registration code.
    """

    def __init__(self):
        self.PYBOOST_OP_REGISTER_TEMPLATE = template.PYBOOST_OP_REGISTER_TEMPLATE

    def generate(self, work_path, op_protos):
        """
        Writes `op_register.cc`, which registers every dispatched PyBoost operation.

        Prototypes whose `op_dispatch` is None are skipped. For each remaining prototype the
        file receives an include of `<op_name>.h` and an `OpFactory<op_class.name>` template
        instantiation.

        Args:
            work_path (str): The directory path under which the registration file is saved.
            op_protos (list): A list of operator prototypes containing information about the operations.

        Returns:
            None
        """
        dispatched = [proto for proto in op_protos if proto.op_dispatch is not None]

        factory_str = ''.join(
            "template class OpFactory<{0}>;\n".format(proto.op_class.name) for proto in dispatched)
        include_str = ''.join(
            f'#include "{K.MS_PYBOOST_BASE_PATH}/auto_generate/{proto.op_name}.h"\n'
            for proto in dispatched)

        register_source = self.PYBOOST_OP_REGISTER_TEMPLATE.replace(
            op_includes=include_str, op_factory_templates=factory_str)
        target_dir = os.path.join(work_path, f"{K.MS_PYBOOST_BASE_PATH}/auto_generate/")
        save_file(target_dir, "op_register.cc", register_source)
|