mindspore 2.6.0__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +64 -83
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +47 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +177 -52
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +338 -208
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +84 -133
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +47 -38
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +69 -23
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +425 -19
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +125 -101
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +488 -620
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +2 -4
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
- mindspore/ops/auto_generate/gen_extend_func.py +5 -55
- mindspore/ops/auto_generate/gen_ops_def.py +753 -273
- mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +17 -100
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +65 -399
- mindspore/ops/function/nn_func.py +44 -61
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +486 -18
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +4 -50
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +244 -175
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +6 -7
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +52 -11
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +9 -17
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +33 -12
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +5 -1
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +7 -6
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +9 -23
- mindspore/parallel/transform_safetensors.py +468 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +22 -30
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +9 -5
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +72 -18
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +49 -47
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
|
@@ -23,6 +23,7 @@ import re
|
|
|
23
23
|
|
|
24
24
|
import common.template as template
|
|
25
25
|
import common.gen_constants as K
|
|
26
|
+
from common.template import Template
|
|
26
27
|
from common.gen_utils import save_file
|
|
27
28
|
from common.op_proto import OpProto
|
|
28
29
|
from common.base_generator import BaseGenerator
|
|
@@ -32,6 +33,13 @@ from .pyboost_utils import is_cube, AclnnUtils, get_return_type, merge_strings_b
|
|
|
32
33
|
from .op_template_parser import OpTemplateParser
|
|
33
34
|
|
|
34
35
|
|
|
36
|
+
def check_no_basic_int_type(op_args):
|
|
37
|
+
for arg in op_args:
|
|
38
|
+
if arg.arg_dtype in ["tuple[int]", "list[int]", "int"]:
|
|
39
|
+
return False
|
|
40
|
+
return True
|
|
41
|
+
|
|
42
|
+
|
|
35
43
|
class PyboostCommonOpHeaderGenerator(BaseGenerator):
|
|
36
44
|
"""
|
|
37
45
|
Generates common C++ headers for PyBoost operations.
|
|
@@ -42,6 +50,11 @@ class PyboostCommonOpHeaderGenerator(BaseGenerator):
|
|
|
42
50
|
|
|
43
51
|
def __init__(self):
|
|
44
52
|
self.pyboost_op_header_str = template.PYBOOST_BASE_OP_DEFINE_TEMPLATE
|
|
53
|
+
self.pyboost_basic_type_func_template = Template(
|
|
54
|
+
'virtual ${return_type} Call(${call_args_with_type}) {' \
|
|
55
|
+
'\n MS_EXCEPTION(NotImplementedError) << "Basic type func not implemented";' \
|
|
56
|
+
'\n};'
|
|
57
|
+
)
|
|
45
58
|
|
|
46
59
|
def generate(self, work_path, op_protos):
|
|
47
60
|
"""
|
|
@@ -59,7 +72,10 @@ class PyboostCommonOpHeaderGenerator(BaseGenerator):
|
|
|
59
72
|
continue
|
|
60
73
|
op_parser = OpTemplateParser(op_proto)
|
|
61
74
|
op_name_str = op_proto.op_class.name
|
|
62
|
-
|
|
75
|
+
if op_proto.op_view and not check_no_basic_int_type(op_proto.op_args):
|
|
76
|
+
call_args_with_type = op_parser.parse_call_args_with_types(basic_type=True)
|
|
77
|
+
else:
|
|
78
|
+
call_args_with_type = op_parser.parse_call_args_with_types(basic_type=False)
|
|
63
79
|
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
64
80
|
output_is_tuple = "bool output_is_tuple() const override { return true; }" \
|
|
65
81
|
if is_op_multi_output(op_proto.op_returns) else ''
|
|
@@ -121,16 +137,77 @@ class PyboostOpHeaderGenerator(BaseGenerator):
|
|
|
121
137
|
is_ascend_comm_op = op_proto.op_dispatch.is_comm_op and self.device == 'ascend'
|
|
122
138
|
op_parser = OpTemplateParser(op_proto)
|
|
123
139
|
op_name_str = op_proto.op_class.name
|
|
124
|
-
|
|
140
|
+
if op_proto.op_view and not check_no_basic_int_type(op_proto.op_args):
|
|
141
|
+
call_args_with_type = op_parser.parse_call_args_with_types(basic_type=True)
|
|
142
|
+
else:
|
|
143
|
+
call_args_with_type = op_parser.parse_call_args_with_types(basic_type=False)
|
|
125
144
|
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
126
|
-
|
|
127
145
|
pyboost_op_str = self.PYBOOST_OP_HEADER_TEMPLATE.replace(op_name=op_name_str,
|
|
128
146
|
op_name_upper=op_name_str.upper(),
|
|
129
147
|
operator_name=op_proto.op_name,
|
|
130
148
|
call_args_with_type=call_args_with_type,
|
|
131
149
|
return_type=cpp_func_return)
|
|
150
|
+
save_path = os.path.join(work_path, self.code_generate_path if not is_ascend_comm_op \
|
|
151
|
+
else self.hccl_code_generate_path)
|
|
152
|
+
file_name = f"{op_proto.op_name}.h"
|
|
153
|
+
save_file(save_path, file_name, pyboost_op_str)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class PyboostInternalOpHeaderGenerator(BaseGenerator):
|
|
157
|
+
"""
|
|
158
|
+
Generates C++ headers for PyBoost internal operations.
|
|
132
159
|
|
|
133
|
-
|
|
160
|
+
This class generates header files for Ascend and defines the operation functions accordingly.
|
|
161
|
+
"""
|
|
162
|
+
|
|
163
|
+
def __init__(self, device):
|
|
164
|
+
"""
|
|
165
|
+
Initializes the PyboostInternalOpHeaderGenerator with the appropriate template for the specified device.
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
device (str): The target device. Currently only 'ascend' is supported.
|
|
169
|
+
|
|
170
|
+
Raises:
|
|
171
|
+
ValueError: If the device is not supported.
|
|
172
|
+
"""
|
|
173
|
+
if device != 'ascend':
|
|
174
|
+
raise ValueError(
|
|
175
|
+
f"Currently, only support 'ascend' for internal operations, {device} is not supported.")
|
|
176
|
+
self.pyboost_internal_op_header_template = template.PYBOOST_ASCEND_INTERNAL_OP_HEADER_TEMPLATE
|
|
177
|
+
self.code_generate_path = f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/internal/auto_generate/"
|
|
178
|
+
self.device = device
|
|
179
|
+
|
|
180
|
+
def generate(self, work_path, op_protos):
|
|
181
|
+
"""
|
|
182
|
+
Generates header files for the provided operator prototypes based on the device.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
work_path (str): The directory path where the header files will be saved.
|
|
186
|
+
op_protos (list): A list of operator prototypes containing information about the operators.
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
None
|
|
190
|
+
"""
|
|
191
|
+
for op_proto in op_protos:
|
|
192
|
+
if op_proto.op_dispatch is None:
|
|
193
|
+
continue
|
|
194
|
+
if getattr(op_proto.op_dispatch, self.device) == 'None':
|
|
195
|
+
continue
|
|
196
|
+
if getattr(op_proto.op_dispatch, 'internal_op_ascend') == 'None':
|
|
197
|
+
continue
|
|
198
|
+
op_parser = OpTemplateParser(op_proto)
|
|
199
|
+
op_name_str = op_proto.op_class.name
|
|
200
|
+
call_args_with_type = op_parser.parse_call_args_with_types()
|
|
201
|
+
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
202
|
+
|
|
203
|
+
pyboost_op_str = self.pyboost_internal_op_header_template.replace(
|
|
204
|
+
op_name=op_name_str,
|
|
205
|
+
op_name_upper=op_name_str.upper(),
|
|
206
|
+
operator_name=op_proto.op_name,
|
|
207
|
+
call_args_with_type=call_args_with_type,
|
|
208
|
+
return_type=cpp_func_return)
|
|
209
|
+
|
|
210
|
+
save_path = os.path.join(work_path, self.code_generate_path)
|
|
134
211
|
file_name = f"{op_proto.op_name}.h"
|
|
135
212
|
save_file(save_path, file_name, pyboost_op_str)
|
|
136
213
|
|
|
@@ -158,19 +235,24 @@ class PyboostOpCppGenerator:
|
|
|
158
235
|
PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_HEADER_TEMPLATE
|
|
159
236
|
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
160
237
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
|
|
238
|
+
self.device_reg_str = "Ascend"
|
|
161
239
|
elif device == 'cpu':
|
|
162
240
|
PYBOOST_CUSTOMIZE_CALL_TEMPLATE = template.PYBOOST_CPU_CUSTOMIZE_CALL_TEMPLATE
|
|
163
241
|
PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_CPU_SINGLE_OP_HEADER_TEMPLATE
|
|
164
|
-
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.
|
|
242
|
+
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
165
243
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
|
|
244
|
+
self.device_reg_str = "CPU"
|
|
166
245
|
elif device == 'gpu':
|
|
167
246
|
PYBOOST_CUSTOMIZE_CALL_TEMPLATE = template.PYBOOST_GPU_CUSTOMIZE_CALL_TEMPLATE
|
|
168
247
|
PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_GPU_SINGLE_OP_HEADER_TEMPLATE
|
|
169
|
-
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.
|
|
248
|
+
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
170
249
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
|
|
250
|
+
self.device_reg_str = "GPU"
|
|
171
251
|
else:
|
|
172
252
|
raise ValueError(
|
|
173
253
|
f"Device must be ascend, gpu, or cpu, {device} is not supported")
|
|
254
|
+
self.PYBOOST_REG_OP_TEMPLATE = Template('MS_REG_PYBOOST_OP(${device}, ${op_name});' \
|
|
255
|
+
'${register_custom_kernel}')
|
|
174
256
|
self.PYBOOST_CUSTOMIZE_CALL_TEMPLATE = PYBOOST_CUSTOMIZE_CALL_TEMPLATE
|
|
175
257
|
self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = PYBOOST_SINGLE_OP_HEADER_TEMPLATE
|
|
176
258
|
self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = PYBOOST_SINGLE_OP_SOURCE_TEMPLATE
|
|
@@ -201,8 +283,11 @@ class PyboostOpCppGenerator:
|
|
|
201
283
|
continue
|
|
202
284
|
is_ascend_comm_op = op_proto.op_dispatch.is_comm_op and self.device == 'ascend'
|
|
203
285
|
op_parser = OpTemplateParser(op_proto)
|
|
204
|
-
call_args =
|
|
205
|
-
|
|
286
|
+
call_args = OpTemplateParser.parse_original_call_args(op_proto.op_args)
|
|
287
|
+
if op_proto.op_view and not check_no_basic_int_type(op_proto.op_args):
|
|
288
|
+
call_args_with_type = op_parser.parse_call_args_with_types(True)
|
|
289
|
+
else:
|
|
290
|
+
call_args_with_type = op_parser.parse_call_args_with_types()
|
|
206
291
|
_, call_func_outputs = op_parser.generate_pyboost_outputs()
|
|
207
292
|
operator_name = op_proto.op_name
|
|
208
293
|
op_name_str = op_proto.op_class.name
|
|
@@ -219,7 +304,8 @@ class PyboostOpCppGenerator:
|
|
|
219
304
|
check_expression=check_inplace_func,
|
|
220
305
|
)
|
|
221
306
|
if is_ascend_comm_op and ((merge_op_hccl_header is None) or (merge_op_hccl_function is None)):
|
|
222
|
-
raise ValueError(f"merge_op_hccl_header and merge_op_hccl_function
|
|
307
|
+
raise ValueError(f"merge_op_hccl_header and merge_op_hccl_function \
|
|
308
|
+
must be provided for comm op {operator_name}")
|
|
223
309
|
|
|
224
310
|
if is_ascend_comm_op:
|
|
225
311
|
customize_include = \
|
|
@@ -230,23 +316,30 @@ class PyboostOpCppGenerator:
|
|
|
230
316
|
|
|
231
317
|
register_custom = self._get_register_custom_kernel(op_proto)
|
|
232
318
|
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
319
|
+
op_register = self.PYBOOST_REG_OP_TEMPLATE.replace(op_name=op_name_str,
|
|
320
|
+
device=self.device_reg_str,
|
|
321
|
+
register_custom_kernel=register_custom)
|
|
233
322
|
if is_ascend_comm_op:
|
|
234
|
-
merge_op_hccl_header.append(
|
|
235
|
-
|
|
323
|
+
merge_op_hccl_header.append(
|
|
324
|
+
self.PYBOOST_SINGLE_HCLL_OP_HEADER_TEMPLATE.replace(operator_name=operator_name,
|
|
325
|
+
customize_include=customize_include))
|
|
236
326
|
merge_op_hccl_function.append(
|
|
237
327
|
self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_name_str,
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
328
|
+
call_args_with_type=call_args_with_type,
|
|
329
|
+
return_type=cpp_func_return, call_impl=call_impl,
|
|
330
|
+
op_register=op_register,
|
|
331
|
+
device=self.device_reg_str))
|
|
241
332
|
merge_op_hccl_inc.append(op_name_str)
|
|
242
333
|
else:
|
|
243
|
-
merge_op_header.append(
|
|
244
|
-
|
|
334
|
+
merge_op_header.append(
|
|
335
|
+
self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=operator_name,
|
|
336
|
+
customize_include=customize_include))
|
|
245
337
|
merge_op_function.append(
|
|
246
338
|
self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_name_str,
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
339
|
+
call_args_with_type=call_args_with_type,
|
|
340
|
+
return_type=cpp_func_return, call_impl=call_impl,
|
|
341
|
+
op_register=op_register,
|
|
342
|
+
device=self.device_reg_str))
|
|
250
343
|
merge_op_inc.append(op_name_str)
|
|
251
344
|
|
|
252
345
|
def _get_register_custom_kernel(self, op_proto: OpProto):
|
|
@@ -294,19 +387,24 @@ class PyboostViewOpCppGenerator:
|
|
|
294
387
|
PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_HEADER_TEMPLATE
|
|
295
388
|
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
296
389
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
|
|
390
|
+
self.device_reg_str = "Ascend"
|
|
297
391
|
elif device == 'cpu':
|
|
298
392
|
PYBOOST_VIEW_CALL_TEMPLATE = template.PYBOOST_CPU_VIEW_CALL_TEMPLATE
|
|
299
393
|
PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_CPU_SINGLE_OP_HEADER_TEMPLATE
|
|
300
|
-
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.
|
|
394
|
+
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
301
395
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
|
|
396
|
+
self.device_reg_str = "CPU"
|
|
302
397
|
elif device == 'gpu':
|
|
303
398
|
PYBOOST_VIEW_CALL_TEMPLATE = template.PYBOOST_GPU_VIEW_CALL_TEMPLATE
|
|
304
399
|
PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.PYBOOST_GPU_SINGLE_OP_HEADER_TEMPLATE
|
|
305
|
-
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.
|
|
400
|
+
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
306
401
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
|
|
402
|
+
self.device_reg_str = "GPU"
|
|
307
403
|
else:
|
|
308
404
|
raise ValueError(
|
|
309
405
|
f"Device must be ascend, gpu, or cpu, {device} is not supported")
|
|
406
|
+
self.PYBOOST_REG_OP_TEMPLATE = Template('MS_REG_PYBOOST_OP(${device}, ${op_name});' \
|
|
407
|
+
'${register_custom_kernel}')
|
|
310
408
|
self.PYBOOST_VIEW_CALL_TEMPLATE = PYBOOST_VIEW_CALL_TEMPLATE
|
|
311
409
|
self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = PYBOOST_SINGLE_OP_HEADER_TEMPLATE
|
|
312
410
|
self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = PYBOOST_SINGLE_OP_SOURCE_TEMPLATE
|
|
@@ -327,6 +425,7 @@ class PyboostViewOpCppGenerator:
|
|
|
327
425
|
merge_op_header (list): A list to store the generated C++ header code for view operations.
|
|
328
426
|
merge_op_function (list): A list to store the generated C++ source code for view operations.
|
|
329
427
|
"""
|
|
428
|
+
calc_args_temp = Template("{${call_args}}")
|
|
330
429
|
for op_proto in op_protos:
|
|
331
430
|
if op_proto.op_dispatch is None:
|
|
332
431
|
continue
|
|
@@ -339,10 +438,19 @@ class PyboostViewOpCppGenerator:
|
|
|
339
438
|
|
|
340
439
|
op_parser = OpTemplateParser(op_proto)
|
|
341
440
|
call_args_tensor = op_parser.get_call_args_tensor()
|
|
342
|
-
call_args =
|
|
343
|
-
|
|
441
|
+
call_args = OpTemplateParser.parse_original_call_args(op_proto.op_args)
|
|
442
|
+
if op_proto.op_view and not check_no_basic_int_type(op_proto.op_args):
|
|
443
|
+
call_args_with_type = op_parser.parse_call_args_with_types(True)
|
|
444
|
+
storage_calc_str = op_proto.op_class.name + "BasicType"
|
|
445
|
+
calc_func_args_str = call_args
|
|
446
|
+
else:
|
|
447
|
+
call_args_with_type = op_parser.parse_call_args_with_types()
|
|
448
|
+
storage_calc_str = op_proto.op_class.name
|
|
449
|
+
calc_func_args_str = calc_args_temp.replace(call_args=call_args)
|
|
344
450
|
_, call_func_outputs = op_parser.generate_pyboost_outputs()
|
|
345
451
|
call_impl = self.PYBOOST_VIEW_CALL_TEMPLATE.replace(op_name=op_proto.op_class.name,
|
|
452
|
+
storage_calc=storage_calc_str,
|
|
453
|
+
calc_func_args=calc_func_args_str,
|
|
346
454
|
call_args=call_args,
|
|
347
455
|
call_tensors=call_args_tensor,
|
|
348
456
|
return_values=call_func_outputs,
|
|
@@ -351,13 +459,16 @@ class PyboostViewOpCppGenerator:
|
|
|
351
459
|
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
352
460
|
merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=op_proto.op_name,
|
|
353
461
|
customize_include=customize_include))
|
|
354
|
-
|
|
462
|
+
op_register = self.PYBOOST_REG_OP_TEMPLATE.replace(op_name=op_proto.op_class.name,
|
|
463
|
+
device=self.device_reg_str,
|
|
464
|
+
register_custom_kernel="")
|
|
355
465
|
merge_op_function.append(
|
|
356
466
|
self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_proto.op_class.name,
|
|
357
467
|
call_args_with_type=call_args_with_type,
|
|
358
468
|
return_type=cpp_func_return,
|
|
359
469
|
call_impl=call_impl,
|
|
360
|
-
|
|
470
|
+
op_register=op_register,
|
|
471
|
+
device=self.device_reg_str))
|
|
361
472
|
ascend_merge_op_inc.append(op_proto.op_class.name)
|
|
362
473
|
|
|
363
474
|
|
|
@@ -389,17 +500,22 @@ class AclnnOpCppCodeGenerator:
|
|
|
389
500
|
PYBOOST_CALL_TEMPLATE = template.PYBOOST_ASCEND_CALL_TEMPLATE
|
|
390
501
|
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
391
502
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/auto_generate/"
|
|
503
|
+
self.device_reg_str = "Ascend"
|
|
392
504
|
elif device == 'cpu':
|
|
393
505
|
PYBOOST_CALL_TEMPLATE = template.PYBOOST_CPU_CALL_TEMPLATE
|
|
394
|
-
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.
|
|
506
|
+
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
395
507
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/cpu/pyboost/auto_generate/"
|
|
508
|
+
self.device_reg_str = "CPU"
|
|
396
509
|
elif device == 'gpu':
|
|
397
510
|
PYBOOST_CALL_TEMPLATE = template.PYBOOST_GPU_CALL_TEMPLATE
|
|
398
|
-
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.
|
|
511
|
+
PYBOOST_SINGLE_OP_SOURCE_TEMPLATE = template.PYBOOST_ASCEND_SINGLE_OP_SOURCE_TEMPLATE
|
|
399
512
|
gen_path = f"{K.MS_OPS_KERNEL_PATH}/gpu/pyboost/auto_generate/"
|
|
513
|
+
self.device_reg_str = "GPU"
|
|
400
514
|
else:
|
|
401
515
|
raise ValueError(
|
|
402
516
|
f"Device must be ascend, gpu, or cpu, {device} is not supported")
|
|
517
|
+
self.PYBOOST_REG_OP_TEMPLATE = Template('MS_REG_PYBOOST_OP(${device}, ${op_name});' \
|
|
518
|
+
'${register_custom_kernel}')
|
|
403
519
|
self.PYBOOST_CALL_TEMPLATE = PYBOOST_CALL_TEMPLATE
|
|
404
520
|
self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE = template.Template(
|
|
405
521
|
'#include "kernel/${device}/pyboost/auto_generate/${operator_name}.h"\n'
|
|
@@ -438,11 +554,11 @@ class AclnnOpCppCodeGenerator:
|
|
|
438
554
|
aclnn_name = AclnnUtils.get_aclnn_interface(op_proto.op_class.name)
|
|
439
555
|
|
|
440
556
|
call_args_tensor = op_parser.get_call_args_tensor()
|
|
441
|
-
create_input_address =
|
|
557
|
+
create_input_address = AclnnOpCppCodeGenerator._generate_create_input_address(
|
|
442
558
|
op_parser)
|
|
443
|
-
malloc_inputs =
|
|
559
|
+
malloc_inputs = AclnnOpCppCodeGenerator._generate_malloc_input(op_parser)
|
|
444
560
|
op_outputs, call_func_outputs = op_parser.generate_pyboost_outputs()
|
|
445
|
-
get_inputs_kernel_tensors =
|
|
561
|
+
get_inputs_kernel_tensors = AclnnOpCppCodeGenerator._generate_get_inputs_kernel_tensors(
|
|
446
562
|
op_parser)
|
|
447
563
|
|
|
448
564
|
cube_math_type, get_cube_math_type = '', ''
|
|
@@ -454,13 +570,16 @@ class AclnnOpCppCodeGenerator:
|
|
|
454
570
|
real_output = ', ' + op_outputs \
|
|
455
571
|
if _generate_inplace_process_cpp_code(op_proto) == '' else ''
|
|
456
572
|
|
|
457
|
-
cast_input_code, real_call_args_tensor =
|
|
573
|
+
cast_input_code, real_call_args_tensor = AclnnOpCppCodeGenerator._generate_tensor_cpu_cast_input_code(
|
|
458
574
|
op_parser)
|
|
459
575
|
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
460
576
|
_, tensor_list_convert, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
|
|
461
577
|
call_args_after_convert, value_tuple_convert, const_number_convert = op_parser.op_args_converter()
|
|
462
|
-
call_args =
|
|
463
|
-
|
|
578
|
+
call_args = OpTemplateParser.parse_original_call_args(op_proto.op_args)
|
|
579
|
+
if op_proto.op_view and not check_no_basic_int_type(op_proto.op_args):
|
|
580
|
+
call_args_with_type = op_parser.parse_call_args_with_types(True)
|
|
581
|
+
else:
|
|
582
|
+
call_args_with_type = op_parser.parse_call_args_with_types()
|
|
464
583
|
inplace_process = _generate_inplace_process_cpp_code(op_proto)
|
|
465
584
|
call_impl = self.PYBOOST_CALL_TEMPLATE.replace(aclnn_name=aclnn_name,
|
|
466
585
|
call_args=call_args,
|
|
@@ -485,16 +604,20 @@ class AclnnOpCppCodeGenerator:
|
|
|
485
604
|
|
|
486
605
|
merge_op_header.append(self.PYBOOST_SINGLE_OP_HEADER_TEMPLATE.replace(operator_name=op_proto.op_name,
|
|
487
606
|
device=self.device))
|
|
488
|
-
|
|
607
|
+
op_register = self.PYBOOST_REG_OP_TEMPLATE.replace(op_name=op_proto.op_class.name,
|
|
608
|
+
device=self.device_reg_str,
|
|
609
|
+
register_custom_kernel="")
|
|
489
610
|
merge_op_function.append(
|
|
490
611
|
self.PYBOOST_SINGLE_OP_SOURCE_TEMPLATE.replace(op_name=op_proto.op_class.name,
|
|
491
612
|
call_args_with_type=call_args_with_type,
|
|
492
613
|
return_type=cpp_func_return,
|
|
493
614
|
call_impl=call_impl,
|
|
494
|
-
|
|
615
|
+
op_register=op_register,
|
|
616
|
+
device=self.device_reg_str))
|
|
495
617
|
ascend_merge_op_inc.append(op_proto.op_class.name)
|
|
496
618
|
|
|
497
|
-
|
|
619
|
+
@staticmethod
|
|
620
|
+
def _generate_tensor_cpu_cast_input_code(op_parser: OpTemplateParser):
|
|
498
621
|
"""
|
|
499
622
|
Generates the input casting code for CPU tensor operations.
|
|
500
623
|
|
|
@@ -524,18 +647,17 @@ class AclnnOpCppCodeGenerator:
|
|
|
524
647
|
cast_input = "auto &select_kernel = kernel_attr_pair.second;\n" + cast_input
|
|
525
648
|
return cast_input, real_call_args_tensor
|
|
526
649
|
|
|
527
|
-
|
|
650
|
+
@staticmethod
|
|
651
|
+
def _generate_create_input_address(op_parser: OpTemplateParser):
|
|
528
652
|
need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
|
|
529
653
|
create_input_address = ''
|
|
530
|
-
args_list = ''
|
|
531
|
-
for item in need_malloc_tensors:
|
|
532
|
-
args_list += f'{item}, '
|
|
533
|
-
args_list = args_list[:-2]
|
|
654
|
+
args_list = ', '.join(str(item) for item in need_malloc_tensors)
|
|
534
655
|
if args_list:
|
|
535
656
|
create_input_address = f'PyBoostUtils::PrepareOpInputs(device_context_, op->stream_id(), {args_list});\n'
|
|
536
657
|
return create_input_address
|
|
537
658
|
|
|
538
|
-
|
|
659
|
+
@staticmethod
|
|
660
|
+
def _generate_malloc_input(op_parser: OpTemplateParser):
|
|
539
661
|
"""
|
|
540
662
|
Generates the code for creating input addresses for tensors that need to be allocated.
|
|
541
663
|
|
|
@@ -547,15 +669,13 @@ class AclnnOpCppCodeGenerator:
|
|
|
547
669
|
"""
|
|
548
670
|
need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
|
|
549
671
|
malloc_inputs = ''
|
|
550
|
-
args_list = ''
|
|
551
|
-
for item in need_malloc_tensors:
|
|
552
|
-
args_list += f'{item}, '
|
|
553
|
-
args_list = args_list[:-2]
|
|
672
|
+
args_list = ', '.join(str(item) for item in need_malloc_tensors)
|
|
554
673
|
if args_list:
|
|
555
674
|
malloc_inputs += f'PyBoostUtils::MallocOpInputs(device_context, {args_list});\n'
|
|
556
675
|
return malloc_inputs
|
|
557
676
|
|
|
558
|
-
|
|
677
|
+
@staticmethod
|
|
678
|
+
def _generate_get_inputs_kernel_tensors(op_parser: OpTemplateParser):
|
|
559
679
|
"""
|
|
560
680
|
Generates the code for retrieving input kernel tensors.
|
|
561
681
|
|
|
@@ -567,16 +687,178 @@ class AclnnOpCppCodeGenerator:
|
|
|
567
687
|
"""
|
|
568
688
|
_, _, call_args_with_tensor = op_parser.parse_need_malloc_tensors()
|
|
569
689
|
inputs_kernel_tensors = ''
|
|
570
|
-
args_list = ''
|
|
571
|
-
for item in call_args_with_tensor:
|
|
572
|
-
args_list += f'{item}, '
|
|
573
|
-
args_list = args_list[:-2]
|
|
690
|
+
args_list = ', '.join(str(item) for item in call_args_with_tensor)
|
|
574
691
|
if args_list:
|
|
575
692
|
inputs_kernel_tensors += f'const auto &input_address_info = PyBoostUtils::GetAddressInfo(' \
|
|
576
693
|
f'device_context, op->stream_id(), op->input_abs(), {args_list});\n'
|
|
577
694
|
return inputs_kernel_tensors
|
|
578
695
|
|
|
579
696
|
|
|
697
|
+
class InternalOpCppCodeGenerator:
|
|
698
|
+
"""
|
|
699
|
+
Generates C++ code files for internal operations in PyBoost.
|
|
700
|
+
"""
|
|
701
|
+
|
|
702
|
+
def __init__(self, device):
|
|
703
|
+
"""
|
|
704
|
+
Initializes the InternalOpCppCodeGenerator with the appropriate templates.
|
|
705
|
+
"""
|
|
706
|
+
self.device = device
|
|
707
|
+
self.internal_op_header_template = template.PYBOOST_INTERNAL_OP_HEADER_TEMPLATE
|
|
708
|
+
self.internal_single_op_header_template = template.PYBOOST_INTERNAL_SINGLE_OP_HEADER_TEMPLATE
|
|
709
|
+
self.internal_op_source_template = template.PYBOOST_INTERNAL_OP_SOURCE_TEMPLATE
|
|
710
|
+
self.internal_single_op_source_template = template.PYBOOST_INTERNAL_SINGLE_OP_SOURCE_TEMPLATE
|
|
711
|
+
self.internal_single_op_customize_source_template = template.PYBOOST_INTERNAL_SINGLE_OP_CUSTOMIZE_TEMPLATE
|
|
712
|
+
self.customize_inc_template = Template(
|
|
713
|
+
'#include "{ms_ops_kernel_path}/ascend/pyboost/internal/customize/${operator_name}.h"\n'
|
|
714
|
+
)
|
|
715
|
+
self.gen_path = f"{K.MS_OPS_KERNEL_PATH}/ascend/pyboost/internal/auto_generate/"
|
|
716
|
+
|
|
717
|
+
def generate_internal_op_cpp_code(self, work_path, op_protos):
|
|
718
|
+
"""
|
|
719
|
+
Generate internal op cpp code in pyboost.
|
|
720
|
+
"""
|
|
721
|
+
merge_op_header = []
|
|
722
|
+
merge_op_function = []
|
|
723
|
+
ascend_merge_op_inc = []
|
|
724
|
+
for op_proto in op_protos:
|
|
725
|
+
if op_proto.op_dispatch is None or not op_proto.op_dispatch.enable:
|
|
726
|
+
continue
|
|
727
|
+
if getattr(op_proto.op_dispatch, 'internal_op_ascend') == 'None':
|
|
728
|
+
continue
|
|
729
|
+
internal_op_ascend = op_proto.op_dispatch.internal_op_ascend
|
|
730
|
+
op_name = op_proto.op_class.name
|
|
731
|
+
if internal_op_ascend == 'AutoGen':
|
|
732
|
+
self.generate_default_call(work_path, op_proto, merge_op_header,
|
|
733
|
+
merge_op_function, ascend_merge_op_inc)
|
|
734
|
+
elif internal_op_ascend == 'Internal' + op_name + 'AscendCustomize':
|
|
735
|
+
self.generate_customize_call(work_path, op_proto, merge_op_header,
|
|
736
|
+
merge_op_function, ascend_merge_op_inc)
|
|
737
|
+
|
|
738
|
+
if not ascend_merge_op_inc:
|
|
739
|
+
return
|
|
740
|
+
ops_inc_head_set = set()
|
|
741
|
+
for op_name_inc in ascend_merge_op_inc:
|
|
742
|
+
ops_inc_head_set.add(template.OP_DEF_INC_HEAD_TEMPLATE.replace(prefix_char=op_name_inc[0].lower()))
|
|
743
|
+
|
|
744
|
+
internal_op_source_str = self.internal_op_source_template.replace(ops_prim_inc=list(sorted(ops_inc_head_set)),
|
|
745
|
+
merge_op_header=merge_op_header,
|
|
746
|
+
merge_op_function=merge_op_function)
|
|
747
|
+
save_path = os.path.join(work_path, self.gen_path)
|
|
748
|
+
file_name = "pyboost_ascend_internal_ops.cc"
|
|
749
|
+
save_file(save_path, file_name, internal_op_source_str)
|
|
750
|
+
|
|
751
|
+
def generate_default_op_function(self, op_parser, op_proto):
|
|
752
|
+
"""
|
|
753
|
+
Generate default op call function.
|
|
754
|
+
"""
|
|
755
|
+
call_args_with_type = op_parser.parse_call_args_with_types()
|
|
756
|
+
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
757
|
+
_, call_func_outputs = op_parser.generate_pyboost_outputs()
|
|
758
|
+
call_args = OpTemplateParser.parse_original_call_args(op_proto.op_args)
|
|
759
|
+
call_args_after_convert, value_tuple_convert, const_number_convert = op_parser.op_args_converter()
|
|
760
|
+
create_input_address, create_output_address = self._create_input_and_output_address(op_parser, op_proto)
|
|
761
|
+
internal_op_source_str = self.internal_single_op_source_template.replace(
|
|
762
|
+
op_name=op_proto.op_class.name,
|
|
763
|
+
operator_name=op_proto.op_name,
|
|
764
|
+
call_args_with_type=call_args_with_type,
|
|
765
|
+
internal_call_args=call_args,
|
|
766
|
+
internal_real_call_args=call_args_after_convert,
|
|
767
|
+
create_input_address=create_input_address,
|
|
768
|
+
create_output_address=create_output_address,
|
|
769
|
+
value_tuple_convert=value_tuple_convert,
|
|
770
|
+
const_number_convert=const_number_convert,
|
|
771
|
+
return_type=cpp_func_return,
|
|
772
|
+
return_values=call_func_outputs)
|
|
773
|
+
return internal_op_source_str
|
|
774
|
+
|
|
775
|
+
def generate_default_call(self, work_path, op_proto, merge_op_header,
|
|
776
|
+
merge_op_function, ascend_merge_op_inc):
|
|
777
|
+
"""
|
|
778
|
+
Generate internal op default call function in pyboost.
|
|
779
|
+
"""
|
|
780
|
+
op_parser = OpTemplateParser(op_proto)
|
|
781
|
+
call_args_with_type = op_parser.parse_call_args_with_types()
|
|
782
|
+
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
783
|
+
|
|
784
|
+
# generate op header
|
|
785
|
+
internal_op_header_str = self.internal_op_header_template.replace(
|
|
786
|
+
operator_name=op_proto.op_name,
|
|
787
|
+
op_name=op_proto.op_class.name,
|
|
788
|
+
op_name_upper=op_proto.op_class.name.upper(),
|
|
789
|
+
call_args_with_type=call_args_with_type,
|
|
790
|
+
return_type=cpp_func_return)
|
|
791
|
+
save_path = os.path.join(work_path, self.gen_path)
|
|
792
|
+
save_file(save_path, f"{op_proto.op_name}.h", internal_op_header_str)
|
|
793
|
+
merge_op_header.append(
|
|
794
|
+
self.internal_single_op_header_template.replace(
|
|
795
|
+
operator_name=op_proto.op_name,
|
|
796
|
+
customize_inc=''))
|
|
797
|
+
|
|
798
|
+
# generate op function
|
|
799
|
+
internal_op_source_str = self.generate_default_op_function(op_parser, op_proto)
|
|
800
|
+
merge_op_function.append(internal_op_source_str)
|
|
801
|
+
|
|
802
|
+
ascend_merge_op_inc.append(op_proto.op_class.name)
|
|
803
|
+
|
|
804
|
+
def generate_customize_call(self, work_path, op_proto, merge_op_header,
|
|
805
|
+
merge_op_function, ascend_merge_op_inc):
|
|
806
|
+
"""
|
|
807
|
+
Generate internal op customize call function in pyboost.
|
|
808
|
+
"""
|
|
809
|
+
op_parser = OpTemplateParser(op_proto)
|
|
810
|
+
call_args_with_type = op_parser.parse_call_args_with_types()
|
|
811
|
+
cpp_func_return = _generate_cpp_func_return(op_proto)
|
|
812
|
+
|
|
813
|
+
# generate op header
|
|
814
|
+
internal_op_header_str = self.internal_op_header_template.replace(
|
|
815
|
+
operator_name=op_proto.op_name,
|
|
816
|
+
op_name=op_proto.op_class.name,
|
|
817
|
+
op_name_upper=op_proto.op_class.name.upper(),
|
|
818
|
+
call_args_with_type=call_args_with_type,
|
|
819
|
+
return_type=cpp_func_return)
|
|
820
|
+
save_path = os.path.join(work_path, self.gen_path)
|
|
821
|
+
save_file(save_path, f"{op_proto.op_name}.h", internal_op_header_str)
|
|
822
|
+
self.customize_inc_template.replace(
|
|
823
|
+
ms_ops_kernel_path=K.MS_OPS_KERNEL_PATH,
|
|
824
|
+
operator_name=op_proto.op_name)
|
|
825
|
+
merge_op_header.append(
|
|
826
|
+
self.internal_single_op_header_template.replace(
|
|
827
|
+
operator_name=op_proto.op_name,
|
|
828
|
+
customize_inc=self.customize_inc_template))
|
|
829
|
+
|
|
830
|
+
# generate op function
|
|
831
|
+
_, call_func_outputs = op_parser.generate_pyboost_outputs()
|
|
832
|
+
call_args = OpTemplateParser.parse_original_call_args(op_proto.op_args)
|
|
833
|
+
internal_op_source_str = self.internal_single_op_customize_source_template.replace(
|
|
834
|
+
op_name=op_proto.op_class.name,
|
|
835
|
+
call_args=call_args,
|
|
836
|
+
call_args_with_type=call_args_with_type,
|
|
837
|
+
return_type=cpp_func_return,
|
|
838
|
+
return_values=call_func_outputs)
|
|
839
|
+
merge_op_function.append(internal_op_source_str)
|
|
840
|
+
ascend_merge_op_inc.append(op_proto.op_class.name)
|
|
841
|
+
|
|
842
|
+
@staticmethod
|
|
843
|
+
def _create_input_and_output_address(op_parser: OpTemplateParser, op_proto):
|
|
844
|
+
"""
|
|
845
|
+
Create input and output address.
|
|
846
|
+
"""
|
|
847
|
+
need_malloc_tensors, _, _ = op_parser.parse_need_malloc_tensors()
|
|
848
|
+
create_input_address = ''
|
|
849
|
+
create_output_address = ''
|
|
850
|
+
args_list = ''.join(f'{arg}, ' for arg in need_malloc_tensors)
|
|
851
|
+
args_list = args_list[:-2]
|
|
852
|
+
if args_list:
|
|
853
|
+
create_input_address = f'PyBoostUtils::PrepareOpInputs(device_context_, op->stream_id(), {args_list});\n'
|
|
854
|
+
if op_proto.op_args_signature and op_proto.op_args_signature.rw_write:
|
|
855
|
+
create_output_address = ''
|
|
856
|
+
else:
|
|
857
|
+
create_output_address = 'PyBoostUtils::PrepareOpOutputs(device_context_, op->stream_id(), outputs_);\n'
|
|
858
|
+
|
|
859
|
+
return create_input_address, create_output_address
|
|
860
|
+
|
|
861
|
+
|
|
580
862
|
class PyboostOpFunctionGenerator(BaseGenerator):
|
|
581
863
|
"""
|
|
582
864
|
Generates C++ source files for ACLNN operations in PyBoost.
|
|
@@ -595,6 +877,7 @@ class PyboostOpFunctionGenerator(BaseGenerator):
|
|
|
595
877
|
self.ascend_op_cpp_generator = PyboostOpCppGenerator('ascend')
|
|
596
878
|
self.ascend_view_op_cpp_generator = PyboostViewOpCppGenerator('ascend')
|
|
597
879
|
self.ascend_aclnn_cpp_generator = AclnnOpCppCodeGenerator('ascend')
|
|
880
|
+
self.ascend_internal_op_cpp_generator = InternalOpCppCodeGenerator('ascend')
|
|
598
881
|
|
|
599
882
|
self.cpu_op_cpp_generator = PyboostOpCppGenerator('cpu')
|
|
600
883
|
self.cpu_view_op_cpp_generator = PyboostViewOpCppGenerator('cpu')
|
|
@@ -659,6 +942,7 @@ class PyboostOpFunctionGenerator(BaseGenerator):
|
|
|
659
942
|
self.ascend_aclnn_cpp_generator.generate_aclnn_op_cpp_code(op_protos, ascend_merge_op_header,
|
|
660
943
|
ascend_merge_op_function,
|
|
661
944
|
ascend_merge_op_inc)
|
|
945
|
+
self.ascend_internal_op_cpp_generator.generate_internal_op_cpp_code(work_path, op_protos)
|
|
662
946
|
|
|
663
947
|
ascend_op_header_merge_by_chunk_size = merge_strings_by_chunk_size(
|
|
664
948
|
ascend_merge_op_header, chunk_size=120)
|
|
@@ -688,8 +972,8 @@ class PyboostOpFunctionGenerator(BaseGenerator):
|
|
|
688
972
|
hccl_pyboost_op_source = self.PYBOOST_ASCEND_OP_SOURCE_TEMPLATE.replace(
|
|
689
973
|
merge_op_header='\n'.join(hccl_merge_op_header), merge_op_function='\n'.join(hccl_merge_op_function),
|
|
690
974
|
ops_inc=list(sorted(ops_hccl_inc_head_set)))
|
|
691
|
-
save_file(os.path.join(work_path, self.hccl_gen_path), f"pyboost_hccl_ops.cc",
|
|
692
|
-
|
|
975
|
+
save_file(os.path.join(work_path, self.hccl_gen_path), f"pyboost_hccl_ops.cc", \
|
|
976
|
+
hccl_pyboost_op_source)
|
|
693
977
|
|
|
694
978
|
def _generate_pyboost_cpu_ops(self, work_path, op_protos):
|
|
695
979
|
"""
|
|
@@ -788,8 +1072,7 @@ class PyboostOpFunctionGenerator(BaseGenerator):
|
|
|
788
1072
|
None
|
|
789
1073
|
"""
|
|
790
1074
|
all_files = os.listdir(files_path)
|
|
791
|
-
old_pyboost_ops_files = [file for file in all_files if re.match(
|
|
792
|
-
r'pyboost_.*_ops_.*\.cc', file)]
|
|
1075
|
+
old_pyboost_ops_files = [file for file in all_files if re.match(r'pyboost_.*_ops_.*\.cc', file)]
|
|
793
1076
|
old_files_num = len(old_pyboost_ops_files)
|
|
794
1077
|
if new_gen_num != old_files_num:
|
|
795
1078
|
for file in old_pyboost_ops_files:
|
|
@@ -810,8 +1093,8 @@ def _generate_cpp_func_return(op_proto):
|
|
|
810
1093
|
"""
|
|
811
1094
|
returns_type = []
|
|
812
1095
|
type_convert_to_base = {
|
|
813
|
-
'std::vector<mindspore::tensor::TensorPtr>': 'std::vector<mindspore::tensor::
|
|
814
|
-
'mindspore::tensor::TensorPtr': 'mindspore::tensor::
|
|
1096
|
+
'std::vector<mindspore::tensor::TensorPtr>': 'std::vector<mindspore::tensor::TensorPtr>',
|
|
1097
|
+
'mindspore::tensor::TensorPtr': 'mindspore::tensor::TensorPtr'
|
|
815
1098
|
}
|
|
816
1099
|
for return_obj in op_proto.op_returns:
|
|
817
1100
|
temp_return = get_return_type(return_obj.arg_dtype)
|
|
@@ -866,8 +1149,8 @@ def delete_residual_files(work_path, op_protos):
|
|
|
866
1149
|
all_operator_name = []
|
|
867
1150
|
for op_proto in op_protos:
|
|
868
1151
|
all_operator_name.append(op_proto.op_name)
|
|
869
|
-
|
|
870
|
-
|
|
1152
|
+
devices = ["ascend", "gpu", "cpu"]
|
|
1153
|
+
code_generate_path_list = [f"{K.MS_OPS_KERNEL_PATH}/{device}/pyboost/auto_generate/" for device in devices]
|
|
871
1154
|
code_generate_path_list.append(
|
|
872
1155
|
f"{K.MS_COMMON_PYBOOST_KERNEL_PATH}/auto_generate/")
|
|
873
1156
|
for code_generate_path in code_generate_path_list:
|
|
@@ -916,19 +1199,23 @@ class PyboostOpRegisterCppCodeGenerator:
|
|
|
916
1199
|
None
|
|
917
1200
|
"""
|
|
918
1201
|
all_op_names = []
|
|
1202
|
+
internal_op_names = []
|
|
919
1203
|
all_functional_names = []
|
|
920
1204
|
for op_proto in op_protos:
|
|
921
1205
|
if op_proto.op_dispatch is None:
|
|
922
1206
|
continue
|
|
923
|
-
functional_name = op_proto.op_name
|
|
924
1207
|
op_name_str = op_proto.op_class.name
|
|
1208
|
+
if getattr(op_proto.op_dispatch, 'internal_op_ascend') != 'None':
|
|
1209
|
+
internal_op_names.append(op_name_str)
|
|
925
1210
|
all_op_names.append(op_name_str)
|
|
926
|
-
all_functional_names.append(
|
|
1211
|
+
all_functional_names.append(op_proto.op_name)
|
|
927
1212
|
|
|
928
1213
|
include_str = ''
|
|
929
1214
|
factory_str = ''
|
|
930
1215
|
for op_name in all_op_names:
|
|
931
1216
|
factory_str += "template class OpFactory<{0}>;\n".format(op_name)
|
|
1217
|
+
for op_name in internal_op_names:
|
|
1218
|
+
factory_str += "template class InternalOpFactory<{0}>;\n".format(op_name)
|
|
932
1219
|
for operator_name in all_functional_names:
|
|
933
1220
|
include_str += f'#include "{K.MS_PYBOOST_BASE_PATH}/auto_generate/{operator_name}.h"\n'
|
|
934
1221
|
op_register_file_str = self.PYBOOST_OP_REGISTER_TEMPLATE.replace(op_includes=include_str,
|