mindspore 2.4.1__cp39-cp39-win_amd64.whl → 2.5.0__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +8 -3
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +0 -5
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/compile_config.py +64 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
- mindspore/_extends/parse/parser.py +23 -5
- mindspore/_extends/parse/standard_method.py +123 -27
- mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
- mindspore/amp.py +7 -1
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/boost_cell_wrapper.py +136 -41
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +3 -1
- mindspore/common/_register_for_tensor.py +0 -1
- mindspore/common/_stub_tensor.py +25 -4
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +6132 -0
- mindspore/common/api.py +99 -25
- mindspore/common/dtype.py +34 -34
- mindspore/common/dump.py +2 -1
- mindspore/common/file_system.py +8 -1
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +3 -1
- mindspore/common/initializer.py +3 -4
- mindspore/common/lazy_inline.py +8 -2
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/parameter.py +30 -27
- mindspore/common/tensor.py +713 -1337
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +10 -0
- mindspore/communication/comm_func.py +215 -173
- mindspore/communication/management.py +23 -20
- mindspore/context.py +292 -193
- mindspore/dataset/__init__.py +23 -19
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +84 -3
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +5 -4
- mindspore/dataset/engine/datasets.py +192 -149
- mindspore/dataset/engine/datasets_audio.py +14 -0
- mindspore/dataset/engine/datasets_standard_format.py +28 -11
- mindspore/dataset/engine/datasets_text.py +38 -1
- mindspore/dataset/engine/datasets_user_defined.py +125 -65
- mindspore/dataset/engine/datasets_vision.py +81 -8
- mindspore/dataset/engine/iterators.py +281 -63
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +26 -2
- mindspore/dataset/engine/serializer_deserializer.py +1 -1
- mindspore/dataset/engine/validators.py +43 -11
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +29 -12
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +94 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +127 -0
- mindspore/device_context/cpu/__init__.py +25 -0
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +134 -0
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/__init__.py +3 -2
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +239 -64
- mindspore/experimental/llm_boost/atb/llama_boost.py +52 -30
- mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/optim/adadelta.py +26 -22
- mindspore/experimental/optim/adam.py +3 -0
- mindspore/experimental/optim/lr_scheduler.py +33 -24
- mindspore/experimental/optim/radam.py +33 -30
- mindspore/hal/device.py +28 -0
- mindspore/hal/event.py +17 -0
- mindspore/hal/memory.py +94 -3
- mindspore/hal/stream.py +91 -6
- mindspore/include/api/context.h +1 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +12 -0
- mindspore/mindrecord/__init__.py +1 -1
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +824 -218
- mindspore/mint/distributed/__init__.py +66 -4
- mindspore/mint/distributed/distributed.py +2594 -44
- mindspore/mint/linalg/__init__.py +6 -0
- mindspore/mint/nn/__init__.py +473 -14
- mindspore/mint/nn/functional.py +486 -11
- mindspore/mint/nn/layer/__init__.py +17 -4
- mindspore/mint/nn/layer/_functions.py +330 -0
- mindspore/mint/nn/layer/activation.py +169 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +727 -0
- mindspore/mint/nn/layer/normalization.py +215 -19
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +170 -0
- mindspore/mint/optim/__init__.py +2 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/special/__init__.py +2 -1
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +142 -21
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +6 -6
- mindspore/nn/layer/basic.py +35 -25
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/conv.py +3 -0
- mindspore/nn/layer/embedding.py +3 -3
- mindspore/nn/layer/normalization.py +8 -7
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +55 -23
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +48 -26
- mindspore/nn/learning_rate_schedule.py +5 -3
- mindspore/nn/loss/loss.py +31 -36
- mindspore/nn/optim/ada_grad.py +1 -0
- mindspore/nn/optim/adadelta.py +2 -2
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/rprop.py +2 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/utils/__init__.py +22 -0
- mindspore/nn/utils/init.py +73 -0
- mindspore/nn/wrap/cell_wrapper.py +4 -6
- mindspore/nn/wrap/loss_scale.py +3 -4
- mindspore/numpy/array_creations.py +60 -62
- mindspore/numpy/array_ops.py +148 -143
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +361 -359
- mindspore/numpy/utils.py +16 -16
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +2 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +107 -8
- mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_vmap/vmap_array_ops.py +20 -19
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
- mindspore/ops/_vmap/vmap_math_ops.py +11 -9
- mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
- mindspore/ops/auto_generate/gen_extend_func.py +554 -60
- mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
- mindspore/ops/auto_generate/gen_ops_prim.py +8027 -3411
- mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
- mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
- mindspore/ops/function/__init__.py +12 -0
- mindspore/ops/function/array_func.py +561 -159
- mindspore/ops/function/clip_func.py +64 -0
- mindspore/ops/function/debug_func.py +28 -20
- mindspore/ops/function/image_func.py +1 -1
- mindspore/ops/function/linalg_func.py +5 -4
- mindspore/ops/function/math_func.py +1664 -294
- mindspore/ops/function/nn_func.py +988 -317
- mindspore/ops/function/parameter_func.py +3 -56
- mindspore/ops/function/random_func.py +243 -33
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/functional.py +18 -5
- mindspore/ops/functional_overload.py +897 -0
- mindspore/ops/operations/__init__.py +3 -2
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -34
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +38 -8
- mindspore/ops/operations/array_ops.py +45 -303
- mindspore/ops/operations/comm_ops.py +23 -17
- mindspore/ops/operations/custom_ops.py +7 -49
- mindspore/ops/operations/debug_ops.py +42 -47
- mindspore/ops/operations/inner_ops.py +6 -4
- mindspore/ops/operations/linalg_ops.py +3 -2
- mindspore/ops/operations/manually_defined/ops_def.py +185 -104
- mindspore/ops/operations/math_ops.py +11 -216
- mindspore/ops/operations/nn_ops.py +153 -310
- mindspore/ops/primitive.py +23 -21
- mindspore/ops/tensor_method.py +1669 -0
- mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
- mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
- mindspore/ops_generate/arg_handler.py +0 -61
- mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
- mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/base_generator.py +11 -0
- mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
- mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
- mindspore/ops_generate/functional_overload_py_generator.py +110 -0
- mindspore/ops_generate/functions_cc_generator.py +233 -0
- mindspore/ops_generate/gen_aclnn_implement.py +110 -114
- mindspore/ops_generate/gen_constants.py +157 -3
- mindspore/ops_generate/gen_ops.py +245 -990
- mindspore/ops_generate/gen_pyboost_func.py +97 -998
- mindspore/ops_generate/gen_utils.py +119 -33
- mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
- mindspore/ops_generate/op_api_proto.py +206 -0
- mindspore/ops_generate/op_def_py_generator.py +131 -0
- mindspore/ops_generate/op_prim_py_generator.py +480 -0
- mindspore/ops_generate/op_proto.py +373 -108
- mindspore/ops_generate/op_template_parser.py +436 -0
- mindspore/ops_generate/ops_def_cc_generator.py +288 -0
- mindspore/ops_generate/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/ops_name_h_generator.py +68 -0
- mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
- mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
- mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
- mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
- mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
- mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
- mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
- mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
- mindspore/ops_generate/pyboost_utils.py +92 -33
- mindspore/ops_generate/template.py +294 -44
- mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
- mindspore/parallel/__init__.py +3 -3
- mindspore/parallel/_auto_parallel_context.py +44 -34
- mindspore/parallel/_cell_wrapper.py +22 -3
- mindspore/parallel/_parallel_serialization.py +13 -2
- mindspore/parallel/_utils.py +4 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +44 -0
- mindspore/parallel/cluster/process_entity/_api.py +131 -37
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +20 -3
- mindspore/parallel/parameter_broadcast.py +1 -1
- mindspore/parallel/shard.py +3 -0
- mindspore/parallel/transform_safetensors.py +119 -253
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +17 -4
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +174 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +202 -0
- mindspore/profiler/common/path_manager.py +371 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +476 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +210 -0
- mindspore/profiler/common/profiler_path_manager.py +120 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +270 -37
- mindspore/profiler/envprofiler.py +138 -0
- mindspore/profiler/mstx.py +199 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +309 -0
- mindspore/profiler/profiler.py +580 -93
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +114 -0
- mindspore/profiler/schedule.py +208 -0
- mindspore/rewrite/api/symbol_tree.py +1 -2
- mindspore/run_check/_check_version.py +18 -13
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +148 -0
- mindspore/runtime/memory.py +392 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +2 -2
- mindspore/train/_utils.py +53 -18
- mindspore/train/amp.py +8 -4
- mindspore/train/callback/_checkpoint.py +32 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +105 -69
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_summary_collector.py +44 -6
- mindspore/train/callback/_tft_register.py +37 -15
- mindspore/train/dataset_helper.py +11 -11
- mindspore/train/metrics/precision.py +4 -5
- mindspore/train/mind_ir_pb2.py +167 -46
- mindspore/train/model.py +13 -14
- mindspore/train/serialization.py +461 -72
- mindspore/train/summary/summary_record.py +1 -2
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +4 -2
- mindspore/utils/dryrun.py +138 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/METADATA +3 -4
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/RECORD +391 -265
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# Copyright 2024 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""
|
|
16
|
+
This module provides classes for generating C++ header and implementation files for functions based on op_protos.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import os
|
|
20
|
+
|
|
21
|
+
import template
|
|
22
|
+
from template import Template
|
|
23
|
+
import gen_constants as K
|
|
24
|
+
from gen_utils import save_file
|
|
25
|
+
from base_generator import BaseGenerator
|
|
26
|
+
from pyboost_utils import is_optional_param, get_input_dtype, get_return_type
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class FunctionsHeaderGenerator(BaseGenerator):
    """
    Produces the ``functions.h`` header that declares one backend function per operator prototype.
    """

    def __init__(self):
        """
        Set up the file-level header template and the single-declaration template.
        """
        self.FUNCTIONS_H_TEMPLATE = template.FUNCTIONS_H_TEMPLATE
        self.function_interface_template = Template("${return_type} BACKEND_EXPORT ${op_name}(${input_args});")

    def generate(self, work_path, op_protos):
        """
        Render the backend functions header and save it as ``functions.h``.

        Args:
            work_path (str): Root directory; the file is saved under
                ``K.MS_OPS_KERNEL_FUNCTIONS_AUTO_GEN_PATH`` relative to it.
            op_protos (list): Operator prototypes to emit declarations for.
        """
        declarations = []
        for proto in op_protos:
            dispatch = proto.op_dispatch
            # Operators without dispatch info and communication operators get no declaration.
            if dispatch is None or dispatch.is_comm_op:
                continue
            typed_args = self._get_input_args(proto)
            return_type = _get_return_type_str(proto)
            declarations.append(
                self.function_interface_template.replace(op_name=proto.op_name,
                                                         input_args=typed_args,
                                                         return_type=return_type))
        header_text = self.FUNCTIONS_H_TEMPLATE.replace(op_call_with_grad=declarations)
        target_dir = os.path.join(work_path, K.MS_OPS_KERNEL_FUNCTIONS_AUTO_GEN_PATH)
        save_file(target_dir, "functions.h", header_text)

    def _get_input_args(self, op_proto):
        """
        Build the typed parameter declarations for one operator.

        Args:
            op_proto: The operator prototype.

        Returns:
            list: One ``const <dtype> &<name>`` string per entry of ``op_proto.op_args``.
        """
        return [
            "const " + get_input_dtype(arg.arg_dtype, is_optional_param(arg)) + " &" + arg.arg_name
            for arg in op_proto.op_args
        ]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class FunctionsGenerator(BaseGenerator):
    """
    Generates C++ implementation files for backend functions based on operator prototypes.
    """

    def __init__(self):
        """
        Initialize the functions generator with templates for code generation.
        """
        self.FUNCTIONS_CC_TEMPLATE = template.FUNCTIONS_CC_TEMPLATE
        self.FUNCTION_BODY_TEMPLATE = template.FUNCTION_BODY_TEMPLATE
        self.pyboost_func_include_header_template = Template(
            f'#include "{K.MS_COMMON_PYBOOST_KERNEL_PATH}/auto_generate/${{operator_name}}.h"\n'
        )
        self.clone_inplace_input_template = Template(
            'GetCloneFunc()(op, prim::kPrim${class_name}, device_target, {${grad_args}});'
        )

    def generate(self, work_path, op_protos):
        """
        Generate the implementation file (``functions.cc``) for backend functions.

        Args:
            work_path (str): The directory where the generated file should be saved.
            op_protos (list): A list of operator prototypes used to generate the implementation.
        """
        func_include_headers_list = []
        op_call_with_grad_list = []
        for op_proto in op_protos:
            # Operators without dispatch info and communication operators are not emitted.
            if op_proto.op_dispatch is None or op_proto.op_dispatch.is_comm_op:
                continue
            func_include_headers_list.append(
                self.pyboost_func_include_header_template.replace(operator_name=op_proto.op_name))
            op_call_with_grad_list.append(self._get_function_body(op_proto))
        functions_cc_str = self.FUNCTIONS_CC_TEMPLATE.replace(op_call_with_grad=op_call_with_grad_list,
                                                              pyboost_op_header_include=func_include_headers_list)
        save_path = os.path.join(work_path, K.MS_OPS_KERNEL_FUNCTIONS_AUTO_GEN_PATH)
        file_name = "functions.cc"
        save_file(save_path, file_name, functions_cc_str)

    def _get_function_body(self, op_proto):
        """
        Get the function body for a given operator prototype.

        Args:
            op_proto: The operator prototype.

        Returns:
            The result of filling ``FUNCTION_BODY_TEMPLATE`` for this operator.
        """
        input_args = self._get_input_args(op_proto, False)
        input_args_with_type = self._get_input_args(op_proto, True)
        inplace_clone_args = self._get_clone_input_args(op_proto, False, False)
        clone_func_str = self._get_clone_inplace_str(op_proto.op_inplace, op_proto.op_class.name, inplace_clone_args)
        return_type_str = _get_return_type_str(op_proto)
        return self.FUNCTION_BODY_TEMPLATE.replace(op_name=op_proto.op_name,
                                                   class_name=op_proto.op_class.name,
                                                   input_args=input_args,
                                                   clone_func=clone_func_str,
                                                   input_args_with_type=input_args_with_type,
                                                   return_type=return_type_str)

    def _get_input_args(self, op_proto, has_type):
        """
        Get the input arguments for the function body.

        Args:
            op_proto: The operator prototype.
            has_type (bool): Whether to include type information for the arguments.

        Returns:
            list: One entry per operator argument; ``const <dtype> &<name>`` when ``has_type``
            is true, otherwise the bare argument name.
        """
        if not has_type:
            # The C++ dtype is only needed for typed declarations, so it is not looked up here.
            return [op_arg.arg_name for op_arg in op_proto.op_args]
        return [
            "const " + get_input_dtype(op_arg.arg_dtype, is_optional_param(op_arg)) + " &" + op_arg.arg_name
            for op_arg in op_proto.op_args
        ]

    def _get_clone_inplace_str(self, is_inplace_op: bool, class_name: str, grad_args: list):
        """
        Build the clone call emitted for inplace operators.

        Args:
            is_inplace_op (bool): Whether the op is an inplace op.
            class_name (str): Operator class name, substituted into ``prim::kPrim${class_name}``.
            grad_args (list): Arguments substituted into the clone call's argument list.

        Returns:
            An empty string when the op is not inplace; otherwise the filled clone-call template.
        """
        if not is_inplace_op:
            return ''
        return self.clone_inplace_input_template.replace(class_name=class_name, grad_args=grad_args)

    def _get_clone_input_args(self, op_proto, has_type, with_optional):
        """
        Get the input arguments used for the inplace clone call.

        Args:
            op_proto: The operator prototype.
            has_type (bool): Whether to include type information for the arguments.
            with_optional (bool): Only consulted when ``has_type`` is false. When false, optional
                arguments are wrapped as ``OptionalToValue(<name>)``; when true, they are passed
                by name unchanged.

        Returns:
            list: One argument string per entry of ``op_proto.op_args``.
        """
        args_list = []
        for op_arg in op_proto.op_args:
            if has_type:
                # Look up the C++ dtype only on the branch that actually uses it.
                input_dtype = get_input_dtype(op_arg.arg_dtype, is_optional_param(op_arg))
                args_list.append(f"const {input_dtype} &{op_arg.arg_name}")
            elif not with_optional and is_optional_param(op_arg):
                args_list.append(f"OptionalToValue({op_arg.arg_name})")
            else:
                args_list.append(f"{op_arg.arg_name}")
        return args_list
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _get_return_type_str(op_proto):
    """
    Get the C++ return type string for the generated function.

    Each entry of ``op_proto.op_returns`` is converted with ``get_return_type`` and then mapped
    from its ``TensorPtr`` form to the matching ``BaseTensorPtr`` form. A single return yields
    that type directly; several returns are wrapped in ``std::tuple<...>``.

    Args:
        op_proto: The operator prototype.

    Returns:
        str: The return type as a string.

    Raises:
        TypeError: If a return maps to a C++ type with no ``BaseTensorPtr`` equivalent.
        ValueError: If the operator declares no returns.
    """
    type_convert_to_base = {
        'std::vector<mindspore::tensor::TensorPtr>': 'std::vector<mindspore::tensor::BaseTensorPtr>',
        'mindspore::tensor::TensorPtr': 'mindspore::tensor::BaseTensorPtr'
    }
    # Used only to make error messages point at the offending operator.
    op_name = getattr(op_proto, 'op_name', '<unknown>')
    returns_type = []
    for return_obj in op_proto.op_returns:
        temp_return = get_return_type(return_obj.arg_dtype)
        base_type = type_convert_to_base.get(temp_return)
        if base_type is None:
            raise TypeError(f"Unsupported return type '{temp_return}' for op '{op_name}'; "
                            f"expected one of {sorted(type_convert_to_base)}.")
        returns_type.append(base_type)
    if not returns_type:
        raise ValueError(f"No return found for op '{op_name}'.")
    if len(returns_type) == 1:
        return returns_type[0]
    return "std::tuple<" + ', '.join(returns_type) + ">"
|
|
@@ -22,82 +22,43 @@ import re
|
|
|
22
22
|
import pathlib
|
|
23
23
|
import logging
|
|
24
24
|
import gen_utils
|
|
25
|
+
import template
|
|
26
|
+
from op_proto import OpProto
|
|
25
27
|
from pyboost_utils import AclnnUtils, get_dtypes
|
|
26
28
|
from gen_constants import MS_OPS_KERNEL_PATH
|
|
27
|
-
|
|
29
|
+
import gen_constants as K
|
|
28
30
|
auto_gen = ''
|
|
29
31
|
|
|
30
32
|
|
|
31
|
-
def gen_h(
|
|
33
|
+
def gen_h(kernelmod_name, aclnn_name, op_proto, kernelmod_h_path, need_update_shape):
|
|
32
34
|
"""generate h files"""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_{op_name.upper()}_ACLNN{auto_gen.upper()}_KERNEL_MOD_H_
|
|
36
|
-
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_{op_name.upper()}_ACLNN{auto_gen.upper()}_KERNEL_MOD_H_
|
|
37
|
-
#include <vector>
|
|
38
|
-
#include "ops/base_operator.h"
|
|
39
|
-
#include "{MS_OPS_KERNEL_PATH}/ascend/opapi/aclnn_kernel_mod.h"
|
|
40
|
-
#include "transform/acl_ir/acl_convert.h"
|
|
41
|
-
"""
|
|
42
|
-
update_shape = f"""
|
|
43
|
-
bool IsNeedUpdateOutputShapeAndSize() override {{ return true; }}
|
|
44
|
-
void UpdateOutputShapeAndSize(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &outputs);
|
|
45
|
-
"""
|
|
35
|
+
op_name = op_proto.op_name
|
|
36
|
+
update_shape = template.UPDATE_OUTPUT_SHAPE_AND_SIZE
|
|
46
37
|
if not need_update_shape:
|
|
47
|
-
update_shape = ""
|
|
48
|
-
h_body = f"""
|
|
49
|
-
namespace mindspore {{
|
|
50
|
-
namespace kernel {{
|
|
51
|
-
|
|
52
|
-
class {kernelmod_name} : public AclnnKernelMod {{
|
|
53
|
-
public:
|
|
54
|
-
{kernelmod_name}() : AclnnKernelMod(std::move("{aclnn_name}")) {{}}
|
|
55
|
-
~{kernelmod_name}() = default;
|
|
56
|
-
bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
|
|
57
|
-
const std::vector<KernelTensor *> &outputs, void *stream_ptr) override;
|
|
58
|
-
void GetWorkSpaceInfo(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &outputs) override;
|
|
59
|
-
{update_shape}
|
|
60
|
-
|
|
61
|
-
private:
|
|
62
|
-
DEFINE_GET_WORKSPACE_FOR_RESIZE()
|
|
63
|
-
}};
|
|
64
|
-
}} // namespace kernel
|
|
65
|
-
}} // namespace mindspore
|
|
38
|
+
update_shape = "\n "
|
|
66
39
|
|
|
67
|
-
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_{op_name.upper()}_ACLNN{auto_gen.upper()}_KERNEL_MOD_H_
|
|
68
|
-
"""
|
|
69
40
|
temp_file = kernelmod_h_path + "_tmp.h"
|
|
70
41
|
old_file = kernelmod_h_path + ".h"
|
|
71
42
|
flags = os.O_WRONLY | os.O_CREAT
|
|
72
43
|
mode = stat.S_IWUSR | stat.S_IRUSR
|
|
44
|
+
aclnn_kernel_h_str = template.ACLNN_KERNEL_H_TEMPLATE.replace(aclnn_name=aclnn_name,
|
|
45
|
+
op_name=op_name.upper(),
|
|
46
|
+
auto_gen=auto_gen.upper(),
|
|
47
|
+
kernelmod_name=kernelmod_name,
|
|
48
|
+
update_shape=update_shape,
|
|
49
|
+
ops_kernel_path=MS_OPS_KERNEL_PATH)
|
|
73
50
|
with os.fdopen(os.open(temp_file, flags, mode), 'w') as h_file:
|
|
74
|
-
h_file.write(
|
|
51
|
+
h_file.write(aclnn_kernel_h_str)
|
|
75
52
|
gen_utils.check_change_and_replace_file(old_file, temp_file)
|
|
76
53
|
|
|
77
54
|
|
|
78
|
-
def gen_cc(
|
|
55
|
+
def gen_cc(kernelmod_name, aclnn_name, op_proto, kernelmod_cc_path, need_update_shape):
|
|
79
56
|
"""generate cc files"""
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
#include <algorithm>
|
|
84
|
-
#include <vector>
|
|
85
|
-
#include <memory>
|
|
86
|
-
#include <functional>
|
|
87
|
-
#include "ir/tensor.h"
|
|
88
|
-
#include "runtime/device/kernel_runtime.h"
|
|
89
|
-
#include "transform/acl_ir/op_api_convert.h"
|
|
90
|
-
#include "abstract/ops/primitive_infer_map.h"
|
|
91
|
-
|
|
92
|
-
namespace mindspore {{
|
|
93
|
-
namespace kernel {{
|
|
94
|
-
"""
|
|
95
|
-
tuple_tensor_not_supported = f"""
|
|
96
|
-
It is not supported for {op_name} with tuple[tensor] inputs when using auto generate.
|
|
97
|
-
Please provide a KernelMod name in yaml and using python gen_aclnn_implement.py -n xx manually."""
|
|
98
|
-
input_templete = ''
|
|
57
|
+
op_name = op_proto.op_name
|
|
58
|
+
tuple_tensor_not_supported = template.TUPLE_TENSOR_NOT_SUPPORTED.replace(op_name=op_name)
|
|
59
|
+
input_templete = '\n '
|
|
99
60
|
inputs = ''
|
|
100
|
-
input_dtypes, output_dtypes, _ = get_dtypes(
|
|
61
|
+
input_dtypes, output_dtypes, _ = get_dtypes(op_proto)
|
|
101
62
|
for idx, n in enumerate(input_dtypes):
|
|
102
63
|
input_name = "inputs[kIndex" + str(idx) + "], "
|
|
103
64
|
dtype = input_dtypes.get(n)
|
|
@@ -105,12 +66,12 @@ namespace kernel {{
|
|
|
105
66
|
if dtype == 'int':
|
|
106
67
|
dtype = 'int64_t'
|
|
107
68
|
input_templete += " auto {} = transform::ConvertKernelTensor<{}>(inputs[kIndex{}]);\n".format(
|
|
108
|
-
n, dtype, idx)
|
|
109
|
-
input_name = n + ", "
|
|
69
|
+
n.arg_name, dtype, idx)
|
|
70
|
+
input_name = n.arg_name + ", "
|
|
110
71
|
if dtype == 'tuple[tensor]' and auto_gen == "_auto_gen":
|
|
111
72
|
raise NotImplementedError(tuple_tensor_not_supported)
|
|
112
73
|
inputs += input_name
|
|
113
|
-
|
|
74
|
+
input_templete = '' if input_templete == '\n ' else input_templete
|
|
114
75
|
for idx, n in enumerate(output_dtypes):
|
|
115
76
|
output_name = "outputs[kIndex" + str(idx) + "], "
|
|
116
77
|
dtype = output_dtypes.get(n)
|
|
@@ -118,76 +79,60 @@ namespace kernel {{
|
|
|
118
79
|
if dtype == 'int':
|
|
119
80
|
dtype = 'int64_t'
|
|
120
81
|
input_templete += " auto {} = transform::ConvertKernelTensor<{}>(outputs[kIndex{}]);\n".format(
|
|
121
|
-
n, dtype, idx)
|
|
122
|
-
output_name = n + ", "
|
|
82
|
+
n.arg_name, dtype, idx)
|
|
83
|
+
output_name = n.arg_name + ", "
|
|
123
84
|
if dtype == 'tuple[tensor]' and auto_gen == "_auto_gen":
|
|
124
85
|
raise NotImplementedError(tuple_tensor_not_supported)
|
|
125
86
|
inputs += output_name
|
|
126
87
|
inputs = inputs[:-2]
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
const std::vector<KernelTensor *> &outputs) {{
|
|
130
|
-
{input_templete}
|
|
131
|
-
GetWorkspaceForResize({inputs});
|
|
132
|
-
}}
|
|
133
|
-
"""
|
|
134
|
-
launch = f"""
|
|
135
|
-
bool {kernelmod_name}::Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
|
|
136
|
-
const std::vector<KernelTensor *> &outputs, void *stream_ptr) {{
|
|
137
|
-
MS_EXCEPTION_IF_NULL(stream_ptr);
|
|
138
|
-
{input_templete}
|
|
139
|
-
RunOp(stream_ptr, workspace, {inputs});
|
|
140
|
-
return true;
|
|
141
|
-
}}
|
|
142
|
-
"""
|
|
143
|
-
update_shape = f"""
|
|
144
|
-
void {kernelmod_name}::UpdateOutputShapeAndSize(const std::vector<KernelTensor *> &,
|
|
145
|
-
const std::vector<KernelTensor *> &outputs) {{
|
|
146
|
-
// Delete these comment and complete the function:
|
|
147
|
-
// Using outputs[index_x]->SetShapeVector(update_shape) and outputs[index_x]->set_size(update_size)
|
|
148
|
-
}}
|
|
149
|
-
"""
|
|
88
|
+
|
|
89
|
+
update_shape = template.update_output_shape_and_size_template.replace(kernelmod_name=kernelmod_name)
|
|
150
90
|
if not need_update_shape:
|
|
151
91
|
update_shape = ""
|
|
152
92
|
|
|
153
|
-
reg = f"""
|
|
154
|
-
MS_ACLNN_KERNEL_FACTORY_REG({class_name}, {kernelmod_name});
|
|
155
|
-
}} // namespace kernel
|
|
156
|
-
}} // namespace mindspore
|
|
157
|
-
|
|
158
|
-
"""
|
|
159
93
|
temp_file = kernelmod_cc_path + "_tmp.cc"
|
|
160
94
|
old_file = kernelmod_cc_path + ".cc"
|
|
161
95
|
flags = os.O_WRONLY | os.O_CREAT
|
|
162
96
|
mode = stat.S_IWUSR | stat.S_IRUSR
|
|
97
|
+
aclnn_kernel_cc_str = template.ACLNN_KERNEL_CC_TEMPLATE.replace(kernelmod_name=kernelmod_name,
|
|
98
|
+
input_templete=input_templete,
|
|
99
|
+
inputs=inputs,
|
|
100
|
+
update_shape=update_shape,
|
|
101
|
+
class_name=aclnn_name,
|
|
102
|
+
auto_gen_path=MS_OPS_KERNEL_PATH,
|
|
103
|
+
op_name=op_name,
|
|
104
|
+
auto_gen=auto_gen) + " "
|
|
163
105
|
with os.fdopen(os.open(temp_file, flags, mode), 'w') as cc_file:
|
|
164
|
-
cc_file.write(
|
|
106
|
+
cc_file.write(aclnn_kernel_cc_str)
|
|
165
107
|
gen_utils.check_change_and_replace_file(old_file, temp_file)
|
|
166
108
|
|
|
167
109
|
|
|
168
|
-
def generate(
|
|
110
|
+
def generate(kernelmod_name, class_name, op_proto, h_and_cc, need_update_shape):
|
|
169
111
|
"""generate cc and h files"""
|
|
170
112
|
aclnn_name = AclnnUtils.get_aclnn_interface(class_name)
|
|
171
|
-
gen_h(
|
|
172
|
-
gen_cc(
|
|
113
|
+
gen_h(kernelmod_name, aclnn_name, op_proto, h_and_cc, need_update_shape)
|
|
114
|
+
gen_cc(kernelmod_name, class_name, op_proto, h_and_cc, need_update_shape)
|
|
173
115
|
|
|
174
116
|
|
|
175
|
-
def gen_aclnn_kernel(
|
|
117
|
+
def gen_aclnn_kernel(op_proto: OpProto, need_update_shape=False, auto=False):
|
|
176
118
|
"""gen_aclnn_kernel function"""
|
|
177
|
-
|
|
119
|
+
op_name = op_proto.op_name
|
|
120
|
+
skip_aclnn_list = {"slice", "expand_dims", "squeeze", "split", "generator"}
|
|
121
|
+
if op_name in skip_aclnn_list:
|
|
122
|
+
logging.warning("Operator {%s} has no aclnn interface, no aclnn kernel will be generated.", op_name)
|
|
123
|
+
return
|
|
124
|
+
if check_op_registed(op_proto.op_name) and not auto:
|
|
178
125
|
logging.warning("Kernel {%s} is already registered.", op_name)
|
|
179
126
|
return
|
|
180
127
|
current_path = os.path.dirname(os.path.realpath(__file__))
|
|
181
128
|
work_path = os.path.join(current_path, '../../../../')
|
|
182
129
|
|
|
183
|
-
aclnn_path = '{MS_OPS_KERNEL_PATH}/ascend/opapi/aclnn/'
|
|
130
|
+
aclnn_path = f'{MS_OPS_KERNEL_PATH}/ascend/opapi/aclnn/'
|
|
184
131
|
# merge inner ops
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
dispatch = op_yaml.get("dispatch")
|
|
190
|
-
if not dispatch or not dispatch.get("enable"):
|
|
132
|
+
dispatch = op_proto.op_dispatch
|
|
133
|
+
aclnn_name = ''.join(word.capitalize() for word in op_name.split('_'))
|
|
134
|
+
kernelmod_name = op_proto.op_dispatch.ascend
|
|
135
|
+
if not dispatch or not op_proto.op_dispatch.enable:
|
|
191
136
|
raise ValueError("Op {} is not enabled dispatch, please check.".format(op_name))
|
|
192
137
|
global auto_gen
|
|
193
138
|
if auto:
|
|
@@ -195,17 +140,17 @@ def gen_aclnn_kernel(op_name, yaml_str, need_update_shape=False, auto=False):
|
|
|
195
140
|
# Don't auto generate kernelmod if it is achieved manually.
|
|
196
141
|
return
|
|
197
142
|
auto_gen = "_auto_gen"
|
|
198
|
-
|
|
143
|
+
kernelmod_name = aclnn_name + "Ascend"
|
|
199
144
|
aclnn_path = f'{MS_OPS_KERNEL_PATH}/ascend/opapi/aclnn_auto_gen/'
|
|
200
145
|
pathlib.Path(os.path.join(work_path, aclnn_path)).mkdir(parents=True, exist_ok=True)
|
|
201
|
-
if dispatch.
|
|
146
|
+
if dispatch.ascend is None:
|
|
202
147
|
raise ValueError("KernelMod {} is auto generated. If need achieve it, "
|
|
203
148
|
"please provide the KernelMod name in dispatch.".format(op_name))
|
|
204
|
-
op_class =
|
|
205
|
-
if op_class is not None and op_class.
|
|
206
|
-
|
|
149
|
+
op_class = op_proto.op_class
|
|
150
|
+
if op_class is not None and op_class.name is not None:
|
|
151
|
+
aclnn_name = op_class.name
|
|
207
152
|
kernelmod_h_and_cc_path = os.path.join(work_path, aclnn_path + '{}_aclnn_kernel'.format(op_name))
|
|
208
|
-
generate(
|
|
153
|
+
generate(kernelmod_name, aclnn_name, op_proto, kernelmod_h_and_cc_path, need_update_shape)
|
|
209
154
|
|
|
210
155
|
|
|
211
156
|
def get_registed_ops(file_path=f'{MS_OPS_KERNEL_PATH}/ascend/opapi/'):
|
|
@@ -235,12 +180,63 @@ manual_registed_ops = get_registed_ops(f'{MS_OPS_KERNEL_PATH}/ascend/opapi/aclnn
|
|
|
235
180
|
|
|
236
181
|
def check_op_registed(op_name, manual=False):
|
|
237
182
|
'''if op already registered return true'''
|
|
238
|
-
global registed_ops
|
|
239
|
-
global manual_registed_ops
|
|
240
183
|
class_name = ''.join(word.capitalize() for word in op_name.split('_'))
|
|
241
184
|
return (class_name in manual_registed_ops) if manual else (class_name in registed_ops)
|
|
242
185
|
|
|
243
186
|
|
|
187
|
+
def generate_aclnn_reg_code(yaml_data):
|
|
188
|
+
"""generate aclnn register code"""
|
|
189
|
+
current_path = os.path.dirname(os.path.realpath(__file__))
|
|
190
|
+
work_path = os.path.join(current_path, '../../../../')
|
|
191
|
+
ops_yaml_path = os.path.join(work_path, K.PY_OPS_GEN_PATH, "ops.yaml")
|
|
192
|
+
yaml_str = gen_utils.safe_load_yaml(ops_yaml_path)
|
|
193
|
+
|
|
194
|
+
reg_code = f"""
|
|
195
|
+
#include "{MS_OPS_KERNEL_PATH}/ascend/opapi/aclnn_kernel_mod.h"
|
|
196
|
+
|
|
197
|
+
namespace mindspore {{
|
|
198
|
+
namespace kernel {{
|
|
199
|
+
"""
|
|
200
|
+
for operator_name, operator_data in yaml_data.items():
|
|
201
|
+
dispatch = operator_data.get("dispatch")
|
|
202
|
+
if not dispatch or not dispatch.get("enable"):
|
|
203
|
+
continue
|
|
204
|
+
Ascend = dispatch.get("Ascend")
|
|
205
|
+
if Ascend is not None: # KernelMod is provided by yaml, don't auto generate it.
|
|
206
|
+
continue
|
|
207
|
+
if check_op_registed(operator_name):
|
|
208
|
+
logging.warning("Kernel {%s} is already registered.", operator_name)
|
|
209
|
+
continue
|
|
210
|
+
_, _, none_tensor_exist = get_dtypes(operator_data)
|
|
211
|
+
if none_tensor_exist:
|
|
212
|
+
gen_aclnn_kernel(operator_name, yaml_str, auto=True)
|
|
213
|
+
continue
|
|
214
|
+
class_name = ''.join(word.capitalize() for word in operator_name.split('_'))
|
|
215
|
+
op_class = operator_data.get("class")
|
|
216
|
+
if op_class and op_class.get("name") is not None:
|
|
217
|
+
class_name = op_class.get("name")
|
|
218
|
+
inputs_outputs_num = len(operator_data.get("args")) + len(operator_data.get("returns"))
|
|
219
|
+
aclnn_name = AclnnUtils.get_aclnn_interface(class_name)
|
|
220
|
+
reg_code += f"""
|
|
221
|
+
MS_ACLNN_COMMON_KERNEL_FACTORY_REG({class_name}, {aclnn_name}, {inputs_outputs_num});"""
|
|
222
|
+
reg_code += f"""
|
|
223
|
+
}} // namespace kernel
|
|
224
|
+
}} // namespace mindspore
|
|
225
|
+
"""
|
|
226
|
+
return reg_code
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def generate_aclnn_reg_file(work_path, yaml_str):
|
|
230
|
+
"""
|
|
231
|
+
Generate nnacl kernelmod register
|
|
232
|
+
"""
|
|
233
|
+
tmp_register_file = work_path + f'{MS_OPS_KERNEL_PATH}/ascend/opapi/tmp_aclnn_kernel_register.cc'
|
|
234
|
+
register_file = work_path + f'{MS_OPS_KERNEL_PATH}/ascend/opapi/aclnn_kernel_register_auto.cc'
|
|
235
|
+
reg_code = generate_aclnn_reg_code(yaml_str)
|
|
236
|
+
gen_utils.write_file(tmp_register_file, gen_utils.cc_license_str + reg_code)
|
|
237
|
+
gen_utils.check_change_and_replace_file(register_file, tmp_register_file)
|
|
238
|
+
|
|
239
|
+
|
|
244
240
|
def main(op_name, need_update_shape):
|
|
245
241
|
'''main func'''
|
|
246
242
|
gen_aclnn_kernel(op_name, need_update_shape)
|
|
@@ -259,5 +255,5 @@ if __name__ == "__main__":
|
|
|
259
255
|
raise ValueError("Please provide op name to generate aclnn kernelmod.")
|
|
260
256
|
is_need_update_shape = options.need_update_shape
|
|
261
257
|
main(name, is_need_update_shape)
|
|
262
|
-
except Exception as e:
|
|
258
|
+
except Exception as e: # pylint: disable=W0703
|
|
263
259
|
logging.exception("Generate aclnn kernelmod failed, err info: %s", e)
|