mindspore 2.1.0__cp37-cp37m-manylinux1_x86_64.whl → 2.2.10__cp37-cp37m-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (580)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_akg/akg/build_module.py +5 -6
  4. mindspore/_akg/akg/composite/build_module.py +46 -19
  5. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  6. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  7. mindspore/_akg/akg/tvm/api.py +4 -3
  8. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  9. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  10. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  11. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  12. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  13. mindspore/_akg/akg/tvm/build_module.py +16 -1
  14. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  15. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  16. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  17. mindspore/_akg/akg/tvm/module.py +1 -2
  18. mindspore/_akg/akg/tvm/stmt.py +2 -2
  19. mindspore/_akg/akg/utils/ascend_profilier/__init__.py +0 -0
  20. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  21. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  22. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  23. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  24. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  25. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  26. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  27. mindspore/_akg/akg/utils/kernel_exec.py +98 -274
  28. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  29. mindspore/_akg/akg/utils/tbe_codegen_utils.py +219 -0
  30. mindspore/_akg/akg/utils/util.py +38 -0
  31. mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
  32. mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
  33. mindspore/_c_mindrecord.cpython-37m-x86_64-linux-gnu.so +0 -0
  34. mindspore/_check_jit_forbidden_api.py +3 -1
  35. mindspore/_checkparam.py +23 -29
  36. mindspore/_extends/graph_kernel/__init__.py +0 -1
  37. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  38. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  39. mindspore/_extends/graph_kernel/splitter.py +4 -11
  40. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
  41. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
  42. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  43. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  44. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
  45. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
  46. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  47. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  48. mindspore/_extends/parse/__init__.py +12 -15
  49. mindspore/_extends/parse/namespace.py +7 -33
  50. mindspore/_extends/parse/parser.py +61 -71
  51. mindspore/_extends/parse/resources.py +1 -1
  52. mindspore/_extends/parse/standard_method.py +74 -104
  53. mindspore/_extends/parse/trope.py +1 -1
  54. mindspore/_extends/remote/kernel_build_server.py +25 -7
  55. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  56. mindspore/_install_custom.py +43 -0
  57. mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
  58. mindspore/amp.py +47 -11
  59. mindspore/bin/cache_admin +0 -0
  60. mindspore/bin/cache_server +0 -0
  61. mindspore/boost/boost.py +1 -8
  62. mindspore/boost/boost_cell_wrapper.py +3 -2
  63. mindspore/boost/grad_accumulation.py +1 -1
  64. mindspore/boost/group_loss_scale_manager.py +8 -7
  65. mindspore/common/__init__.py +5 -3
  66. mindspore/common/_jit_fallback_utils.py +6 -0
  67. mindspore/common/_register_for_adapter.py +2 -0
  68. mindspore/common/_register_for_tensor.py +2 -2
  69. mindspore/common/_stub_tensor.py +13 -0
  70. mindspore/common/_utils.py +13 -0
  71. mindspore/common/api.py +174 -259
  72. mindspore/common/auto_dynamic_shape.py +494 -0
  73. mindspore/common/dtype.py +18 -11
  74. mindspore/common/dump.py +6 -4
  75. mindspore/common/initializer.py +14 -14
  76. mindspore/common/jit_config.py +33 -15
  77. mindspore/common/lazy_inline.py +126 -7
  78. mindspore/common/mindir_util.py +101 -0
  79. mindspore/common/parameter.py +51 -41
  80. mindspore/common/seed.py +4 -4
  81. mindspore/common/sparse_tensor.py +13 -14
  82. mindspore/common/tensor.py +243 -165
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +83 -4
  85. mindspore/communication/management.py +152 -84
  86. mindspore/config/op_info.config +14 -3
  87. mindspore/config/super_bar_config.json +4 -2
  88. mindspore/context.py +152 -61
  89. mindspore/dataset/__init__.py +5 -5
  90. mindspore/dataset/audio/__init__.py +2 -2
  91. mindspore/dataset/audio/transforms.py +52 -52
  92. mindspore/dataset/callback/ds_callback.py +16 -2
  93. mindspore/dataset/core/config.py +68 -51
  94. mindspore/dataset/engine/cache_client.py +28 -5
  95. mindspore/dataset/engine/datasets.py +250 -112
  96. mindspore/dataset/engine/datasets_audio.py +43 -211
  97. mindspore/dataset/engine/datasets_standard_format.py +16 -35
  98. mindspore/dataset/engine/datasets_text.py +43 -67
  99. mindspore/dataset/engine/datasets_user_defined.py +86 -100
  100. mindspore/dataset/engine/datasets_vision.py +219 -1029
  101. mindspore/dataset/engine/iterators.py +11 -4
  102. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
  103. mindspore/dataset/engine/obs/util.py +3 -0
  104. mindspore/dataset/engine/samplers.py +1 -1
  105. mindspore/dataset/engine/validators.py +19 -5
  106. mindspore/dataset/text/__init__.py +3 -3
  107. mindspore/dataset/text/transforms.py +101 -127
  108. mindspore/dataset/text/utils.py +205 -138
  109. mindspore/dataset/transforms/__init__.py +1 -1
  110. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  111. mindspore/dataset/transforms/transforms.py +95 -40
  112. mindspore/dataset/utils/browse_dataset.py +8 -2
  113. mindspore/dataset/utils/line_reader.py +17 -19
  114. mindspore/dataset/vision/__init__.py +3 -3
  115. mindspore/dataset/vision/c_transforms.py +6 -3
  116. mindspore/dataset/vision/transforms.py +409 -287
  117. mindspore/dataset/vision/utils.py +13 -14
  118. mindspore/dataset/vision/validators.py +11 -1
  119. mindspore/experimental/map_parameter.py +14 -0
  120. mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
  121. mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
  122. mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
  123. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  124. mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
  125. mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
  126. mindspore/gen_ops.py +273 -0
  127. mindspore/include/OWNERS +0 -1
  128. mindspore/include/api/data_type.h +2 -1
  129. mindspore/include/api/graph.h +0 -15
  130. mindspore/include/api/kernel.h +2 -0
  131. mindspore/include/api/kernel_api.h +37 -12
  132. mindspore/include/api/model.h +17 -14
  133. mindspore/include/api/status.h +8 -3
  134. mindspore/include/api/types.h +37 -4
  135. mindspore/include/c_api/ms/abstract.h +67 -0
  136. mindspore/include/c_api/ms/attribute.h +197 -0
  137. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  138. mindspore/include/c_api/ms/base/macros.h +32 -0
  139. mindspore/include/c_api/ms/base/status.h +33 -0
  140. mindspore/include/c_api/ms/base/types.h +282 -0
  141. mindspore/include/c_api/ms/context.h +102 -0
  142. mindspore/include/c_api/ms/graph.h +160 -0
  143. mindspore/include/c_api/ms/node.h +606 -0
  144. mindspore/include/c_api/ms/tensor.h +161 -0
  145. mindspore/include/c_api/ms/value.h +84 -0
  146. mindspore/include/dataset/constants.h +6 -5
  147. mindspore/include/dataset/execute.h +23 -13
  148. mindspore/include/dataset/text.h +26 -26
  149. mindspore/include/dataset/transforms.h +13 -13
  150. mindspore/include/dataset/vision.h +60 -60
  151. mindspore/include/dataset/vision_ascend.h +5 -6
  152. mindspore/include/dataset/vision_lite.h +17 -17
  153. mindspore/include/mindapi/base/type_id.h +1 -0
  154. mindspore/include/mindapi/base/types.h +1 -0
  155. mindspore/lib/libdnnl.so.2 +0 -0
  156. mindspore/lib/libjemalloc.so.2 +0 -0
  157. mindspore/lib/libmindspore.so +0 -0
  158. mindspore/lib/libmindspore_backend.so +0 -0
  159. mindspore/lib/libmindspore_common.so +0 -0
  160. mindspore/lib/libmindspore_core.so +0 -0
  161. mindspore/lib/libmindspore_glog.so.0 +0 -0
  162. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  163. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  164. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  165. mindspore/lib/libmindspore_shared_lib.so +0 -0
  166. mindspore/lib/libnnacl.so +0 -0
  167. mindspore/lib/libopencv_core.so.4.5 +0 -0
  168. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  169. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  170. mindspore/lib/libps_cache.so +0 -0
  171. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  172. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  173. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  174. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  175. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  176. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  177. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  178. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  179. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  180. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  181. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  182. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  183. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  184. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  185. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  186. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8928 -0
  187. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  188. mindspore/lib/plugin/ascend/libakg.so +0 -0
  189. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  190. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  191. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  192. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  193. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  194. mindspore/lib/plugin/cpu/libakg.so +0 -0
  195. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  196. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  197. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  198. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  199. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  201. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  202. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  203. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  204. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  205. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  206. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  207. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  208. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  209. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  210. mindspore/nn/__init__.py +0 -2
  211. mindspore/nn/cell.py +313 -74
  212. mindspore/nn/dynamic_lr.py +21 -21
  213. mindspore/nn/layer/activation.py +22 -30
  214. mindspore/nn/layer/basic.py +15 -13
  215. mindspore/nn/layer/channel_shuffle.py +1 -1
  216. mindspore/nn/layer/container.py +271 -9
  217. mindspore/nn/layer/conv.py +323 -204
  218. mindspore/nn/layer/dense.py +8 -5
  219. mindspore/nn/layer/embedding.py +33 -27
  220. mindspore/nn/layer/flash_attention.py +141 -88
  221. mindspore/nn/layer/image.py +8 -6
  222. mindspore/nn/layer/math.py +16 -25
  223. mindspore/nn/layer/normalization.py +107 -66
  224. mindspore/nn/layer/padding.py +1 -1
  225. mindspore/nn/layer/pooling.py +131 -109
  226. mindspore/nn/layer/rnn_cells.py +27 -22
  227. mindspore/nn/layer/rnns.py +13 -16
  228. mindspore/nn/layer/thor_layer.py +1 -1
  229. mindspore/nn/layer/transformer.py +221 -154
  230. mindspore/nn/learning_rate_schedule.py +9 -1
  231. mindspore/nn/loss/loss.py +235 -174
  232. mindspore/nn/optim/ada_grad.py +2 -1
  233. mindspore/nn/optim/adadelta.py +1 -0
  234. mindspore/nn/optim/adafactor.py +2 -1
  235. mindspore/nn/optim/adam.py +7 -4
  236. mindspore/nn/optim/adamax.py +3 -2
  237. mindspore/nn/optim/adasum.py +2 -2
  238. mindspore/nn/optim/asgd.py +2 -3
  239. mindspore/nn/optim/ftrl.py +6 -5
  240. mindspore/nn/optim/lamb.py +7 -4
  241. mindspore/nn/optim/lars.py +1 -1
  242. mindspore/nn/optim/lazyadam.py +5 -3
  243. mindspore/nn/optim/momentum.py +2 -1
  244. mindspore/nn/optim/optimizer.py +53 -4
  245. mindspore/nn/optim/proximal_ada_grad.py +3 -4
  246. mindspore/nn/optim/rmsprop.py +4 -3
  247. mindspore/nn/optim/rprop.py +23 -12
  248. mindspore/nn/optim/sgd.py +26 -11
  249. mindspore/nn/optim/thor.py +9 -7
  250. mindspore/nn/probability/bijector/bijector.py +5 -5
  251. mindspore/nn/probability/bijector/power_transform.py +27 -27
  252. mindspore/nn/probability/bijector/softplus.py +3 -3
  253. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
  254. mindspore/nn/probability/distribution/bernoulli.py +5 -5
  255. mindspore/nn/probability/distribution/beta.py +3 -3
  256. mindspore/nn/probability/distribution/categorical.py +7 -7
  257. mindspore/nn/probability/distribution/cauchy.py +0 -1
  258. mindspore/nn/probability/distribution/distribution.py +3 -3
  259. mindspore/nn/probability/distribution/gamma.py +3 -3
  260. mindspore/nn/probability/distribution/geometric.py +4 -4
  261. mindspore/nn/probability/distribution/gumbel.py +4 -4
  262. mindspore/nn/probability/distribution/log_normal.py +2 -2
  263. mindspore/nn/probability/distribution/logistic.py +2 -2
  264. mindspore/nn/probability/distribution/poisson.py +4 -4
  265. mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
  266. mindspore/nn/probability/distribution/uniform.py +6 -6
  267. mindspore/nn/wrap/cell_wrapper.py +84 -34
  268. mindspore/nn/wrap/grad_reducer.py +8 -5
  269. mindspore/nn/wrap/loss_scale.py +105 -42
  270. mindspore/numpy/array_creations.py +1 -2
  271. mindspore/numpy/array_ops.py +3 -2
  272. mindspore/numpy/utils_const.py +5 -5
  273. mindspore/offline_debug/convert_async.py +2 -2
  274. mindspore/ops/_grad_experimental/__init__.py +0 -5
  275. mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
  276. mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
  277. mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
  278. mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
  279. mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
  280. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
  281. mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
  282. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  283. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  284. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
  285. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
  286. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
  287. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
  288. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
  289. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
  290. mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
  291. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  292. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  293. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  294. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  295. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  296. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  297. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  298. mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
  299. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  300. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  301. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  302. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  303. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  304. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  305. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  306. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
  307. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
  308. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  309. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  310. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  311. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  312. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  313. mindspore/ops/_op_impl/tbe/__init__.py +4 -4
  314. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  315. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  316. mindspore/ops/_primitive_cache.py +1 -1
  317. mindspore/ops/_tracefunc.py +45 -13
  318. mindspore/ops/_utils/utils.py +6 -1
  319. mindspore/ops/_vmap/vmap_array_ops.py +3 -3
  320. mindspore/ops/_vmap/vmap_base.py +3 -3
  321. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  322. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  323. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  324. mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
  325. mindspore/ops/arg_dtype_cast.py +54 -0
  326. mindspore/ops/composite/base.py +37 -10
  327. mindspore/ops/composite/math_ops.py +5 -4
  328. mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
  329. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
  330. mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
  331. mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
  332. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  333. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  334. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  335. mindspore/ops/deprecated.py +304 -0
  336. mindspore/ops/function/__init__.py +4 -1
  337. mindspore/ops/function/array_func.py +174 -193
  338. mindspore/ops/function/clip_func.py +81 -13
  339. mindspore/ops/function/debug_func.py +1 -1
  340. mindspore/ops/function/grad/grad_func.py +18 -9
  341. mindspore/ops/function/image_func.py +10 -4
  342. mindspore/ops/function/linalg_func.py +5 -5
  343. mindspore/ops/function/math_func.py +575 -386
  344. mindspore/ops/function/nn_func.py +568 -260
  345. mindspore/ops/function/random_func.py +88 -57
  346. mindspore/ops/function/sparse_func.py +1 -1
  347. mindspore/ops/function/sparse_unary_func.py +14 -12
  348. mindspore/ops/function/vmap_func.py +6 -5
  349. mindspore/ops/functional.py +15 -10
  350. mindspore/ops/op_info_register.py +244 -25
  351. mindspore/ops/operations/__init__.py +28 -19
  352. mindspore/ops/operations/_grad_ops.py +72 -7
  353. mindspore/ops/operations/_inner_ops.py +350 -17
  354. mindspore/ops/operations/_quant_ops.py +4 -8
  355. mindspore/ops/operations/_sequence_ops.py +42 -0
  356. mindspore/ops/operations/array_ops.py +68 -282
  357. mindspore/ops/operations/comm_ops.py +107 -59
  358. mindspore/ops/operations/custom_ops.py +94 -70
  359. mindspore/ops/operations/debug_ops.py +8 -4
  360. mindspore/ops/operations/image_ops.py +18 -12
  361. mindspore/ops/operations/inner_ops.py +26 -3
  362. mindspore/ops/operations/math_ops.py +189 -141
  363. mindspore/ops/operations/nn_ops.py +794 -489
  364. mindspore/ops/operations/other_ops.py +0 -22
  365. mindspore/ops/operations/random_ops.py +53 -111
  366. mindspore/ops/operations/sparse_ops.py +3 -1
  367. mindspore/ops/primitive.py +24 -18
  368. mindspore/parallel/_auto_parallel_context.py +68 -8
  369. mindspore/parallel/_cost_model_context.py +2 -2
  370. mindspore/parallel/_offload_context.py +17 -3
  371. mindspore/parallel/_parallel_serialization.py +12 -5
  372. mindspore/parallel/_ps_context.py +12 -0
  373. mindspore/parallel/_tensor.py +18 -13
  374. mindspore/parallel/_transformer/layers.py +5 -3
  375. mindspore/parallel/_transformer/loss.py +1 -0
  376. mindspore/parallel/_transformer/moe.py +2 -2
  377. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  378. mindspore/parallel/_transformer/transformer.py +23 -3
  379. mindspore/parallel/_utils.py +11 -7
  380. mindspore/parallel/algo_parameter_config.py +85 -5
  381. mindspore/parallel/checkpoint_transform.py +19 -12
  382. mindspore/parallel/shard.py +21 -14
  383. mindspore/profiler/common/struct_type.py +3 -3
  384. mindspore/profiler/common/util.py +4 -2
  385. mindspore/profiler/envprofiling.py +1 -1
  386. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  387. mindspore/profiler/parser/ascend_flops_generator.py +2 -2
  388. mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
  389. mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
  390. mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
  391. mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
  392. mindspore/profiler/parser/ascend_op_generator.py +6 -6
  393. mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
  394. mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
  395. mindspore/profiler/parser/base_timeline_generator.py +10 -8
  396. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
  397. mindspore/profiler/parser/flops_parser.py +15 -11
  398. mindspore/profiler/parser/framework_parser.py +38 -22
  399. mindspore/profiler/parser/hccl_parser.py +16 -12
  400. mindspore/profiler/parser/integrator.py +22 -11
  401. mindspore/profiler/parser/memory_usage_parser.py +2 -2
  402. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  403. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  404. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  405. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  406. mindspore/profiler/parser/optime_parser.py +1 -1
  407. mindspore/profiler/parser/profiler_info.py +21 -2
  408. mindspore/profiler/parser/step_trace_parser.py +11 -14
  409. mindspore/profiler/profiling.py +179 -89
  410. mindspore/rewrite/api/node.py +102 -19
  411. mindspore/rewrite/api/node_type.py +5 -1
  412. mindspore/rewrite/api/pattern_engine.py +1 -1
  413. mindspore/rewrite/api/scoped_value.py +9 -17
  414. mindspore/rewrite/api/symbol_tree.py +131 -47
  415. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  416. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  417. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  418. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
  419. mindspore/rewrite/common/rewrite_elog.py +5 -1
  420. mindspore/rewrite/namer.py +33 -24
  421. mindspore/rewrite/namespace.py +14 -5
  422. mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
  423. mindspore/rewrite/node/call_function.py +79 -0
  424. mindspore/rewrite/node/cell_container.py +135 -0
  425. mindspore/rewrite/node/control_flow.py +88 -0
  426. mindspore/rewrite/{node.py → node/node.py} +273 -234
  427. mindspore/rewrite/node/node_manager.py +254 -0
  428. mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
  429. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  430. mindspore/rewrite/parsers/assign_parser.py +216 -221
  431. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  432. mindspore/rewrite/parsers/class_def_parser.py +174 -113
  433. mindspore/rewrite/parsers/constant_parser.py +9 -6
  434. mindspore/rewrite/parsers/container_parser.py +9 -7
  435. mindspore/rewrite/parsers/for_parser.py +36 -15
  436. mindspore/rewrite/parsers/function_def_parser.py +24 -16
  437. mindspore/rewrite/parsers/if_parser.py +28 -24
  438. mindspore/rewrite/parsers/module_parser.py +196 -25
  439. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  440. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  441. mindspore/rewrite/parsers/return_parser.py +6 -6
  442. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  443. mindspore/rewrite/sparsify/utils.py +1 -1
  444. mindspore/rewrite/symbol_tree.py +523 -578
  445. mindspore/rewrite/symbol_tree_builder.py +9 -193
  446. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  447. mindspore/run_check/_check_version.py +6 -4
  448. mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
  449. mindspore/safeguard/rewrite_obfuscation.py +541 -0
  450. mindspore/scipy/linalg.py +1 -1
  451. mindspore/scipy/optimize/minimize.py +7 -3
  452. mindspore/train/_utils.py +7 -3
  453. mindspore/train/amp.py +323 -123
  454. mindspore/train/anf_ir_pb2.py +14 -2
  455. mindspore/train/callback/_backup_and_restore.py +2 -12
  456. mindspore/train/callback/_callback.py +29 -4
  457. mindspore/train/callback/_checkpoint.py +23 -8
  458. mindspore/train/callback/_early_stop.py +2 -2
  459. mindspore/train/callback/_landscape.py +4 -4
  460. mindspore/train/callback/_loss_monitor.py +2 -2
  461. mindspore/train/callback/_on_request_exit.py +2 -2
  462. mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
  463. mindspore/train/callback/_summary_collector.py +15 -8
  464. mindspore/train/callback/_time_monitor.py +58 -5
  465. mindspore/train/data_sink.py +5 -11
  466. mindspore/train/dataset_helper.py +84 -57
  467. mindspore/train/loss_scale_manager.py +2 -2
  468. mindspore/train/metrics/__init__.py +3 -3
  469. mindspore/train/metrics/cosine_similarity.py +1 -1
  470. mindspore/train/metrics/hausdorff_distance.py +3 -2
  471. mindspore/train/metrics/mean_surface_distance.py +3 -2
  472. mindspore/train/metrics/metric.py +39 -19
  473. mindspore/train/metrics/roc.py +2 -2
  474. mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
  475. mindspore/train/mind_ir_pb2.py +85 -36
  476. mindspore/train/model.py +187 -47
  477. mindspore/train/serialization.py +487 -161
  478. mindspore/train/summary/_summary_adapter.py +1 -1
  479. mindspore/train/summary/_writer_pool.py +3 -2
  480. mindspore/train/summary/summary_record.py +37 -17
  481. mindspore/train/train_thor/convert_utils.py +3 -3
  482. mindspore/train/train_thor/dataset_helper.py +1 -1
  483. mindspore/version.py +1 -1
  484. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/METADATA +6 -7
  485. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/RECORD +488 -528
  486. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/entry_points.txt +0 -1
  487. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  488. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  489. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  490. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  491. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  492. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  493. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  494. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  495. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  496. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  497. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  498. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  499. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  500. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  501. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  502. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  503. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  504. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  505. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  506. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  507. mindspore/_extends/graph_kernel/expander.py +0 -80
  508. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
  509. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  510. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  511. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  512. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  513. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  514. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  515. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  516. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  517. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  518. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  519. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  520. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  521. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  522. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  523. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  524. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  525. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  526. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  527. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  528. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  529. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  530. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  531. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  532. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  533. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  534. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  535. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  536. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  537. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  538. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  539. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  540. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  541. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  542. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  543. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  544. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  545. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  546. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  547. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  548. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  549. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  550. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  551. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  552. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  553. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  554. mindspore/dataset/datapreprocess/__init__.py +0 -20
  555. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  556. mindspore/include/api/net.h +0 -142
  557. mindspore/nn/lr_scheduler.py +0 -262
  558. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
  559. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
  560. mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
  561. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  562. mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
  563. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
  565. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
  566. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  567. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  568. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
  569. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
  570. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  571. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  572. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
  573. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
  574. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
  576. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
  577. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  578. mindspore/rewrite/node_visitor.py +0 -44
  579. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/WHEEL +0 -0
  580. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env python3
2
2
  # coding: utf-8
3
- # Copyright 2019-2022 Huawei Technologies Co., Ltd
3
+ # Copyright 2019-2023 Huawei Technologies Co., Ltd
4
4
  #
5
5
  # Licensed under the Apache License, Version 2.0 (the "License");
6
6
  # you may not use this file except in compliance with the License.
@@ -35,8 +35,6 @@ import numpy as np
35
35
 
36
36
  import akg
37
37
  import akg.tvm
38
- from akg.tvm import autotvm
39
- from akg.tvm import rpc
40
38
  from akg.tvm import _api_internal
41
39
  from akg.build_module import help_tiling_level
42
40
  from akg.utils import result_analysis as ra_util
@@ -45,16 +43,16 @@ from akg.utils import custom_tiling as ct_util
45
43
  from akg.utils import validation_check as vc_util
46
44
  from akg.utils.dsl_create import TensorUtils
47
45
  from akg.utils.util import parse_kwargs
48
- from akg.backend.parsing_profiling_data import HWTSLogParser
46
+ from akg.backend.parsing_profiling_data import HWTSLogParser, max_time_consume
49
47
  from akg.backend.parsing_profiling_data import validate_and_normalize_path
50
48
  from akg.backend import aic_model
51
-
49
+ from .ascend_profilier.cann_file_parser import CANNFileParser
50
+ from .ascend_profilier.op_summary_parser import OpSummaryParser
51
+ from .ascend_profilier.op_summary_headers import OpSummaryHeaders
52
52
  sh = logging.StreamHandler(sys.stdout)
53
53
  logging.getLogger().addHandler(sh)
54
54
  logging.getLogger().setLevel(logging.INFO)
55
55
 
56
- rpc_machine = {}
57
- rpc_lb = {}
58
56
 
59
57
  PERFORMANCE_TEST_FILE = "PERFORMANCE_TEST_FILE"
60
58
  BINDS = "binds"
@@ -173,212 +171,7 @@ def gen_name_kernel(kernel, dtype, shapes):
173
171
  return res
174
172
 
175
173
 
176
- def load_rpc_server_info(mode):
177
- """
178
- load rpc server host and port info.
179
-
180
- Args:
181
- mode (str): string of runtime choose, can set ca aic and rpc.
182
- """
183
- env_dic = os.environ
184
- if env_dic.get('RPC_HOST') and env_dic.get('RPC_PORT'):
185
- return
186
-
187
- if mode == 'rpc_cloud':
188
- logging.error("runtime_mode=rpc_cloud must set 1980 host ip and port!")
189
- raise Exception("ERROR:runtime_mode=rpc_cloud must set 1980 host ip and port!")
190
-
191
- rpc_server_info_config = env_dic.get('RPC_SERVER_INFO_FILE')
192
- if not rpc_server_info_config:
193
- logging.error("runtime_mode=rpc must set RPC_SERVER_INFO_FILE for rpc server info config")
194
- raise Exception("ERROR:runtime_mode=rpc must set RPC_SERVER_INFO_FILE for rpc server info config")
195
-
196
- # load rpc server host and port info from local file.
197
- import json
198
- with open(rpc_server_info_config, 'r') as f:
199
- info = json.load(f)
200
-
201
- for i in info:
202
- rpc_machine[i] = info[i]
203
- rpc_lb[i] = 0.0
204
- return
205
-
206
-
207
- def dispatch(rank=0):
208
- """Function for lock waiting dispatch handle version 1."""
209
-
210
- def _sort_by_value(d):
211
- items = list(d.items())
212
- random.shuffle(items)
213
- items.sort(key=lambda x: x[1])
214
- return list(item[0] for item in items)
215
-
216
- for k, v in rpc_lb.items():
217
- logging.info("######rpc_lb[%s]=%f", rpc_machine.get(k)[0], v)
218
- lb_list = _sort_by_value(rpc_lb)
219
- if len(lb_list) > rank:
220
- return lb_list[rank]
221
- return lb_list[len(lb_list) - 1]
222
-
223
-
224
- def commit(remote, weight):
225
- rpc_lb[remote] = weight
226
-
227
-
228
- @func_time_required
229
- def mod_launch_rpc_worker(mod, args, outputs, host, port, tuning=False):
230
- """internal RPC worker, should be called by mod_launch_rpc_thread."""
231
- logging.info("%s:====start connect to rpc ip: %s, rpc port: %d ",
232
- datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), host, port)
233
- remote = rpc.connect(host, port, session_timeout=300)
234
- logging.info("%s:====connect to rpc ip: %s, rpc port: %d finished ",
235
- datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), host, port)
236
- uuid_str = uuid.uuid4().hex
237
- temp_file_name = "stackvm_%s.o" % uuid_str
238
- mod.save(temp_file_name)
239
- remote.upload(temp_file_name)
240
- remote_mod = remote.load_module(temp_file_name)
241
- ctx = remote.cce()
242
- arg_list = []
243
- for a in args:
244
- arg_list.append(akg.tvm.nd.array(a, ctx))
245
- start_time = timer()
246
- remote_mod(*arg_list)
247
- ctx.sync()
248
- if os.path.exists(temp_file_name):
249
- os.remove(temp_file_name)
250
- out_list = []
251
- for i in outputs:
252
- out = arg_list[len(arg_list) + i if i < 0 else i].asnumpy()
253
- out_list.append(out)
254
- # this time measure is no accurate now, to be improved soon
255
- t = timer() - start_time
256
- if not tuning:
257
- return out_list[0] if len(out_list) == 1 else tuple(out_list)
258
- stat_info = {"run_time": t}
259
- return out_list[0] if len(out_list) == 1 else tuple(out_list), stat_info
260
-
261
-
262
- def mod_launch_rpc_thread(mode, mod, args, outputs, results, need_retry, retry, tuning=False):
263
- """internal RPC thread, should be called by mod_launch_rpc_multithread."""
264
- remoteevb = '0'
265
- host = None
266
- port = None
267
- env_dic = os.environ
268
- if env_dic.get('RPC_HOST') and env_dic.get('RPC_PORT'):
269
- host = env_dic.get('RPC_HOST')
270
- port = int(env_dic.get('RPC_PORT'))
271
- else:
272
- if mode == 'rpc_cloud':
273
- logging.error("runtime_mode=rpc_cloud must set 1980 host ip and port!")
274
- raise Exception("ERROR:runtime_mode=rpc_cloud must set 1980 host ip and port!")
275
- remoteevb = dispatch(retry)
276
- host = rpc_machine.get(remoteevb)[0]
277
- port = rpc_machine.get(remoteevb)[1]
278
-
279
- start_time = timer()
280
- end_time = 0.0
281
- logging.debug("rpc ip: %s, rpc port: %d", host, port)
282
- try:
283
- out_list = mod_launch_rpc_worker(mod, args, outputs, host, port, tuning=tuning)
284
- end_time = timer()
285
- t = end_time - start_time
286
- if not env_dic.get('RPC_HOST'):
287
- commit(remoteevb, 20 if t > 20 else t)
288
- logging.info("===this round host is %s time is %f", host, (end_time - start_time))
289
- results[retry] = out_list
290
- except RuntimeError:
291
- need_retry[retry] = True
292
- end_time = timer()
293
- logging.error("===Failed! this round host is %s time is %f", host, (end_time - start_time))
294
- if not env_dic.get('RPC_HOST'):
295
- commit(remoteevb, end_time - start_time + 20 * (retry + 1))
296
- logging.error("rpc retry error: %d %s", retry, sys.exc_info())
297
-
298
-
299
- def _get_rpc_result(poll_count, threads, thread_index, poll_interval, need_retry, results, retried):
300
- """Get rpc run result."""
301
- while poll_count > 0:
302
- poll_count -= 1
303
- # wait for the newly created thread, because it is most likely to complete first
304
- threads[thread_index].join(poll_interval)
305
- for poll_index in range(thread_index + 1):
306
- if not threads[poll_index].is_alive() and not need_retry[poll_index]:
307
- return True, results[poll_index]
308
- if need_retry[poll_index] and not retried[poll_index]:
309
- logging.error("Thread %d exit with error, spawn a new thread immediately", poll_index)
310
- poll_count = 0
311
- retried[poll_index] = True
312
- return False, False
313
-
314
-
315
- def mod_launch_rpc(mode, mod, args, outputs, tuning=False):
316
- """
317
- launch rpc or rpc_cloud module with retry.
318
-
319
- Note:
320
- To minimize waiting time of struggler RPC servers, we wait for a short timeout and spawn
321
- a new thread after the timeout.
322
- In normal case, RPC would complete before the short timeout, so, only one thread will be created.
323
- When the RPC server is slow, we create multiple threads that run concurrently.
324
- We wait for the first thread that successfully completes its work and return the result.
325
- If a thread fails (an exception is raised), we spawn a new thread to retry.
326
- Newly spawned threads will use different RPC servers.
327
- We bound the maximum number of threads, i.e. maximum number of retries.
328
- """
329
- max_num_threads = 5
330
-
331
- import operator
332
- arg_filter = filter(lambda x: isinstance(x, np.ndarray), args)
333
- arg_tensor = list(arg_filter)
334
- tensor_size = reduce(operator.add, (reduce(operator.mul, arg.shape) for arg in arg_tensor))
335
- expected_upload_speed = 5e6
336
- expected_upload_time = int(tensor_size / expected_upload_speed)
337
-
338
- timeout_before_spawning_new_thread = 200 + expected_upload_time
339
- poll_interval = 1
340
- thread_timeout = 400 + expected_upload_time * 3
341
-
342
- load_rpc_server_info(mode)
343
-
344
- threads = [None] * max_num_threads
345
- results = [None] * max_num_threads
346
- need_retry = [None] * max_num_threads
347
- retried = [False] * max_num_threads
348
- for thread_index in range(max_num_threads):
349
- if thread_index > 0:
350
- logging.error("Thread %d run for %d seconds, spawn a new thread to retry",
351
- (thread_index - 1), timeout_before_spawning_new_thread)
352
- threads[thread_index] = Thread(target=mod_launch_rpc_thread,
353
- args=(mode, mod, args, outputs, results, need_retry, thread_index, tuning))
354
- # daemonize the thread to prevent long running threads from hanging the whole process
355
- threads[thread_index].daemon = True
356
- threads[thread_index].start()
357
- poll_count = timeout_before_spawning_new_thread // poll_interval
358
- has_res, res = _get_rpc_result(poll_count, threads, thread_index, poll_interval, need_retry, results, retried)
359
- if has_res:
360
- return res
361
-
362
- logging.error("All %d threads are created, poll the threads until the first one exits normally, \
363
- or all threads exit abnormally or timeout", max_num_threads)
364
- poll_count = thread_timeout // poll_interval
365
- for _ in range(poll_count):
366
- threads[max_num_threads - 1].join(poll_interval)
367
- exit_thread_count = 0
368
- for poll_index in range(max_num_threads):
369
- if not threads[poll_index].is_alive() and not need_retry[poll_index]:
370
- return results[poll_index]
371
- if not threads[poll_index].is_alive():
372
- exit_thread_count += 1
373
- if exit_thread_count == max_num_threads:
374
- logging.error("All %d threads exit abnormally", max_num_threads)
375
- return None
376
-
377
- logging.error("All %d threads timeout", max_num_threads)
378
- return None
379
-
380
-
381
- def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
174
+ def profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=None):
382
175
  """
383
176
  Function for collecting cycle data from device.
384
177
 
@@ -389,23 +182,45 @@ def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
389
182
  tuning: tuning model.
390
183
  device_id: device_id on device.
391
184
  """
392
- akg.tvm.get_global_func("ascend_start_profiling")(device_id)
185
+ akg.tvm.get_global_func("ascend_start_profiling")(kernel_name)
393
186
  time_before_launch = time.time()
394
187
  output_data = ascend_run(kernel_name, args, outputs, device_id)
395
188
  akg.tvm.get_global_func("ascend_stop_profiling")()
396
-
397
- cycle = profiling_analyse(device_id, time_before_launch)
398
- logging.info('=====parsing cycles==============================')
189
+ cycle = 0
190
+ if arch is not None and "910B" in arch:
191
+ # for ascend910B profiling
192
+ cycle = profiling_analyse_910B(time_before_launch)
193
+ else:
194
+ cycle = profiling_analyse(device_id, time_before_launch)
195
+ logging.info('=====Task Duration(us)==============================')
399
196
  if cycle != PROF_ERROR_CODE:
400
197
  logging.info(cycle)
401
198
  else:
402
- logging.error("OOPS, can't correctly parsing cycles!")
199
+ logging.error("OOPS, can't correctly Task Duration!")
403
200
  TestUtils.record_cycle(cycle)
404
- logging.info('=====parsing cycles==============================')
405
- if tuning:
406
- return output_data, {'run_time': cycle}
407
- return output_data
408
-
201
+ logging.info('=====Task Duration(us)==============================')
202
+ return output_data, {'run_time': cycle}
203
+
204
+ def profiling_analyse_910B(time_before_launch):
205
+ public_path = os.getenv('PROFILING_DIR')
206
+ if public_path is None:
207
+ raise RuntimeError("Environment PROFILING_DIR not set!")
208
+ public_path = validate_and_normalize_path(public_path)
209
+ CANNFileParser(public_path).export_cann_profiling()
210
+ cann_file_parser = OpSummaryParser(public_path)
211
+ profiler_file = cann_file_parser._profiler_path
212
+ logging.debug("prof file is: %s", os.path.basename(profiler_file))
213
+ file_create_time = os.path.getctime(profiler_file)
214
+ if file_create_time < time_before_launch:
215
+ raise RuntimeError("The PROF file is too old")
216
+ datas:dict = cann_file_parser.generate_op_summary_data()
217
+ task_duration = float(datas.get(OpSummaryHeaders.TASK_DURATION,max_time_consume))
218
+ # # aic_total_cycles means ai core cycle
219
+ # # aiv_total_cycles means ai vector cycle
220
+ # aiv_total_cycle = int(datas.get(OpSummaryHeaders.AIV_TOTAL_CYCLES,max_time_consume))
221
+ # aic_total_cycle = int(datas.get(OpSummaryHeaders.AIC_TOTAL_CYCLES,max_time_consume))
222
+ # return aiv_total_cycle+aic_total_cycle
223
+ return task_duration
409
224
 
410
225
  def profiling_analyse(device_id, time_before_launch):
411
226
  """analyse profiling."""
@@ -559,12 +374,12 @@ def get_kernel_name_from_mod(mod):
559
374
  return kernel_name
560
375
 
561
376
 
562
- def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1):
377
+ def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1, arch=None):
563
378
  gc.collect()
564
379
  if device_id == -1:
565
380
  device_id = int(os.environ.get("DEVICE_ID", 0))
566
381
  kernel_name = get_kernel_name_from_mod(mod)
567
- return profiling_mode_run(kernel_name, args, outputs, tuning, device_id)
382
+ return profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=arch)
568
383
 
569
384
 
570
385
  def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, device_id=-1, repeat_time=400):
@@ -598,7 +413,7 @@ def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, devi
598
413
 
599
414
 
600
415
  @func_time_required
601
- def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400):
416
+ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400, arch=None):
602
417
  """
603
418
  unified run CCE kernel api.
604
419
 
@@ -609,7 +424,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
609
424
  tuning (bool): tuning model.
610
425
  device_id: device_id on device.
611
426
  expect: when mode in ["compile_cloud", "compile_mini"], return it.
612
-
427
+ arch: Ascend arch type
613
428
  Returns:
614
429
  output numpy array, or tuple of numpy array if multi-output.
615
430
  """
@@ -618,6 +433,20 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
618
433
  if device_id == -1:
619
434
  device_id = int(os.environ.get("DEVICE_ID", 0))
620
435
 
436
+ # npu-inference process
437
+ if isinstance(mod, str):
438
+ kernel_name = mod
439
+ run_func = ascend_run
440
+ run_args = [kernel_name, args, outputs, device_id]
441
+ if os.environ.get("PROFILING_MODE") == "true":
442
+ run_func = profiling_mode_run
443
+ run_args = [kernel_name, args, outputs, tuning, device_id, arch]
444
+ if os.environ.get("PROFILING_DIR", None) is None:
445
+ os.environ["PROFILING_DIR"] = "."
446
+ logging.info("[RUNTIME_WARNING] In profiling mode, while profiling dir is not set!Set to current dir by default.")
447
+ output = run_func(*run_args)
448
+ return output
449
+
621
450
  module = mod if mod.type_key == LLVM else mod.imported_modules[0]
622
451
  target = module.type_key
623
452
  if target == LLVM or target == CUDA:
@@ -635,8 +464,6 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
635
464
  return output
636
465
  ra_util.get_ticks(stat_info)
637
466
  return output, stat_info
638
- if mode in ('rpc', 'rpc_cloud'):
639
- return mod_launch_rpc(mode, mod, args, outputs, tuning)
640
467
 
641
468
  # The air_cloud is the current default mode and needs to be modified in the future
642
469
  if mode == 'air_cloud':
@@ -658,7 +485,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
658
485
  mod(*tvm_array)
659
486
  return tvm_array[-1].asnumpy()
660
487
 
661
- raise ValueError("mode must be aic, rpc, aic_cloud, ca, compile_cloud, compile_mini, cpu, csim, ccesim or cdiff")
488
+ raise ValueError("mode must be aic, aic_cloud, ca, compile_cloud, compile_mini, cpu, csim, ccesim or cdiff")
662
489
 
663
490
 
664
491
  def _extract_shape_dtype(input_shapes, input_types):
@@ -1004,44 +831,6 @@ def _create_gpu_mod(s, op_var, target, shape_var, kernel_name, attrs, polyhedral
1004
831
  return mod
1005
832
 
1006
833
 
1007
- def _create_gpu_tuning_mod(sch_tmpl, shape_var, kernel_name, attrs, binds):
1008
- """Create tuning module on gpu."""
1009
- @autotvm.template
1010
- def _autotune_template():
1011
- s = sch_tmpl['schedule'](sch_tmpl['output'])
1012
- return s, op_var
1013
-
1014
- # create autotune task
1015
- task = autotvm.task.create(_autotune_template, args=list(), target='cuda')
1016
- print("task config: ", task.config_space)
1017
-
1018
- # set measure_option
1019
- measure_option = autotvm.measure_option(
1020
- builder=autotvm.LocalBuilder(),
1021
- runner=autotvm.LocalRunner(repeat=5, min_repeat_ms=150, timeout=4)
1022
- )
1023
-
1024
- # Begin tuning, log records to file `kernel_name.log`
1025
- tuner = autotvm.tuner.RandomTuner(task)
1026
- if not os.path.exists(kernel_name + '.log'):
1027
- tuner.tune(n_trial=len(task.config_space),
1028
- measure_option=measure_option,
1029
- callbacks=[autotvm.callback.log_to_file(kernel_name + '.log')])
1030
-
1031
- # query best config
1032
- dispatch_context = autotvm.apply_history_best(kernel_name + '.log')
1033
- best_config = dispatch_context.query(task.target, task.workload)
1034
- print("\nBest config is:")
1035
- print(best_config)
1036
-
1037
- # apply best config
1038
- with autotvm.apply_history_best(kernel_name + '.log'):
1039
- s, op_var = _autotune_template()
1040
- mod = akg.build(s, op_var, "cuda", shape_var, name=kernel_name, attrs=attrs,
1041
- polyhedral=False, binds=binds)
1042
- return mod
1043
-
1044
-
1045
834
  def create_gpu_mod(sch_tmpl, s, op_func, op_var, shape_var, kernel_name, attrs, polyhedral, binds, dump_ir, dump_code,
1046
835
  tuning):
1047
836
  """
@@ -1079,7 +868,7 @@ def create_gpu_mod(sch_tmpl, s, op_func, op_var, shape_var, kernel_name, attrs,
1079
868
  s = sch_tmpl['schedule'](sch_tmpl['output'])
1080
869
  mod = _create_gpu_mod(s, op_var, "cuda", shape_var, kernel_name, attrs, False, binds, dump_ir)
1081
870
  else:
1082
- mod = _create_gpu_tuning_mod(sch_tmpl, shape_var, kernel_name, attrs, binds)
871
+ raise ValueError("Tuning is not supported.")
1083
872
  else:
1084
873
  mod = _create_gpu_mod(s, op_var, target, shape_var, kernel_name, attrs, polyhedral, binds, dump_ir)
1085
874
  if dump_code:
@@ -1213,6 +1002,10 @@ def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
1213
1002
  compute_func(s)
1214
1003
  polyhedral = False
1215
1004
 
1005
+ if attrs.get("simple_mode"):
1006
+ attrs.pop("simple_mode")
1007
+ return s, inputs, output, attrs
1008
+
1216
1009
  level = attrs.get("help_tiling") if attrs and "help_tiling" in attrs else None
1217
1010
  if tuning or (level is not None and level > help_tiling_level.get('None')):
1218
1011
  return gen_spaces_dim_key(op_func, args, s, op_var, kernel_name, attrs, polyhedral, tuning, target)
@@ -1231,10 +1024,11 @@ def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
1231
1024
  polyhedral=polyhedral, binds=binds)
1232
1025
  source_code = mod.get_source()
1233
1026
  elif target_name == CCE:
1234
- with akg.build_config(dump_pass_ir=dump_ir):
1235
- mod = akg.build(s, op_var, target, shape_var, name=kernel_name, attrs=attrs,
1236
- polyhedral=polyhedral, binds=binds)
1027
+ mod = npu_op_build(s, op_var, shape_var, kernel_name, binds, attrs, dump_ir, polyhedral)
1028
+ if attrs.get("is_tbe_codegen"):
1237
1029
  source_code = mod.imported_modules[0].get_source()
1030
+ else:
1031
+ return mod
1238
1032
 
1239
1033
  if log_code:
1240
1034
  logging.debug("#################code####################")
@@ -1244,11 +1038,41 @@ def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
1244
1038
  return mod
1245
1039
 
1246
1040
 
1041
+ def npu_op_build(s, op_var, shape_var, kernel_name="", binds=None, attrs=None,
1042
+ dump_ir=True, polyhedral=True):
1043
+ if attrs.get("is_tbe_codegen"):
1044
+ # use akg + tbe compile
1045
+ from akg.tvm import build_module
1046
+ from akg.python.akg.utils.tbe_codegen_utils import build_tbe_codegen
1047
+ if attrs is None:
1048
+ attrs = {}
1049
+ attrs.update({"is_tbe_codegen":True})
1050
+ binds, arg_list = build_module.get_binds(op_var)
1051
+ stmt = akg.lower(s, op_var, shape_params=shape_var, name=kernel_name, binds=binds, attrs=attrs,
1052
+ simple_mode=True, polyhedral=polyhedral, tuning=False, target="cce")
1053
+
1054
+ json_str = akg.tvm.save_json(stmt, "0.8.0")
1055
+
1056
+ args_json = []
1057
+ for buf in enumerate(arg_list):
1058
+ args_json.append(akg.tvm.save_json(buf, "0.8.0"))
1059
+
1060
+ is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs)
1061
+ if not is_success:
1062
+ raise TypeError("npu_inference codegen failed.")
1063
+ return kernel_name
1064
+ else:
1065
+ # use the whole akg complie
1066
+ with akg.build_config(dump_pass_ir=dump_ir):
1067
+ mod = akg.build(s, op_var, CCE, shape_var, name=kernel_name, attrs=attrs,
1068
+ polyhedral=polyhedral, binds=binds)
1069
+ return mod
1070
+
1247
1071
  def get_runtime_mode():
1248
1072
  """get runtime mode."""
1249
1073
  env_dic = os.environ
1250
1074
  if not env_dic.get('RUNTIME_MODE'):
1251
- mode = 'rpc_cloud'
1075
+ mode = 'aic_cloud'
1252
1076
  else:
1253
1077
  mode = env_dic.get('RUNTIME_MODE')
1254
1078
  return mode
@@ -1265,7 +1089,7 @@ def get_profiling_mode():
1265
1089
  def product_is_mini():
1266
1090
  """check whether in mini environment."""
1267
1091
  mode = get_runtime_mode()
1268
- if mode in ('rpc', 'air', 'aic', 'compile_mini'):
1092
+ if mode in ('air', 'aic', 'compile_mini'):
1269
1093
  return True
1270
1094
  return False
1271
1095
 
@@ -351,16 +351,8 @@ def _collect_inputs(input_desc):
351
351
  return inputs
352
352
 
353
353
 
354
- def _get_op_attr(op_name, attrs, attr_name):
355
- """Get op attr value."""
356
- for attr in attrs:
357
- if attr["name"] == attr_name:
358
- return attr["value"]
359
- raise ValueError("Can not find attr '{}' in op {}".format(attr_name, op_name))
360
-
361
-
362
354
  def precision_analyze(desc: dict, tensors):
363
- exclude_op_list = ["Minimum", "Maximum", "Reshape", "ZerosLike", "Tile", "Select", "InplaceAssign", "Greater",
355
+ exclude_op_list = ["Minimum", "Maximum", "Reshape", "ZerosLike", "Tile", "Select", "Greater",
364
356
  "SelectGT", "SelectLT", "LessEqual", "Less", "EquivFormat", "ExpandDims", "Transpose",
365
357
  "TransData", "BroadcastTo", "Assign"]
366
358
  input_tensors = _collect_inputs(desc["input_desc"])
@@ -369,21 +361,9 @@ def precision_analyze(desc: dict, tensors):
369
361
  graph = {}
370
362
  ops = {} # recorder the operator that generates the current output
371
363
  for op in desc["op_desc"]:
372
- if op["name"] == "InplaceAssign":
373
- output = IOInfo(op["input_desc"][0][0]["tensor_name"], op["input_desc"][0][0]["data_type"])
374
- inputs = IOInfo(op["input_desc"][1][0]["tensor_name"], op["input_desc"][1][0]["data_type"])
375
- graph[output] = [inputs]
376
- ops[output] = op["name"]
377
- fake_output = _get_op_attr(op["name"], op["attr"], "fake_output")
378
- if not fake_output:
379
- output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
380
- inputs = IOInfo(op["input_desc"][2][0]["tensor_name"], op["input_desc"][2][0]["data_type"])
381
- graph[output] = [inputs]
382
- ops[output] = op["name"]
383
- else:
384
- output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
385
- graph[output] = _collect_inputs(op["input_desc"])
386
- ops[output] = op["name"]
364
+ output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
365
+ graph[output] = _collect_inputs(op["input_desc"])
366
+ ops[output] = op["name"]
387
367
 
388
368
  def _precision_reduce(x: IOInfo):
389
369
  if x in input_tensors: