mindspore 1.10.0__cp37-cp37m-win_amd64.whl → 2.0.0rc1__cp37-cp37m-win_amd64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (966)
  1. mindspore/.commit_id +1 -1
  2. mindspore/ConcurrencyCheck.dll +0 -0
  3. mindspore/CppBuildInsights.dll +0 -0
  4. mindspore/CppCoreCheck.dll +0 -0
  5. mindspore/EnumIndex.dll +0 -0
  6. mindspore/EspXEngine.dll +0 -0
  7. mindspore/HResultCheck.dll +0 -0
  8. mindspore/KernelTraceControl.dll +0 -0
  9. mindspore/LocalESPC.dll +0 -0
  10. mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
  11. mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
  12. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  13. mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
  14. mindspore/Newtonsoft.Json.dll +0 -0
  15. mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
  16. mindspore/VariantClear.dll +0 -0
  17. mindspore/__init__.py +9 -4
  18. mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
  19. mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
  20. mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
  21. mindspore/_check_jit_forbidden_api.py +102 -0
  22. mindspore/_checkparam.py +1066 -1001
  23. mindspore/_extends/builtin_operations.py +32 -4
  24. mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
  25. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
  26. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
  27. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
  28. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
  29. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
  30. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  31. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
  32. mindspore/_extends/parse/__init__.py +5 -3
  33. mindspore/_extends/parse/namespace.py +17 -2
  34. mindspore/_extends/parse/parser.py +193 -34
  35. mindspore/_extends/parse/resources.py +7 -8
  36. mindspore/_extends/parse/standard_method.py +1780 -435
  37. mindspore/_extends/parse/trope.py +3 -1
  38. mindspore/amp.py +53 -58
  39. mindspore/atlprov.dll +0 -0
  40. mindspore/boost/adasum.py +3 -2
  41. mindspore/boost/boost.py +2 -2
  42. mindspore/boost/boost_cell_wrapper.py +46 -26
  43. mindspore/boost/dim_reduce.py +6 -5
  44. mindspore/boost/grad_accumulation.py +2 -1
  45. mindspore/boost/group_loss_scale_manager.py +1 -1
  46. mindspore/c1.dll +0 -0
  47. mindspore/c1xx.dll +0 -0
  48. mindspore/c2.dll +0 -0
  49. mindspore/cfgpersist.dll +0 -0
  50. mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
  51. mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
  52. mindspore/common/__init__.py +11 -10
  53. mindspore/common/_decorator.py +2 -0
  54. mindspore/common/_register_for_adapter.py +55 -0
  55. mindspore/common/_stub_tensor.py +201 -0
  56. mindspore/common/_utils.py +57 -0
  57. mindspore/common/api.py +582 -297
  58. mindspore/common/dtype.py +66 -18
  59. mindspore/common/dump.py +2 -2
  60. mindspore/common/initializer.py +38 -1
  61. mindspore/common/jit_config.py +25 -13
  62. mindspore/common/mutable.py +53 -24
  63. mindspore/common/parameter.py +60 -37
  64. mindspore/common/seed.py +8 -24
  65. mindspore/common/sparse_tensor.py +927 -0
  66. mindspore/common/tensor.py +1627 -3900
  67. mindspore/communication/__init__.py +10 -5
  68. mindspore/communication/_comm_helper.py +78 -214
  69. mindspore/communication/_hccl_management.py +2 -1
  70. mindspore/communication/management.py +136 -47
  71. mindspore/config/op_info.config +501 -1008
  72. mindspore/context.py +291 -56
  73. mindspore/d3dcompiler_47.dll +0 -0
  74. mindspore/dataset/__init__.py +12 -8
  75. mindspore/dataset/audio/__init__.py +9 -9
  76. mindspore/dataset/audio/transforms.py +1090 -228
  77. mindspore/dataset/audio/utils.py +87 -39
  78. mindspore/dataset/audio/validators.py +223 -1
  79. mindspore/dataset/callback/ds_callback.py +17 -15
  80. mindspore/dataset/core/config.py +246 -17
  81. mindspore/dataset/core/py_util_helpers.py +4 -3
  82. mindspore/dataset/core/validator_helpers.py +10 -10
  83. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  84. mindspore/dataset/debug/debug_hook.py +65 -0
  85. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  86. mindspore/dataset/engine/__init__.py +7 -3
  87. mindspore/dataset/engine/cache_client.py +9 -9
  88. mindspore/dataset/engine/datasets.py +648 -477
  89. mindspore/dataset/engine/datasets_audio.py +165 -167
  90. mindspore/dataset/engine/datasets_standard_format.py +93 -67
  91. mindspore/dataset/engine/datasets_text.py +492 -342
  92. mindspore/dataset/engine/datasets_user_defined.py +85 -50
  93. mindspore/dataset/engine/datasets_vision.py +1224 -699
  94. mindspore/dataset/engine/graphdata.py +134 -69
  95. mindspore/dataset/engine/iterators.py +50 -9
  96. mindspore/dataset/engine/offload.py +52 -31
  97. mindspore/dataset/engine/samplers.py +27 -24
  98. mindspore/dataset/engine/serializer_deserializer.py +14 -15
  99. mindspore/dataset/engine/validators.py +213 -52
  100. mindspore/dataset/text/__init__.py +10 -8
  101. mindspore/dataset/text/transforms.py +152 -57
  102. mindspore/dataset/text/utils.py +98 -49
  103. mindspore/dataset/text/validators.py +25 -0
  104. mindspore/dataset/transforms/__init__.py +4 -2
  105. mindspore/dataset/transforms/c_transforms.py +11 -13
  106. mindspore/dataset/transforms/py_transforms.py +2 -2
  107. mindspore/dataset/transforms/py_transforms_util.py +10 -0
  108. mindspore/dataset/transforms/transforms.py +13 -15
  109. mindspore/dataset/transforms/validators.py +7 -7
  110. mindspore/dataset/utils/__init__.py +2 -1
  111. mindspore/dataset/utils/browse_dataset.py +13 -13
  112. mindspore/dataset/utils/line_reader.py +121 -0
  113. mindspore/dataset/vision/__init__.py +8 -7
  114. mindspore/dataset/vision/c_transforms.py +125 -126
  115. mindspore/dataset/vision/py_transforms.py +37 -37
  116. mindspore/dataset/vision/py_transforms_util.py +23 -20
  117. mindspore/dataset/vision/transforms.py +316 -315
  118. mindspore/dataset/vision/utils.py +313 -17
  119. mindspore/dataset/vision/validators.py +6 -6
  120. mindspore/default_config.py +0 -1
  121. mindspore/dpcmi.dll +0 -0
  122. mindspore/{compression → experimental}/__init__.py +6 -5
  123. mindspore/experimental/map_parameter.py +275 -0
  124. mindspore/include/OWNERS +0 -1
  125. mindspore/include/api/callback/callback.h +9 -13
  126. mindspore/include/api/callback/ckpt_saver.h +2 -2
  127. mindspore/include/api/callback/loss_monitor.h +2 -2
  128. mindspore/include/api/callback/lr_scheduler.h +5 -5
  129. mindspore/include/api/callback/time_monitor.h +2 -2
  130. mindspore/include/api/callback/train_accuracy.h +4 -6
  131. mindspore/include/api/cfg.h +19 -6
  132. mindspore/include/api/context.h +70 -9
  133. mindspore/include/api/delegate.h +8 -1
  134. mindspore/include/api/dual_abi_helper.h +8 -24
  135. mindspore/include/api/metrics/accuracy.h +2 -2
  136. mindspore/include/api/metrics/metrics.h +4 -3
  137. mindspore/include/api/model.h +9 -4
  138. mindspore/include/api/model_group.h +68 -0
  139. mindspore/include/api/model_parallel_runner.h +17 -17
  140. mindspore/include/api/net.h +12 -11
  141. mindspore/include/api/serialization.h +20 -4
  142. mindspore/include/api/status.h +7 -1
  143. mindspore/include/api/types.h +25 -21
  144. mindspore/include/api/visible.h +4 -0
  145. mindspore/include/c_api/model_c.h +5 -0
  146. mindspore/include/c_api/status_c.h +1 -1
  147. mindspore/include/dataset/config.h +1 -1
  148. mindspore/include/dataset/constants.h +14 -0
  149. mindspore/include/dataset/text.h +59 -0
  150. mindspore/include/dataset/vision.h +56 -117
  151. mindspore/include/dataset/vision_lite.h +102 -0
  152. mindspore/jpeg62.dll +0 -0
  153. mindspore/log.py +28 -28
  154. mindspore/mindrecord/common/exceptions.py +2 -4
  155. mindspore/mindrecord/filereader.py +19 -1
  156. mindspore/mindrecord/filewriter.py +250 -88
  157. mindspore/mindrecord/mindpage.py +13 -13
  158. mindspore/mindrecord/shardheader.py +15 -15
  159. mindspore/mindrecord/shardreader.py +9 -0
  160. mindspore/mindrecord/shardwriter.py +29 -29
  161. mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
  162. mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
  163. mindspore/mindrecord/tools/csv_to_mr.py +4 -4
  164. mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
  165. mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
  166. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  167. mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
  168. mindspore/mindspore_common.dll +0 -0
  169. mindspore/mindspore_core.dll +0 -0
  170. mindspore/mindspore_glog.dll +0 -0
  171. mindspore/mindspore_shared_lib.dll +0 -0
  172. mindspore/msobj140.dll +0 -0
  173. mindspore/mspdb140.dll +0 -0
  174. mindspore/mspdbcore.dll +0 -0
  175. mindspore/mspdbst.dll +0 -0
  176. mindspore/mspft140.dll +0 -0
  177. mindspore/msvcdis140.dll +0 -0
  178. mindspore/msvcp140_1.dll +0 -0
  179. mindspore/msvcp140_2.dll +0 -0
  180. mindspore/msvcp140_atomic_wait.dll +0 -0
  181. mindspore/msvcp140_codecvt_ids.dll +0 -0
  182. mindspore/nn/__init__.py +1 -5
  183. mindspore/nn/cell.py +297 -234
  184. mindspore/nn/dynamic_lr.py +1 -1
  185. mindspore/nn/grad/cell_grad.py +17 -42
  186. mindspore/nn/layer/__init__.py +7 -4
  187. mindspore/nn/layer/activation.py +131 -88
  188. mindspore/nn/layer/basic.py +313 -613
  189. mindspore/nn/layer/channel_shuffle.py +103 -0
  190. mindspore/nn/layer/combined.py +1 -1
  191. mindspore/nn/layer/container.py +52 -6
  192. mindspore/nn/layer/conv.py +112 -43
  193. mindspore/nn/layer/dense.py +10 -9
  194. mindspore/nn/layer/embedding.py +36 -34
  195. mindspore/nn/layer/image.py +123 -27
  196. mindspore/nn/layer/math.py +108 -107
  197. mindspore/nn/layer/normalization.py +212 -366
  198. mindspore/nn/layer/padding.py +370 -42
  199. mindspore/nn/layer/pooling.py +1443 -219
  200. mindspore/nn/layer/rnn_cells.py +11 -16
  201. mindspore/nn/layer/rnns.py +38 -39
  202. mindspore/nn/layer/thor_layer.py +24 -25
  203. mindspore/nn/layer/timedistributed.py +5 -5
  204. mindspore/nn/layer/transformer.py +701 -0
  205. mindspore/nn/learning_rate_schedule.py +8 -8
  206. mindspore/nn/loss/__init__.py +9 -6
  207. mindspore/nn/loss/loss.py +678 -142
  208. mindspore/nn/metrics.py +53 -0
  209. mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
  210. mindspore/nn/optim/ada_grad.py +8 -8
  211. mindspore/nn/optim/adadelta.py +2 -3
  212. mindspore/nn/optim/adafactor.py +18 -14
  213. mindspore/nn/optim/adam.py +429 -87
  214. mindspore/nn/optim/adamax.py +5 -6
  215. mindspore/nn/optim/adasum.py +10 -8
  216. mindspore/nn/optim/asgd.py +7 -7
  217. mindspore/nn/optim/ftrl.py +81 -11
  218. mindspore/nn/optim/lamb.py +7 -8
  219. mindspore/nn/optim/lars.py +4 -4
  220. mindspore/nn/optim/lazyadam.py +82 -7
  221. mindspore/nn/optim/momentum.py +8 -7
  222. mindspore/nn/optim/optimizer.py +19 -10
  223. mindspore/nn/optim/proximal_ada_grad.py +6 -5
  224. mindspore/nn/optim/rmsprop.py +3 -3
  225. mindspore/nn/optim/rprop.py +20 -16
  226. mindspore/nn/optim/sgd.py +21 -15
  227. mindspore/nn/optim/thor.py +23 -21
  228. mindspore/nn/probability/__init__.py +0 -2
  229. mindspore/nn/probability/bijector/bijector.py +7 -6
  230. mindspore/nn/probability/bijector/invert.py +4 -2
  231. mindspore/nn/probability/bijector/softplus.py +2 -2
  232. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  233. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  234. mindspore/nn/probability/distribution/__init__.py +6 -0
  235. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
  236. mindspore/nn/probability/distribution/_utils/utils.py +11 -17
  237. mindspore/nn/probability/distribution/bernoulli.py +6 -6
  238. mindspore/nn/probability/distribution/beta.py +1 -1
  239. mindspore/nn/probability/distribution/categorical.py +9 -9
  240. mindspore/nn/probability/distribution/cauchy.py +8 -8
  241. mindspore/nn/probability/distribution/distribution.py +12 -6
  242. mindspore/nn/probability/distribution/exponential.py +5 -5
  243. mindspore/nn/probability/distribution/gamma.py +3 -3
  244. mindspore/nn/probability/distribution/geometric.py +6 -5
  245. mindspore/nn/probability/distribution/gumbel.py +5 -5
  246. mindspore/nn/probability/distribution/half_normal.py +133 -0
  247. mindspore/nn/probability/distribution/laplace.py +128 -0
  248. mindspore/nn/probability/distribution/log_normal.py +0 -1
  249. mindspore/nn/probability/distribution/logistic.py +4 -5
  250. mindspore/nn/probability/distribution/normal.py +11 -15
  251. mindspore/nn/probability/distribution/poisson.py +6 -2
  252. mindspore/nn/probability/distribution/student_t.py +150 -0
  253. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  254. mindspore/nn/probability/distribution/uniform.py +5 -5
  255. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  256. mindspore/nn/reinforcement/tensor_array.py +2 -2
  257. mindspore/nn/sparse/sparse.py +8 -1
  258. mindspore/nn/wrap/cell_wrapper.py +55 -27
  259. mindspore/nn/wrap/grad_reducer.py +20 -11
  260. mindspore/nn/wrap/loss_scale.py +47 -30
  261. mindspore/numpy/array_creations.py +33 -22
  262. mindspore/numpy/array_ops.py +46 -42
  263. mindspore/numpy/logic_ops.py +6 -27
  264. mindspore/numpy/math_ops.py +26 -19
  265. mindspore/numpy/utils.py +1 -8
  266. mindspore/numpy/utils_const.py +112 -62
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/opencv_imgproc452.dll +0 -0
  270. mindspore/ops/__init__.py +6 -3
  271. mindspore/ops/_constants.py +0 -6
  272. mindspore/ops/_grad/__init__.py +2 -1
  273. mindspore/ops/_grad/grad_array_ops.py +209 -152
  274. mindspore/ops/_grad/grad_base.py +55 -17
  275. mindspore/ops/_grad/grad_clip_ops.py +11 -3
  276. mindspore/ops/_grad/grad_comm_ops.py +58 -47
  277. mindspore/ops/_grad/grad_implementations.py +21 -61
  278. mindspore/ops/_grad/grad_inner_ops.py +48 -6
  279. mindspore/ops/_grad/grad_math_ops.py +306 -161
  280. mindspore/ops/_grad/grad_nn_ops.py +192 -181
  281. mindspore/ops/_grad/grad_other_ops.py +1 -1
  282. mindspore/ops/_grad/grad_quant_ops.py +5 -5
  283. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  284. mindspore/ops/_grad/grad_sparse.py +15 -9
  285. mindspore/ops/_grad_experimental/__init__.py +1 -0
  286. mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
  287. mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
  288. mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
  289. mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
  290. mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
  291. mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
  292. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  293. mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
  294. mindspore/ops/_op_impl/__init__.py +3 -3
  295. mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
  296. mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
  297. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  298. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
  299. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  300. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  301. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
  302. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  303. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  304. mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
  305. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  306. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
  307. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  308. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  309. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  310. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  311. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  312. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  313. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  314. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  315. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  316. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  317. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  318. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  319. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  320. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  321. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  322. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  323. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  324. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  325. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
  326. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
  327. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  328. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  329. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  330. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  331. mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
  332. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  333. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  334. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  335. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  336. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  337. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  338. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  339. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  340. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  341. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  342. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  343. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  344. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  345. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  346. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  347. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  348. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  349. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  350. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  351. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  352. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
  353. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  354. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
  355. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  356. mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
  357. mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
  358. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  359. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  360. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  361. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  362. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  363. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  364. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  365. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
  366. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  367. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  368. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  369. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  370. mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
  371. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  372. mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
  373. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  374. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  375. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  376. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  377. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  378. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  379. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  380. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  381. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  382. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  383. mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
  384. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  385. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  386. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  387. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  388. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  389. mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
  390. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  391. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  392. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  393. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  394. mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
  395. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  396. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  397. mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
  398. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  399. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  400. mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
  401. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  402. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  403. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  404. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  405. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  406. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  407. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  408. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  409. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  410. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  411. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  412. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  413. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  414. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  415. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  416. mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
  417. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  418. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  419. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  420. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  421. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  422. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  423. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  424. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  425. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  426. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  427. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  428. mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
  429. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  430. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  431. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  432. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  433. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  434. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  435. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  436. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  437. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  438. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  439. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  440. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  441. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  442. mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
  443. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  444. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  445. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  446. mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
  447. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
  448. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
  449. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  450. mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
  451. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  452. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  453. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  454. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  455. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  456. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  457. mindspore/ops/_op_impl/cpu/__init__.py +1 -2
  458. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  459. mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
  460. mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
  461. mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
  462. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  463. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  464. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  465. mindspore/ops/_op_impl/tbe/__init__.py +27 -608
  466. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
  467. mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
  468. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  469. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  470. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  471. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
  472. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  473. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  474. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
  475. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
  476. mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
  477. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  478. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
  479. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  480. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  481. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  482. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  483. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  484. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
  485. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
  486. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  487. mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
  488. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
  489. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  490. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  491. mindspore/ops/_op_impl/tbe/greater.py +2 -0
  492. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  493. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
  494. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  495. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  496. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
  497. mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
  498. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
  499. mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
  500. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
  501. mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
  502. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
  503. mindspore/ops/_op_impl/tbe/slice.py +26 -15
  504. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  505. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  506. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
  507. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  508. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
  509. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
  510. mindspore/ops/_primitive_cache.py +3 -2
  511. mindspore/ops/_register_for_op.py +11 -0
  512. mindspore/ops/_utils/__init__.py +1 -1
  513. mindspore/ops/_utils/utils.py +20 -41
  514. mindspore/ops/_vmap/__init__.py +2 -2
  515. mindspore/ops/_vmap/vmap_array_ops.py +170 -78
  516. mindspore/ops/_vmap/vmap_base.py +24 -10
  517. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  518. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  519. mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
  520. mindspore/ops/_vmap/vmap_image_ops.py +52 -0
  521. mindspore/ops/_vmap/vmap_math_ops.py +77 -6
  522. mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
  523. mindspore/ops/_vmap/vmap_other_ops.py +3 -1
  524. mindspore/ops/_vmap/vmap_random_ops.py +55 -3
  525. mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
  526. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  527. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  528. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
  529. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
  530. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
  531. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
  532. mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
  533. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  534. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  535. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  536. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
  537. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  538. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
  539. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  540. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  541. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
  542. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
  543. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  544. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  545. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  546. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  547. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  548. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  549. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  550. mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
  551. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  552. mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
  553. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
  554. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  555. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
  556. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  557. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  558. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
  559. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
  560. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  561. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  562. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  563. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
  565. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  566. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  567. mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
  568. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
  569. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  570. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
  571. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
  572. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
  573. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
  574. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
  576. mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
  577. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  578. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  579. mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
  580. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  581. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
  582. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
  583. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
  584. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  585. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  586. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  587. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  588. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  589. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
  590. mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
  591. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
  592. mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
  593. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  594. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
  595. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
  596. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
  597. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  598. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  599. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  600. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  601. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  602. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  603. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  604. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  605. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  606. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  607. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  608. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
  609. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
  610. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
  611. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
  612. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  613. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  614. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  615. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  616. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  617. mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
  618. mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
  619. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  620. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  621. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
  622. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
  623. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
  624. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
  625. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  626. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
  627. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
  628. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
  629. mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
  630. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  631. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  632. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
  633. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
  634. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
  635. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  636. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  637. mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
  638. mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
  639. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  640. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  642. mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
  643. mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
  644. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  645. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  646. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  647. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  648. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  649. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
  650. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
  651. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  652. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  653. mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
  654. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
  655. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
  656. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
  657. mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
  658. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  659. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  660. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
  661. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
  662. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
  663. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  664. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  665. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
  666. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
  667. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
  668. mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
  669. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
  670. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  671. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  672. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
  673. mindspore/ops/bprop_mindir/__init__.py +1 -4
  674. mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
  675. mindspore/ops/composite/__init__.py +12 -13
  676. mindspore/ops/composite/base.py +261 -254
  677. mindspore/ops/composite/env_ops.py +41 -0
  678. mindspore/ops/composite/math_ops.py +197 -156
  679. mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
  680. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
  681. mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
  682. mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
  683. mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
  684. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
  685. mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
  686. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  687. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  688. mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
  689. mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
  690. mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
  691. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
  692. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  693. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
  694. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
  695. mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
  696. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  697. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  698. mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
  699. mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
  700. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
  701. mindspore/ops/function/__init__.py +323 -8
  702. mindspore/ops/function/array_func.py +3511 -780
  703. mindspore/ops/function/clip_func.py +329 -0
  704. mindspore/ops/function/debug_func.py +6 -6
  705. mindspore/ops/function/grad/__init__.py +5 -1
  706. mindspore/ops/function/grad/grad_func.py +736 -65
  707. mindspore/ops/function/image_func.py +270 -0
  708. mindspore/ops/function/linalg_func.py +268 -8
  709. mindspore/ops/function/math_func.py +8032 -3164
  710. mindspore/ops/function/nn_func.py +5619 -1855
  711. mindspore/ops/function/other_func.py +115 -0
  712. mindspore/ops/function/parameter_func.py +11 -10
  713. mindspore/ops/function/random_func.py +939 -77
  714. mindspore/ops/function/sparse_func.py +249 -84
  715. mindspore/ops/function/sparse_unary_func.py +2303 -0
  716. mindspore/ops/function/spectral_func.py +146 -0
  717. mindspore/ops/function/vmap_func.py +114 -0
  718. mindspore/ops/functional.py +182 -254
  719. mindspore/ops/op_info_register.py +79 -34
  720. mindspore/ops/operations/__init__.py +210 -118
  721. mindspore/ops/operations/_csr_ops.py +7 -7
  722. mindspore/ops/operations/_embedding_cache_ops.py +25 -15
  723. mindspore/ops/operations/_grad_ops.py +447 -322
  724. mindspore/ops/operations/_inner_ops.py +547 -176
  725. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  726. mindspore/ops/operations/_ms_kernel.py +29 -27
  727. mindspore/ops/operations/_ocr_ops.py +11 -11
  728. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  729. mindspore/ops/operations/_quant_ops.py +186 -101
  730. mindspore/ops/operations/_rl_inner_ops.py +122 -61
  731. mindspore/ops/operations/_scalar_ops.py +466 -0
  732. mindspore/ops/operations/_sequence_ops.py +1047 -0
  733. mindspore/ops/operations/_tensor_array.py +10 -11
  734. mindspore/ops/operations/_thor_ops.py +4 -4
  735. mindspore/ops/operations/array_ops.py +1428 -1226
  736. mindspore/ops/operations/comm_ops.py +180 -117
  737. mindspore/ops/operations/control_ops.py +4 -2
  738. mindspore/ops/operations/custom_ops.py +185 -98
  739. mindspore/ops/operations/debug_ops.py +92 -54
  740. mindspore/ops/operations/image_ops.py +406 -211
  741. mindspore/ops/operations/inner_ops.py +42 -53
  742. mindspore/ops/operations/linalg_ops.py +32 -29
  743. mindspore/ops/operations/math_ops.py +2076 -897
  744. mindspore/ops/operations/nn_ops.py +1282 -1252
  745. mindspore/ops/operations/other_ops.py +124 -278
  746. mindspore/ops/operations/random_ops.py +345 -178
  747. mindspore/ops/operations/rl_ops.py +8 -9
  748. mindspore/ops/operations/sparse_ops.py +502 -157
  749. mindspore/ops/operations/spectral_ops.py +107 -0
  750. mindspore/ops/primitive.py +192 -15
  751. mindspore/ops/vm_impl_registry.py +23 -2
  752. mindspore/parallel/__init__.py +6 -1
  753. mindspore/parallel/_auto_parallel_context.py +199 -92
  754. mindspore/parallel/_cell_wrapper.py +4 -2
  755. mindspore/parallel/_cost_model_context.py +3 -0
  756. mindspore/parallel/_dp_allreduce_fusion.py +2 -1
  757. mindspore/parallel/_offload_context.py +185 -0
  758. mindspore/parallel/_parallel_serialization.py +167 -28
  759. mindspore/parallel/_ps_context.py +9 -5
  760. mindspore/parallel/_recovery_context.py +1 -1
  761. mindspore/parallel/_tensor.py +9 -1
  762. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  763. mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
  764. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  765. mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
  766. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  767. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
  768. mindspore/parallel/_utils.py +47 -7
  769. mindspore/parallel/algo_parameter_config.py +5 -1
  770. mindspore/parallel/checkpoint_transform.py +329 -0
  771. mindspore/parallel/shard.py +229 -0
  772. mindspore/perf_msvcbuildinsights.dll +0 -0
  773. mindspore/pgodb140.dll +0 -0
  774. mindspore/pgort140.dll +0 -0
  775. mindspore/profiler/__init__.py +2 -1
  776. mindspore/profiler/common/util.py +4 -3
  777. mindspore/profiler/common/validator/validate_path.py +2 -2
  778. mindspore/profiler/envprofiling.py +249 -0
  779. mindspore/profiler/parser/aicpu_data_parser.py +38 -39
  780. mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
  781. mindspore/profiler/parser/base_timeline_generator.py +471 -0
  782. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
  783. mindspore/profiler/parser/framework_parser.py +42 -16
  784. mindspore/profiler/parser/hccl_parser.py +158 -158
  785. mindspore/profiler/parser/hwts_log_parser.py +7 -6
  786. mindspore/profiler/parser/integrator.py +18 -1579
  787. mindspore/profiler/parser/minddata_analyzer.py +8 -8
  788. mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
  789. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  790. mindspore/profiler/parser/optime_parser.py +17 -18
  791. mindspore/profiler/parser/profiler_info.py +108 -0
  792. mindspore/profiler/parser/step_trace_parser.py +1 -1
  793. mindspore/profiler/profiling.py +396 -194
  794. mindspore/rewrite/__init__.py +6 -2
  795. mindspore/rewrite/api/node.py +51 -110
  796. mindspore/rewrite/api/node_type.py +10 -6
  797. mindspore/rewrite/api/pattern_engine.py +51 -7
  798. mindspore/rewrite/api/scoped_value.py +64 -53
  799. mindspore/rewrite/api/symbol_tree.py +108 -61
  800. mindspore/rewrite/api/tree_node_helper.py +2 -3
  801. mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
  802. mindspore/rewrite/ast_helpers/__init__.py +6 -3
  803. mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
  804. mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
  805. mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
  806. mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
  807. mindspore/rewrite/ast_transformers/__init__.py +0 -1
  808. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
  809. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
  810. mindspore/rewrite/common/__init__.py +2 -0
  811. mindspore/rewrite/common/event.py +1 -1
  812. mindspore/rewrite/common/observable.py +1 -1
  813. mindspore/rewrite/common/observer.py +1 -1
  814. mindspore/rewrite/common/rewrite_elog.py +35 -0
  815. mindspore/rewrite/namer.py +2 -2
  816. mindspore/rewrite/namespace.py +14 -4
  817. mindspore/rewrite/node.py +161 -13
  818. mindspore/rewrite/parser.py +0 -1
  819. mindspore/rewrite/parser_register.py +0 -1
  820. mindspore/rewrite/parsers/arguments_parser.py +3 -2
  821. mindspore/rewrite/parsers/assign_parser.py +267 -67
  822. mindspore/rewrite/parsers/attribute_parser.py +56 -0
  823. mindspore/rewrite/parsers/class_def_parser.py +191 -108
  824. mindspore/rewrite/parsers/constant_parser.py +101 -0
  825. mindspore/rewrite/parsers/container_parser.py +88 -0
  826. mindspore/rewrite/parsers/for_parser.py +28 -15
  827. mindspore/rewrite/parsers/function_def_parser.py +21 -5
  828. mindspore/rewrite/parsers/if_parser.py +11 -28
  829. mindspore/rewrite/parsers/module_parser.py +9 -6
  830. mindspore/rewrite/parsers/return_parser.py +3 -2
  831. mindspore/rewrite/sparsify/__init__.py +0 -0
  832. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  833. mindspore/rewrite/sparsify/sparsify.py +109 -0
  834. mindspore/rewrite/sparsify/utils.py +173 -0
  835. mindspore/rewrite/symbol_tree.py +322 -109
  836. mindspore/rewrite/symbol_tree_builder.py +45 -8
  837. mindspore/rewrite/symbol_tree_dumper.py +0 -1
  838. mindspore/rewrite/topological_manager.py +1 -2
  839. mindspore/run_check/_check_version.py +209 -112
  840. mindspore/run_check/run_check.py +2 -1
  841. mindspore/tbbmalloc.dll +0 -0
  842. mindspore/tinyxml2.dll +0 -0
  843. mindspore/train/__init__.py +6 -4
  844. mindspore/train/_utils.py +28 -5
  845. mindspore/train/amp.py +321 -50
  846. mindspore/train/callback/__init__.py +3 -1
  847. mindspore/train/callback/_backup_and_restore.py +120 -0
  848. mindspore/train/callback/_callback.py +8 -8
  849. mindspore/train/callback/_checkpoint.py +12 -9
  850. mindspore/train/callback/_early_stop.py +13 -7
  851. mindspore/train/callback/_history.py +8 -8
  852. mindspore/train/callback/_lambda_callback.py +6 -6
  853. mindspore/train/callback/_landscape.py +36 -38
  854. mindspore/train/callback/_loss_monitor.py +12 -6
  855. mindspore/train/callback/_lr_scheduler_callback.py +2 -4
  856. mindspore/train/callback/_on_request_exit.py +212 -0
  857. mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
  858. mindspore/train/callback/_summary_collector.py +27 -19
  859. mindspore/train/callback/_time_monitor.py +13 -7
  860. mindspore/train/checkpoint_pb2.py +68 -8
  861. mindspore/train/data_sink.py +122 -33
  862. mindspore/train/dataset_helper.py +28 -87
  863. mindspore/train/loss_scale_manager.py +4 -7
  864. mindspore/{nn → train}/metrics/__init__.py +20 -20
  865. mindspore/{nn → train}/metrics/accuracy.py +12 -10
  866. mindspore/{nn → train}/metrics/auc.py +4 -4
  867. mindspore/{nn → train}/metrics/bleu_score.py +4 -4
  868. mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
  869. mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
  870. mindspore/{nn → train}/metrics/dice.py +6 -5
  871. mindspore/{nn → train}/metrics/error.py +7 -5
  872. mindspore/{nn → train}/metrics/fbeta.py +9 -7
  873. mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
  874. mindspore/{nn → train}/metrics/loss.py +4 -3
  875. mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
  876. mindspore/{nn → train}/metrics/metric.py +6 -5
  877. mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
  878. mindspore/{nn → train}/metrics/perplexity.py +5 -4
  879. mindspore/{nn → train}/metrics/precision.py +5 -4
  880. mindspore/{nn → train}/metrics/recall.py +5 -4
  881. mindspore/{nn → train}/metrics/roc.py +7 -6
  882. mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
  883. mindspore/{nn → train}/metrics/topk.py +7 -5
  884. mindspore/train/mind_ir_pb2.py +339 -32
  885. mindspore/train/model.py +113 -84
  886. mindspore/train/serialization.py +547 -167
  887. mindspore/train/summary/_summary_adapter.py +1 -1
  888. mindspore/train/summary/summary_record.py +43 -12
  889. mindspore/train/train_thor/convert_utils.py +7 -1
  890. mindspore/train/train_thor/dataset_helper.py +3 -3
  891. mindspore/train/train_thor/model_thor.py +0 -4
  892. mindspore/turbojpeg.dll +0 -0
  893. mindspore/vcmeta.dll +0 -0
  894. mindspore/vcruntime140.dll +0 -0
  895. mindspore/vcruntime140_1.dll +0 -0
  896. mindspore/version.py +1 -1
  897. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
  898. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
  899. mindspore/compression/common/constant.py +0 -124
  900. mindspore/compression/export/__init__.py +0 -19
  901. mindspore/compression/export/quant_export.py +0 -514
  902. mindspore/compression/quant/qat.py +0 -636
  903. mindspore/compression/quant/quant_utils.py +0 -462
  904. mindspore/compression/quant/quantizer.py +0 -68
  905. mindspore/libatomic-1.dll +0 -0
  906. mindspore/libgcc_s_seh-1.dll +0 -0
  907. mindspore/libgfortran-4.dll +0 -0
  908. mindspore/libgomp-1.dll +0 -0
  909. mindspore/libjpeg-62.dll +0 -0
  910. mindspore/libmindspore.dll +0 -0
  911. mindspore/libmindspore_common.dll +0 -0
  912. mindspore/libmindspore_core.dll +0 -0
  913. mindspore/libmindspore_glog.dll +0 -0
  914. mindspore/libnnacl.dll +0 -0
  915. mindspore/libopencv_core452.dll +0 -0
  916. mindspore/libopencv_imgcodecs452.dll +0 -0
  917. mindspore/libopencv_imgproc452.dll +0 -0
  918. mindspore/libquadmath-0.dll +0 -0
  919. mindspore/libsqlite3.dll +0 -0
  920. mindspore/libssp-0.dll +0 -0
  921. mindspore/libstdc++-6.dll +0 -0
  922. mindspore/libtinyxml2.dll +0 -0
  923. mindspore/libturbojpeg.dll +0 -0
  924. mindspore/libwinpthread-1.dll +0 -0
  925. mindspore/nn/layer/quant.py +0 -1868
  926. mindspore/nn/layer/rnn_utils.py +0 -90
  927. mindspore/nn/probability/dpn/__init__.py +0 -22
  928. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  929. mindspore/nn/probability/dpn/vae/cvae.py +0 -138
  930. mindspore/nn/probability/dpn/vae/vae.py +0 -122
  931. mindspore/nn/probability/infer/__init__.py +0 -22
  932. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  933. mindspore/nn/probability/infer/variational/svi.py +0 -84
  934. mindspore/nn/probability/toolbox/__init__.py +0 -22
  935. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  936. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
  937. mindspore/nn/probability/transforms/__init__.py +0 -22
  938. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  939. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  940. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  941. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  942. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  943. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  944. mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
  945. mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
  946. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
  947. mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
  948. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
  949. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
  950. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
  951. mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
  952. mindspore/ops/composite/array_ops.py +0 -210
  953. mindspore/ops/composite/clip_ops.py +0 -238
  954. mindspore/ops/composite/random_ops.py +0 -426
  955. mindspore/ops/composite/vmap_ops.py +0 -38
  956. mindspore/ops/operations/sponge_ops.py +0 -3531
  957. mindspore/ops/operations/sponge_update_ops.py +0 -2546
  958. mindspore/parallel/nn/__init__.py +0 -42
  959. mindspore/parallel/nn/loss.py +0 -22
  960. mindspore/parallel/nn/moe.py +0 -21
  961. mindspore/parallel/nn/op_parallel_config.py +0 -22
  962. mindspore/parallel/nn/transformer.py +0 -31
  963. mindspore/run_check/_check_deps_version.py +0 -84
  964. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  965. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  966. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -22,15 +22,16 @@ their training models.
22
22
  import numpy as np
23
23
 
24
24
  import mindspore._c_dataengine as cde
25
- from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation, NormType, \
26
- ResampleMethod, ScaleType, WindowType
25
+ from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation, NormMode, \
26
+ NormType, ResampleMethod, ScaleType, WindowType
27
27
  from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \
28
28
  check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \
29
29
  check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \
30
30
  check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_griffin_lim, \
31
- check_highpass_biquad, check_inverse_mel_scale, check_lfilter, check_lowpass_biquad, check_magphase, \
32
- check_mask_along_axis, check_mask_along_axis_iid, check_masking, check_mel_scale, check_mu_law_coding, \
33
- check_overdrive, check_phase_vocoder, check_phaser, check_resample, check_riaa_biquad, check_sliding_window_cmn, \
31
+ check_highpass_biquad, check_inverse_mel_scale, check_inverse_spectrogram, check_lfcc, check_lfilter, \
32
+ check_lowpass_biquad, check_magphase, check_mask_along_axis, check_mask_along_axis_iid, check_masking, \
33
+ check_mel_scale, check_mel_spectrogram, check_mfcc, check_mu_law_coding, check_overdrive, check_phase_vocoder, \
34
+ check_phaser, check_pitch_shift, check_resample, check_riaa_biquad, check_sliding_window_cmn, \
34
35
  check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, check_vad, check_vol
35
36
  from ..transforms.py_transforms_util import Implementation
36
37
  from ..transforms.transforms import TensorOperation
@@ -68,7 +69,7 @@ class AllpassBiquad(AudioTensorOperation):
68
69
  Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
69
70
 
70
71
  Note:
71
- The dimension of the audio waveform to be processed needs to be (..., time).
72
+ The shape of the audio waveform to be processed needs to be <..., time>.
72
73
 
73
74
  Args:
74
75
  sample_rate (int): Sampling rate (in Hz), which can't be zero.
@@ -77,7 +78,7 @@ class AllpassBiquad(AudioTensorOperation):
77
78
  in range of (0, 1]. Default: 0.707.
78
79
 
79
80
  Raises:
80
- TypeError: If `sample_rate` is not of type integer.
81
+ TypeError: If `sample_rate` is not of type int.
81
82
  ValueError: If `sample_rate` is 0.
82
83
  TypeError: If `central_freq` is not of type float.
83
84
  TypeError: If `Q` is not of type float.
@@ -116,22 +117,22 @@ class AmplitudeToDB(AudioTensorOperation):
116
117
  Turn the input audio waveform from the amplitude/power scale to decibel scale.
117
118
 
118
119
  Note:
119
- The dimension of the audio waveform to be processed needs to be (..., freq, time).
120
+ The shape of the audio waveform to be processed needs to be <..., freq, time>.
120
121
 
121
122
  Args:
122
123
  stype (ScaleType, optional): Scale of the input waveform, which can be
123
124
  ScaleType.POWER or ScaleType.MAGNITUDE. Default: ScaleType.POWER.
124
125
  ref_value (float, optional): Multiplier reference value for generating
125
- `db_multiplier`. Default: 1.0. The formula is
126
+ `db_multiplier` . Default: 1.0. The formula is
126
127
 
127
- :math:`\text{db_multiplier} = Log10(max(\text{ref_value}, amin))`.
128
+ :math:`\text{db_multiplier} = Log10(max(\text{ref_value}, amin))` .
128
129
 
129
130
  amin (float, optional): Lower bound to clamp the input waveform, which must
130
131
  be greater than zero. Default: 1e-10.
131
132
  top_db (float, optional): Minimum cut-off decibels, which must be non-negative. Default: 80.0.
132
133
 
133
134
  Raises:
134
- TypeError: If `stype` is not of type :class:`mindspore.dataset.audio.utils.ScaleType`.
135
+ TypeError: If `stype` is not of type :class:`mindspore.dataset.audio.ScaleType` .
135
136
  TypeError: If `ref_value` is not of type float.
136
137
  ValueError: If `ref_value` is not a positive number.
137
138
  TypeError: If `amin` is not of type float.
@@ -170,7 +171,7 @@ class Angle(AudioTensorOperation):
170
171
  Calculate the angle of complex number sequence.
171
172
 
172
173
  Note:
173
- The dimension of the audio waveform to be processed needs to be (..., complex=2).
174
+ The shape of the audio waveform to be processed needs to be <..., complex=2>.
174
175
  The first dimension represents the real part while the second represents the imaginary.
175
176
 
176
177
  Raises:
@@ -203,7 +204,7 @@ class BandBiquad(AudioTensorOperation):
203
204
  Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
204
205
 
205
206
  Note:
206
- The dimension of the audio waveform to be processed needs to be (..., time).
207
+ The shape of the audio waveform to be processed needs to be <..., time>.
207
208
 
208
209
  Args:
209
210
  sample_rate (int): Sampling rate (in Hz), which can't be zero.
@@ -214,7 +215,7 @@ class BandBiquad(AudioTensorOperation):
214
215
  If False, uses mode oriented to pitched audio, i.e. voice, singing, or instrumental music. Default: False.
215
216
 
216
217
  Raises:
217
- TypeError: If `sample_rate` is not of type integer.
218
+ TypeError: If `sample_rate` is not of type int.
218
219
  ValueError: If `sample_rate` is 0.
219
220
  TypeError: If `central_freq` is not of type float.
220
221
  TypeError: If `Q` is not of type float.
@@ -264,7 +265,7 @@ class BandpassBiquad(AudioTensorOperation):
264
265
  Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
265
266
 
266
267
  Note:
267
- The dimension of the audio waveform to be processed needs to be (..., time).
268
+ The shape of the audio waveform to be processed needs to be <..., time>.
268
269
 
269
270
  Args:
270
271
  sample_rate (int): Sampling rate (in Hz), which can't be zero.
@@ -275,7 +276,7 @@ class BandpassBiquad(AudioTensorOperation):
275
276
  If False, uses a constant 0dB peak gain. Default: False.
276
277
 
277
278
  Raises:
278
- TypeError: If `sample_rate` is not of type integer.
279
+ TypeError: If `sample_rate` is not of type int.
279
280
  ValueError: If `sample_rate` is 0.
280
281
  TypeError: If `central_freq` is not of type float.
281
282
  TypeError: If `Q` is not of type float.
@@ -323,7 +324,7 @@ class BandrejectBiquad(AudioTensorOperation):
323
324
  Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
324
325
 
325
326
  Note:
326
- The dimension of the audio waveform to be processed needs to be (..., time).
327
+ The shape of the audio waveform to be processed needs to be <..., time>.
327
328
 
328
329
  Args:
329
330
  sample_rate (int): Sampling rate (in Hz), which can't be zero.
@@ -332,7 +333,7 @@ class BandrejectBiquad(AudioTensorOperation):
332
333
  in range of (0, 1]. Default: 0.707.
333
334
 
334
335
  Raises:
335
- TypeError: If `sample_rate` is not of type integer.
336
+ TypeError: If `sample_rate` is not of type int.
336
337
  ValueError: If `sample_rate` is 0.
337
338
  TypeError: If `central_freq` is not of type float.
338
339
  TypeError: If `Q` is not of type float.
@@ -375,7 +376,7 @@ class BassBiquad(AudioTensorOperation):
375
376
  Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
376
377
 
377
378
  Note:
378
- The dimension of the audio waveform to be processed needs to be (..., time).
379
+ The shape of the audio waveform to be processed needs to be <..., time>.
379
380
 
380
381
  Args:
381
382
  sample_rate (int): Sampling rate (in Hz), which can't be zero.
@@ -385,7 +386,7 @@ class BassBiquad(AudioTensorOperation):
385
386
  in range of (0, 1]. Default: 0.707.
386
387
 
387
388
  Raises:
388
- TypeError: If `sample_rate` is not of type integer.
389
+ TypeError: If `sample_rate` is not of type int.
389
390
  ValueError: If `sample_rate` is 0.
390
391
  TypeError: If `gain` is not of type float.
391
392
  TypeError: If `central_freq` is not of type float.
@@ -420,15 +421,28 @@ class BassBiquad(AudioTensorOperation):
420
421
  class Biquad(TensorOperation):
421
422
  """
422
423
  Perform a biquad filter of input audio.
424
+ Mathematical formulas refer to: `Digital_biquad_filter <https://en.wikipedia.org/wiki/Digital_biquad_filter>`_ .
423
425
 
424
426
  Args:
425
427
  b0 (float): Numerator coefficient of current input, x[n].
426
428
  b1 (float): Numerator coefficient of input one time step ago x[n-1].
427
429
  b2 (float): Numerator coefficient of input two time steps ago x[n-2].
428
- a0 (float): Denominator coefficient of current output y[n], the value can't be zero, typically 1.
430
+ a0 (float): Denominator coefficient of current output y[n], the value can't be 0, typically 1.
429
431
  a1 (float): Denominator coefficient of output one time step ago y[n-1].
430
432
  a2 (float): Denominator coefficient of output two time steps ago y[n-2].
431
433
 
434
+ Raises:
435
+ TypeError: If `b0` is not of type float.
436
+ TypeError: If `b1` is not of type float.
437
+ TypeError: If `b2` is not of type float.
438
+ TypeError: If `a0` is not of type float.
439
+ TypeError: If `a1` is not of type float.
440
+ TypeError: If `a2` is not of type float.
441
+ ValueError: If `a0` is 0.
442
+
443
+ Supported Platforms:
444
+ ``CPU``
445
+
432
446
  Examples:
433
447
  >>> import numpy as np
434
448
  >>>
@@ -456,7 +470,7 @@ class ComplexNorm(AudioTensorOperation):
456
470
  Compute the norm of complex number sequence.
457
471
 
458
472
  Note:
459
- The dimension of the audio waveform to be processed needs to be (..., complex=2).
473
+ The shape of the audio waveform to be processed needs to be <..., complex=2>.
460
474
  The first dimension represents the real part while the second represents the imaginary.
461
475
 
462
476
  Args:
@@ -498,25 +512,37 @@ DE_C_BORDER_TYPE = {
498
512
 
499
513
  class ComputeDeltas(AudioTensorOperation):
500
514
  r"""
501
- Compute delta coefficients of a spectrogram.
515
+ Compute delta coefficients, also known as differential coefficients, of a spectrogram.
516
+
517
+ Delta coefficients help to understand the dynamics of the power spectrum. It can be
518
+ computed using the following formula.
502
519
 
503
520
  .. math::
504
521
  d_{t}=\frac{{\textstyle\sum_{n=1}^{N}}n(c_{t+n}-c_{t-n})}{2{\textstyle\sum_{n=1}^{N}}n^{2}}
505
522
 
506
- Args:
507
- win_length (int, optional): The window length used for computing delta, must be no less than 3 (default=5).
508
- pad_mode (BorderType, optional): Mode parameter passed to padding (default=BorderType.EDGE).It can be any of
509
- [BorderType.CONSTANT, BorderType.EDGE, BorderType.REFLECT, BordBorderTypeer.SYMMETRIC].
523
+ where :math:`d_{t}` is the deltas at time :math:`t` , :math:`c_{t}` is the spectrogram coefficients
524
+ at time :math:`t` , :math:`N` is :math:`(\text{win_length} - 1) // 2` .
510
525
 
511
- - BorderType.CONSTANT, means it fills the border with constant values.
526
+ Args:
527
+ win_length (int, optional): The window length used for computing delta, must be no less than 3. Default: 5.
528
+ pad_mode (BorderType, optional): Mode parameter passed to padding, can be BorderType.CONSTANT, BorderType.EDGE,
529
+ BorderType.REFLECT or BorderType.SYMMETRIC. Default: BorderType.EDGE.
512
530
 
513
- - BorderType.EDGE, means it pads with the last value on the edge.
531
+ - BorderType.CONSTANT, pad with a constant value.
532
+ - BorderType.EDGE, pad with the last value on the edge.
533
+ - BorderType.REFLECT, reflect the value on the edge while omitting the last one.
534
+ For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [3, 2, 1, 2, 3, 4, 3, 2].
535
+ - BorderType.SYMMETRIC, reflect the value on the edge while repeating the last one.
536
+ For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [2, 1, 1, 2, 3, 4, 4, 3].
514
537
 
515
- - BorderType.REFLECT, means it reflects the values on the edge omitting the last
516
- value of edge.
538
+ Raises:
539
+ TypeError: If `win_length` is not of type int.
540
+ ValueError: If `win_length` is less than 3.
541
+ TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
542
+ RuntimeError: If input tensor is not in shape of <..., freq, time>.
517
543
 
518
- - BorderType.SYMMETRIC, means it reflects the values on the edge repeating the last
519
- value of edge.
544
+ Supported Platforms:
545
+ ``CPU``
520
546
 
521
547
  Examples:
522
548
  >>> import numpy as np
@@ -546,9 +572,8 @@ class Contrast(AudioTensorOperation):
546
572
 
547
573
  Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
548
574
 
549
-
550
575
  Note:
551
- The dimension of the audio waveform to be processed needs to be (..., time).
576
+ The shape of the audio waveform to be processed needs to be <..., time>.
552
577
 
553
578
  Args:
554
579
  enhancement_amount (float, optional): Controls the amount of the enhancement,
@@ -589,6 +614,13 @@ class DBToAmplitude(AudioTensorOperation):
589
614
  ref (float): Reference which the output will be scaled by.
590
615
  power (float): If power equals 1, will compute DB to power. If 0.5, will compute DB to amplitude.
591
616
 
617
+ Raises:
618
+ TypeError: If `ref` is not of type float.
619
+ TypeError: If `power` is not of type float.
620
+
621
+ Supported Platforms:
622
+ ``CPU``
623
+
592
624
  Examples:
593
625
  >>> import numpy as np
594
626
  >>>
@@ -610,12 +642,20 @@ class DBToAmplitude(AudioTensorOperation):
610
642
 
611
643
  class DCShift(AudioTensorOperation):
612
644
  """
613
- Apply a DC shift to the audio.
645
+ Apply a DC shift to the audio. This can be useful to remove DC offset from audio.
614
646
 
615
647
  Args:
616
648
  shift (float): The amount to shift the audio, the value must be in the range [-2.0, 2.0].
617
649
  limiter_gain (float, optional): Used only on peaks to prevent clipping,
618
- the value should be much less than 1, such as 0.05 or 0.02.
650
+ the value should be much less than 1, such as 0.05 or 0.02. Default: None, will be set to `shift` .
651
+
652
+ Raises:
653
+ TypeError: If `shift` is not of type float.
654
+ ValueError: If `shift` is not in range [-2.0, 2.0].
655
+ TypeError: If `limiter_gain` is not of type float.
656
+
657
+ Supported Platforms:
658
+ ``CPU``
619
659
 
620
660
  Examples:
621
661
  >>> import numpy as np
@@ -638,11 +678,20 @@ class DCShift(AudioTensorOperation):
638
678
 
639
679
  class DeemphBiquad(AudioTensorOperation):
640
680
  """
641
- Design two-pole deemph filter for audio waveform of dimension of (..., time).
681
+ Apply Compact Disc (IEC 60908) de-emphasis (a treble attenuation shelving filter) to the audio waveform.
682
+
683
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
642
684
 
643
685
  Args:
644
- sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz),
645
- the value must be 44100 or 48000.
686
+ sample_rate (int): Sampling rate of the waveform, must be 44100 or 48000 (Hz).
687
+
688
+ Raises:
689
+ TypeError: If `sample_rate` is not of type int.
690
+ ValueError: If `sample_rate` is not 44100 or 48000.
691
+ RuntimeError: If input tensor is not in shape of <..., time>.
692
+
693
+ Supported Platforms:
694
+ ``CPU``
646
695
 
647
696
  Examples:
648
697
  >>> import numpy as np
@@ -670,13 +719,28 @@ class DetectPitchFrequency(AudioTensorOperation):
670
719
 
671
720
  Args:
672
721
  sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero.
673
- frame_time (float, optional): Duration of a frame, the value must be greater than zero (default=0.01).
722
+ frame_time (float, optional): Duration of a frame, the value must be greater than zero. Default: 0.01.
674
723
  win_length (int, optional): The window length for median smoothing (in number of frames), the value must be
675
- greater than zero (default=30).
676
- freq_low (int, optional): Lowest frequency that can be detected (Hz), the value must be greater than zero
677
- (default=85).
678
- freq_high (int, optional): Highest frequency that can be detected (Hz), the value must be greater than zero
679
- (default=3400).
724
+ greater than zero. Default: 30.
725
+ freq_low (int, optional): Lowest frequency that can be detected (Hz), the value must be greater than zero.
726
+ Default: 85.
727
+ freq_high (int, optional): Highest frequency that can be detected (Hz), the value must be greater than zero.
728
+ Default: 3400.
729
+
730
+ Raises:
731
+ TypeError: If `sample_rate` is not of type int.
732
+ ValueError: If `sample_rate` is 0.
733
+ TypeError: If `frame_time` is not of type float.
734
+ ValueError: If `frame_time` is not positive.
735
+ TypeError: If `win_length` is not of type int.
736
+ ValueError: If `win_length` is not positive.
737
+ TypeError: If `freq_low` is not of type int.
738
+ ValueError: If `freq_low` is not positive.
739
+ TypeError: If `freq_high` is not of type int.
740
+ ValueError: If `freq_high` is not positive.
741
+
742
+ Supported Platforms:
743
+ ``CPU``
680
744
 
681
745
  Examples:
682
746
  >>> import numpy as np
@@ -714,12 +778,20 @@ class Dither(AudioTensorOperation):
714
778
 
715
779
  Args:
716
780
  density_function (DensityFunction, optional): The density function of a continuous
717
- random variable. Can be one of DensityFunction.TPDF (Triangular Probability Density Function),
781
+ random variable, can be DensityFunction.TPDF (Triangular Probability Density Function),
718
782
  DensityFunction.RPDF (Rectangular Probability Density Function) or
719
- DensityFunction.GPDF (Gaussian Probability Density Function)
720
- (default=DensityFunction.TPDF).
783
+ DensityFunction.GPDF (Gaussian Probability Density Function).
784
+ Default: DensityFunction.TPDF.
721
785
  noise_shaping (bool, optional): A filtering process that shapes the spectral
722
- energy of quantisation error (default=False).
786
+ energy of quantisation error. Default: False.
787
+
788
+ Raises:
789
+ TypeError: If `density_function` is not of type :class:`mindspore.dataset.audio.DensityFunction` .
790
+ TypeError: If `noise_shaping` is not of type bool.
791
+ RuntimeError: If input tensor is not in shape of <..., time>.
792
+
793
+ Supported Platforms:
794
+ ``CPU``
723
795
 
724
796
  Examples:
725
797
  >>> import numpy as np
@@ -742,13 +814,26 @@ class Dither(AudioTensorOperation):
742
814
 
743
815
  class EqualizerBiquad(AudioTensorOperation):
744
816
  """
745
- Design biquad equalizer filter and perform filtering. Similar to SoX implementation.
817
+ Design biquad equalizer filter and perform filtering.
818
+
819
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
746
820
 
747
821
  Args:
748
- sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero.
822
+ sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
749
823
  center_freq (float): Central frequency (in Hz).
750
824
  gain (float): Desired gain at the boost (or attenuation) in dB.
751
- Q (float, optional): https://en.wikipedia.org/wiki/Q_factor, range: (0, 1] (default=0.707).
825
+ Q (float, optional): https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: 0.707.
826
+
827
+ Raises:
828
+ TypeError: If `sample_rate` is not of type int.
829
+ ValueError: If `sample_rate` is 0.
830
+ TypeError: If `center_freq` is not of type float.
831
+ TypeError: If `gain` is not of type float.
832
+ TypeError: If `Q` is not of type float.
833
+ ValueError: If `Q` is not in range of (0, 1].
834
+
835
+ Supported Platforms:
836
+ ``CPU``
752
837
 
753
838
  Examples:
754
839
  >>> import numpy as np
@@ -783,11 +868,10 @@ class Fade(AudioTensorOperation):
783
868
  Add a fade in and/or fade out to a waveform.
784
869
 
785
870
  Args:
786
- fade_in_len (int, optional): Length of fade-in (time frames), which must be non-negative (default=0).
787
- fade_out_len (int, optional): Length of fade-out (time frames), which must be non-negative (default=0).
788
- fade_shape (FadeShape, optional): Shape of fade (default=FadeShape.LINEAR). Can be one of
789
- FadeShape.QUARTER_SINE, FadeShape.HALF_SINE, FadeShape.LINEAR, FadeShape.LOGARITHMIC or
790
- FadeShape.EXPONENTIAL.
871
+ fade_in_len (int, optional): Length of fade-in (time frames), which must be non-negative. Default: 0.
872
+ fade_out_len (int, optional): Length of fade-out (time frames), which must be non-negative. Default: 0.
873
+ fade_shape (FadeShape, optional): Shape of fade, five different types can be chosen as defined in FadeShape.
874
+ Default: FadeShape.LINEAR.
791
875
 
792
876
  -FadeShape.QUARTER_SINE, means it tends to 0 in a quarter sine function.
793
877
 
@@ -803,6 +887,9 @@ class Fade(AudioTensorOperation):
803
887
  RuntimeError: If fade_in_len exceeds waveform length.
804
888
  RuntimeError: If fade_out_len exceeds waveform length.
805
889
 
890
+ Supported Platforms:
891
+ ``CPU``
892
+
806
893
  Examples:
807
894
  >>> import numpy as np
808
895
  >>> from mindspore.dataset.audio import FadeShape
@@ -829,13 +916,20 @@ class Filtfilt(AudioTensorOperation):
829
916
  Apply an IIR filter forward and backward to a waveform.
830
917
 
831
918
  Args:
832
- a_coeffs (Sequence): denominator coefficients of difference equation of dimension of (n_order + 1).
919
+ a_coeffs (Sequence[float]): Denominator coefficients of difference equation of dimension of (n_order + 1).
833
920
  Lower delays coefficients are first, e.g. [a0, a1, a2, ...].
834
921
  Must be same size as b_coeffs (pad with 0's as necessary).
835
- b_coeffs (Sequence): numerator coefficients of difference equation of dimension of (n_order + 1).
922
+ b_coeffs (Sequence[float]): Numerator coefficients of difference equation of dimension of (n_order + 1).
836
923
  Lower delays coefficients are first, e.g. [b0, b1, b2, ...].
837
924
  Must be same size as a_coeffs (pad with 0's as necessary).
838
- clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default=True.
925
+ clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default: True.
926
+
927
+ Raises:
928
+ TypeError: If `a_coeffs` is not of type Sequence[float].
929
+ TypeError: If `b_coeffs` is not of type Sequence[float].
930
+ ValueError: If `a_coeffs` and `b_coeffs` are of different sizes.
931
+ TypeError: If `clamp` is not of type bool.
932
+ RuntimeError: If shape of the input audio is not <..., time>.
839
933
 
840
934
  Examples:
841
935
  >>> import numpy as np
@@ -870,18 +964,42 @@ class Flanger(AudioTensorOperation):
870
964
  """
871
965
  Apply a flanger effect to the audio.
872
966
 
967
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
968
+
873
969
  Args:
874
970
  sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
875
- delay (float, optional): Desired delay in milliseconds (ms), range: [0, 30] (default=0.0).
876
- depth (float, optional): Desired delay depth in milliseconds (ms), range: [0, 10] (default=2.0).
877
- regen (float, optional): Desired regen (feedback gain) in dB, range: [-95, 95] (default=0.0).
878
- width (float, optional): Desired width (delay gain) in dB, range: [0, 100] (default=71.0).
879
- speed (float, optional): Modulation speed in Hz, range: [0.1, 10] (default=0.5).
880
- phase (float, optional): Percentage phase-shift for multi-channel, range: [0, 100] (default=25.0).
881
- modulation (Modulation, optional): Modulation of the input tensor (default=Modulation.SINUSOIDAL).
882
- It can be one of Modulation.SINUSOIDAL or Modulation.TRIANGULAR.
883
- interpolation (Interpolation, optional): Interpolation of the input tensor (default=Interpolation.LINEAR).
884
- It can be one of Interpolation.LINEAR or Interpolation.QUADRATIC.
971
+ delay (float, optional): Desired delay in milliseconds, in range of [0, 30]. Default: 0.0.
972
+ depth (float, optional): Desired delay depth in milliseconds, in range of [0, 10]. Default: 2.0.
973
+ regen (float, optional): Desired regen (feedback gain) in dB, in range of [-95, 95]. Default: 0.0.
974
+ width (float, optional): Desired width (delay gain) in dB, in range of [0, 100]. Default: 71.0.
975
+ speed (float, optional): Modulation speed in Hz, in range of [0.1, 10]. Default: 0.5.
976
+ phase (float, optional): Percentage phase-shift for multi-channel, in range of [0, 100]. Default: 25.0.
977
+ modulation (Modulation, optional): Modulation method, can be Modulation.SINUSOIDAL or Modulation.TRIANGULAR.
978
+ Default: Modulation.SINUSOIDAL.
979
+ interpolation (Interpolation, optional): Interpolation method, can be Interpolation.LINEAR or
980
+ Interpolation.QUADRATIC. Default: Interpolation.LINEAR.
981
+
982
+ Raises:
983
+ TypeError: If `sample_rate` is not of type int.
984
+ ValueError: If `sample_rate` is zero.
985
+ TypeError: If `delay` is not of type float.
986
+ ValueError: If `delay` is not in range of [0, 30].
987
+ TypeError: If `depth` is not of type float.
988
+ ValueError: If `depth` is not in range of [0, 10].
989
+ TypeError: If `regen` is not of type float.
990
+ ValueError: If `regen` is not in range of [-95, 95].
991
+ TypeError: If `width` is not of type float.
992
+ ValueError: If `width` is not in range of [0, 100].
993
+ TypeError: If `speed` is not of type float.
994
+ ValueError: If `speed` is not in range of [0.1, 10].
995
+ TypeError: If `phase` is not of type float.
996
+ ValueError: If `phase` is not in range of [0, 100].
997
+ TypeError: If `modulation` is not of type :class:`mindspore.dataset.audio.Modulation` .
998
+ TypeError: If `interpolation` is not of type :class:`mindspore.dataset.audio.Interpolation` .
999
+ RuntimeError: If input tensor is not in shape of <..., channel, time>.
1000
+
1001
+ Supported Platforms:
1002
+ ``CPU``
885
1003
 
886
1004
  Examples:
887
1005
  >>> import numpy as np
@@ -917,7 +1035,7 @@ class FrequencyMasking(AudioTensorOperation):
917
1035
  Apply masking to a spectrogram in the frequency domain.
918
1036
 
919
1037
  Note:
920
- The dimension of the audio waveform to be processed needs to be (..., freq, time).
1038
+ The shape of the audio waveform to be processed needs to be <..., freq, time>.
921
1039
 
922
1040
  Args:
923
1041
  iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: False.
@@ -932,9 +1050,9 @@ class FrequencyMasking(AudioTensorOperation):
932
1050
 
933
1051
  Raises:
934
1052
  TypeError: If `iid_masks` is not of type bool.
935
- TypeError: If `freq_mask_param` is not of type integer.
1053
+ TypeError: If `freq_mask_param` is not of type int.
936
1054
  ValueError: If `freq_mask_param` is greater than the length of audio waveform in frequency domain.
937
- TypeError: If `mask_start` is not of type integer.
1055
+ TypeError: If `mask_start` is not of type int.
938
1056
  ValueError: If `mask_start` is a negative number.
939
1057
  TypeError: If `mask_value` is not of type float.
940
1058
  ValueError: If `mask_value` is a negative number.
@@ -974,7 +1092,13 @@ class Gain(AudioTensorOperation):
974
1092
  Apply amplification or attenuation to the whole waveform.
975
1093
 
976
1094
  Args:
977
- gain_db (float): Gain adjustment in decibels (dB) (default=1.0).
1095
+ gain_db (float): Gain adjustment in decibels (dB). Default: 1.0.
1096
+
1097
+ Raises:
1098
+ TypeError: If `gain_db` is not of type float.
1099
+
1100
+ Supported Platforms:
1101
+ ``CPU``
978
1102
 
979
1103
  Examples:
980
1104
  >>> import numpy as np
@@ -996,28 +1120,49 @@ class Gain(AudioTensorOperation):
996
1120
 
997
1121
  class GriffinLim(AudioTensorOperation):
998
1122
  r"""
999
- Approximate magnitude spectrogram inversion using the GriffinLim algorithm.
1000
-
1001
- .. math::
1002
- x(n)=\frac{\sum_{m=-\infty}^{\infty} w(m S-n) y_{w}(m S, n)}{\sum_{m=-\infty}^{\infty} w^{2}(m S-n)}
1123
+ Compute waveform from a linear scale magnitude spectrogram using the Griffin-Lim transformation.
1003
1124
 
1004
- where w represents the window function, y represents the reconstructed signal of each frame and x represents the
1005
- whole signal.
1125
+ For details about Griffin-Lim, please refer to `A fast Griffin-Lim algorithm <https://doi.org/10.1109/WASPAA.2013.6701851>`_
1126
+ and `Signal estimation from modified short-time Fourier transform <https://doi.org/10.1109/ICASSP.1983.1172092>`_ .
1006
1127
 
1007
1128
  Args:
1008
- n_fft (int, optional): Size of FFT (default=400).
1009
- n_iter (int, optional): Number of iteration for phase recovery (default=32).
1010
- win_length (int, optional): Window size for GriffinLim (default=None, will be set to n_fft).
1011
- hop_length (int, optional): Length of hop between STFT windows (default=None, will be set to win_length // 2).
1129
+ n_fft (int, optional): Size of FFT. Default: 400.
1130
+ n_iter (int, optional): Number of iteration for phase recovery. Default: 32.
1131
+ win_length (int, optional): Window size for GriffinLim. Default: None, will be set to `n_fft` .
1132
+ hop_length (int, optional): Length of hop between STFT windows.
1133
+ Default: None, will be set to `win_length // 2` .
1012
1134
  window_type (WindowType, optional): Window type for GriffinLim, which can be WindowType.BARTLETT,
1013
- WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN or WindowType.KAISER (default=WindowType.HANN).
1135
+ WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN or WindowType.KAISER. Default: WindowType.HANN.
1014
1136
  Currently kaiser window is not supported on macOS.
1015
- power (float, optional): Exponent for the magnitude spectrogram (default=2.0).
1016
- momentum (float, optional): The momentum for fast Griffin-Lim (default=0.99).
1017
- length (int, optional): Length of the expected output waveform (default=None, will be set to the value of last
1018
- dimension of the stft matrix).
1019
- rand_init (bool, optional): Flag for random phase initialization or all-zero phase initialization
1020
- (default=True).
1137
+ power (float, optional): Exponent for the magnitude spectrogram. Default: 2.0.
1138
+ momentum (float, optional): The momentum for fast Griffin-Lim. Default: 0.99.
1139
+ length (int, optional): Length of the expected output waveform. Default: None, will be set to the value of last
1140
+ dimension of the stft matrix.
1141
+ rand_init (bool, optional): Flag for random phase initialization or all-zero phase initialization.
1142
+ Default: True.
1143
+
1144
+ Raises:
1145
+ TypeError: If `n_fft` is not of type int.
1146
+ ValueError: If `n_fft` is not positive.
1147
+ TypeError: If `n_iter` is not of type int.
1148
+ ValueError: If `n_iter` is not positive.
1149
+ TypeError: If `win_length` is not of type int.
1150
+ ValueError: If `win_length` is a negative number.
1151
+ TypeError: If `hop_length` is not of type int.
1152
+ ValueError: If `hop_length` is a negative number.
1153
+ TypeError: If `window_type` is not of type :class:`mindspore.dataset.audio.WindowType` .
1154
+ TypeError: If `power` is not of type float.
1155
+ ValueError: If `power` is not positive.
1156
+ TypeError: If `momentum` is not of type float.
1157
+ ValueError: If `momentum` is a negative number.
1158
+ TypeError: If `length` is not of type int.
1159
+ ValueError: If `length` is a negative number.
1160
+ TypeError: If `rand_init` is not of type bool.
1161
+ RuntimeError: If `n_fft` is not less than `length` .
1162
+ RuntimeError: If `win_length` is not less than `n_fft` .
1163
+
1164
+ Supported Platforms:
1165
+ ``CPU``
1021
1166
 
1022
1167
  Examples:
1023
1168
  >>> import numpy as np
@@ -1029,7 +1174,7 @@ class GriffinLim(AudioTensorOperation):
1029
1174
  """
1030
1175
 
1031
1176
  @check_griffin_lim
1032
- def __init__(self, n_fft=400, n_iter=32, win_length=None, hop_length=None, window_type=WindowType.HANN, power=2,
1177
+ def __init__(self, n_fft=400, n_iter=32, win_length=None, hop_length=None, window_type=WindowType.HANN, power=2.0,
1033
1178
  momentum=0.99, length=None, rand_init=True):
1034
1179
  super().__init__()
1035
1180
  self.n_fft = n_fft
@@ -1050,12 +1195,25 @@ class GriffinLim(AudioTensorOperation):
1050
1195
 
1051
1196
  class HighpassBiquad(AudioTensorOperation):
1052
1197
  """
1053
- Design biquad highpass filter and perform filtering. Similar to SoX implementation.
1198
+ Design biquad highpass filter and perform filtering.
1199
+
1200
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
1054
1201
 
1055
1202
  Args:
1056
- sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero.
1203
+ sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
1057
1204
  cutoff_freq (float): Filter cutoff frequency (in Hz).
1058
- Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1] (default=0.707).
1205
+ Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: 0.707.
1206
+
1207
+ Raises:
1208
+ TypeError: If `sample_rate` is not of type int.
1209
+ ValueError: If `sample_rate` is 0.
1210
+ TypeError: If `cutoff_freq` is not of type float.
1211
+ TypeError: If `Q` is not of type float.
1212
+ ValueError: If `Q` is not in range of (0, 1].
1213
+ RuntimeError: If the shape of input audio waveform does not match <..., time>.
1214
+
1215
+ Supported Platforms:
1216
+ ``CPU``
1059
1217
 
1060
1218
  Examples:
1061
1219
  >>> import numpy as np
@@ -1079,22 +1237,46 @@ class HighpassBiquad(AudioTensorOperation):
1079
1237
 
1080
1238
  class InverseMelScale(AudioTensorOperation):
1081
1239
  """
1082
- Solve for a normal STFT form a mel frequency STFT, using a conversion matrix.
1240
+ Solve for a normal STFT from a mel frequency STFT, using a conversion matrix.
1083
1241
 
1084
1242
  Args:
1085
1243
  n_stft (int): Number of bins in STFT.
1086
- n_mels (int, optional): Number of mel filterbanks (default=128).
1087
- sample_rate (int, optional): Sample rate of audio signal (default=16000).
1088
- f_min (float, optional): Minimum frequency (default=0.0).
1089
- f_max (float, optional): Maximum frequency (default=None, will be set to sample_rate // 2).
1090
- max_iter (int, optional): Maximum number of optimization iterations (default=100000).
1091
- tolerance_loss (float, optional): Value of loss to stop optimization at (default=1e-5).
1092
- tolerance_change (float, optional): Difference in losses to stop optimization at (default=1e-8).
1093
- sgdargs (dict, optional): Arguments for the SGD optimizer (default=None, will be set to
1094
- {'sgd_lr': 0.1, 'sgd_momentum': 0.9}).
1095
- norm (NormType, optional): Normalization method, can be NormType.SLANEY or NormType.NONE
1096
- (default=NormType.NONE).
1097
- mel_type (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK (default=MelType.HTK).
1244
+ n_mels (int, optional): Number of mel filterbanks. Default: 128.
1245
+ sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
1246
+ f_min (float, optional): Minimum frequency. Default: 0.0.
1247
+ f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
1248
+ max_iter (int, optional): Maximum number of optimization iterations. Default: 100000.
1249
+ tolerance_loss (float, optional): Value of loss to stop optimization at. Default: 1e-5.
1250
+ tolerance_change (float, optional): Difference in losses to stop optimization at. Default: 1e-8.
1251
+ sgdargs (dict, optional): Arguments for the SGD optimizer. Default: None, will be set to
1252
+ {'sgd_lr': 0.1, 'sgd_momentum': 0.9}.
1253
+ norm (NormType, optional): Normalization method, can be NormType.SLANEY or NormType.NONE.
1254
+ Default: NormType.NONE, no normalization.
1255
+ mel_type (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK. Default: MelType.HTK.
1256
+
1257
+ Raises:
1258
+ TypeError: If `n_stft` is not of type int.
1259
+ ValueError: If `n_stft` is not positive.
1260
+ TypeError: If `n_mels` is not of type int.
1261
+ ValueError: If `n_mels` is not positive.
1262
+ TypeError: If `sample_rate` is not of type int.
1263
+ ValueError: If `sample_rate` is not positive.
1264
+ TypeError: If `f_min` is not of type float.
1265
+ ValueError: If `f_min` is greater than or equal to `f_max` .
1266
+ TypeError: If `f_max` is not of type float.
1267
+ ValueError: If `f_max` is a negative number.
1268
+ TypeError: If `max_iter` is not of type int.
1269
+ ValueError: If `max_iter` is a negative number.
1270
+ TypeError: If `tolerance_loss` is not of type float.
1271
+ ValueError: If `tolerance_loss` is a negative number.
1272
+ TypeError: If `tolerance_change` is not of type float.
1273
+ ValueError: If `tolerance_change` is a negative number.
1274
+ TypeError: If `sgdargs` is not of type dict.
1275
+ TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
1276
+ TypeError: If `mel_type` is not of type :class:`mindspore.dataset.audio.MelType` .
1277
+
1278
+ Supported Platforms:
1279
+ ``CPU``
1098
1280
 
1099
1281
  Examples:
1100
1282
  >>> import numpy as np
@@ -1130,18 +1312,199 @@ class InverseMelScale(AudioTensorOperation):
1130
1312
  DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_type))
1131
1313
 
1132
1314
 
1315
class InverseSpectrogram(AudioTensorOperation):
    """
    Recover an audio signal from a spectrogram by applying an inverse spectrogram transform.

    Args:
        length (int, optional): Length of the output waveform, which must be non-negative.
            Default: None, means to output the whole waveform (stored internally as 0).
        n_fft (int, optional): Size of FFT, which creates `n_fft // 2 + 1` bins and should be
            greater than 0. Default: 400.
        win_length (int, optional): Window size, which should be greater than 0.
            Default: None, will be set to `n_fft` .
        hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
            Default: None, will be set to `win_length // 2` .
        pad (int, optional): Two-sided padding of the signal, which cannot be less than 0. Default: 0.
        window (WindowType, optional): Window function that is applied/multiplied to each
            frame/window. Default: WindowType.HANN.
        normalized (bool, optional): Whether the spectrogram was normalized by magnitude after stft.
            Default: False.
        center (bool, optional): Whether the signal in the spectrogram was padded on both sides.
            Default: True.
        pad_mode (BorderType, optional): Padding method used when `center` is True,
            can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE or BorderType.SYMMETRIC.
            Default: BorderType.REFLECT.
        onesided (bool, optional): Whether the spectrogram was computed to return only half of the
            results to avoid redundancy. Default: True.

    Raises:
        TypeError: If `length` is not of type int.
        ValueError: If `length` is a negative number.
        TypeError: If `n_fft` is not of type int.
        ValueError: If `n_fft` is not positive.
        TypeError: If `win_length` is not of type int.
        ValueError: If `win_length` is not positive.
        TypeError: If `hop_length` is not of type int.
        ValueError: If `hop_length` is not positive.
        TypeError: If `pad` is not of type int.
        ValueError: If `pad` is a negative number.
        TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
        TypeError: If `normalized` is not of type bool.
        TypeError: If `center` is not of type bool.
        TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
        TypeError: If `onesided` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>>
        >>> waveform = np.array([[[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
        ...                       [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]]])
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.InverseSpectrogram(1, 400, 400, 200)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
    """

    @check_inverse_spectrogram
    def __init__(self, length=None, n_fft=400, win_length=None, hop_length=None, pad=0,
                 window=WindowType.HANN, normalized=False, center=True,
                 pad_mode=BorderType.REFLECT, onesided=True):
        super().__init__()
        # Resolve the None placeholders in dependency order: the window size falls back
        # to n_fft, and the hop size to half of the resolved window size.
        if length is None:
            length = 0
        if win_length is None:
            win_length = n_fft
        if hop_length is None:
            hop_length = win_length // 2

        self.length = length
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.pad = pad
        self.window = window
        self.normalized = normalized
        self.center = center
        self.pad_mode = pad_mode
        self.onesided = onesided

    def parse(self):
        # Translate the Python enums into their C++ counterparts before building the operation.
        window_type = DE_C_WINDOW_TYPE.get(self.window)
        border_type = DE_C_BORDER_TYPE.get(self.pad_mode)
        return cde.InverseSpectrogramOperation(self.length, self.n_fft, self.win_length, self.hop_length,
                                               self.pad, window_type, self.normalized, self.center,
                                               border_type, self.onesided)
1389
+
1390
+
1391
# Lookup table translating the Python-level NormMode enum into the matching cde.NormMode value.
DE_C_NORM_MODE = {
    NormMode.ORTHO: cde.NormMode.DE_NORM_MODE_ORTHO,
    NormMode.NONE: cde.NormMode.DE_NORM_MODE_NONE,
}
1393
+
1394
+
1395
class LFCC(AudioTensorOperation):
    """
    Create LFCC for a raw audio signal.

    Note:
        The shape of the audio waveform to be processed needs to be <..., time>.

    Args:
        sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
        n_filter (int, optional): Number of linear filters to apply. Default: 128.
        n_lfcc (int, optional): Number of lfc coefficients to retain. Default: 40.
        f_min (float, optional): Minimum frequency. Default: 0.0.
        f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
        dct_type (int, optional): Type of DCT to use. The value can only be 2. Default: 2.
        norm (NormMode, optional): Norm to use. Default: NormMode.ORTHO.
        log_lf (bool, optional): Whether to use log-lf spectrograms instead of db-scaled. Default: False.
        speckwargs (dict, optional): Arguments for :class:`mindspore.dataset.audio.Spectrogram`.
            The dict passed in is copied and never modified; keys that are missing are filled
            with the defaults below. Default: None, the default setting is a dict including

            - 'n_fft': 400
            - 'win_length': n_fft
            - 'hop_length': win_length // 2
            - 'pad': 0
            - 'window': WindowType.HANN
            - 'power': 2.0
            - 'normalized': False
            - 'center': True
            - 'pad_mode': BorderType.REFLECT
            - 'onesided': True

    Raises:
        TypeError: If `sample_rate` is not of type int.
        TypeError: If `n_filter` is not of type int.
        TypeError: If `n_lfcc` is not of type int.
        TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormMode` .
        TypeError: If `log_lf` is not of type bool.
        TypeError: If `speckwargs` is not of type dict.
        ValueError: If `sample_rate` is 0.
        ValueError: If `n_lfcc` is less than 0.
        ValueError: If `f_min` is greater than `f_max` .
        ValueError: If `f_min` is greater than `sample_rate // 2` when `f_max` is set to None.
        ValueError: If `dct_type` is not 2.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> waveform = np.random.random([1, 1, 300])
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.LFCC()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
    """

    @check_lfcc
    def __init__(self, sample_rate=16000, n_filter=128, n_lfcc=40, f_min=0.0, f_max=None, dct_type=2,
                 norm=NormMode.ORTHO, log_lf=False, speckwargs=None):
        super().__init__()
        self.sample_rate = sample_rate
        self.n_filter = n_filter
        self.n_lfcc = n_lfcc
        self.f_min = f_min
        self.f_max = f_max if f_max is not None else sample_rate // 2
        self.dct_type = dct_type
        self.norm = norm
        self.log_lf = log_lf
        # Fill the defaults on a private shallow copy: calling setdefault() on the caller's
        # dict would leak derived values (e.g. win_length, hop_length) back to the caller and
        # make them stick if the same dict is reused later with a different n_fft.
        self.speckwargs = dict(speckwargs) if speckwargs is not None else {}
        self.speckwargs.setdefault("n_fft", 400)
        # win_length and hop_length are derived from the (possibly user-supplied) earlier keys,
        # so the order of these three setdefault calls matters.
        self.speckwargs.setdefault("win_length", self.speckwargs.get("n_fft"))
        self.speckwargs.setdefault("hop_length", self.speckwargs.get("win_length") // 2)
        self.speckwargs.setdefault("pad", 0)
        self.speckwargs.setdefault("window", WindowType.HANN)
        self.speckwargs.setdefault("power", 2.0)
        self.speckwargs.setdefault("normalized", False)
        self.speckwargs.setdefault("center", True)
        self.speckwargs.setdefault("pad_mode", BorderType.REFLECT)
        self.speckwargs.setdefault("onesided", True)
        # The enum-valued entries are kept separately because parse() passes them
        # translated to their C++ counterparts alongside the dict.
        self.window = self.speckwargs.get("window")
        self.pad_mode = self.speckwargs.get("pad_mode")

    def parse(self):
        return cde.LFCCOperation(self.sample_rate, self.n_filter, self.n_lfcc, self.f_min, self.f_max,
                                 self.dct_type, DE_C_NORM_MODE.get(self.norm), self.log_lf, self.speckwargs,
                                 DE_C_WINDOW_TYPE.get(self.window), DE_C_BORDER_TYPE.get(self.pad_mode))
1484
+
1485
+
1133
1486
  class LFilter(AudioTensorOperation):
1134
1487
  """
1135
- Design two-pole filter for audio waveform of dimension of (..., time).
1488
+ Perform an IIR filter by evaluating difference equation.
1136
1489
 
1137
1490
  Args:
1138
- a_coeffs (sequence): denominator coefficients of difference equation of dimension of (n_order + 1).
1491
+ a_coeffs (Sequence[float]): Denominator coefficients of difference equation of dimension.
1139
1492
  Lower delays coefficients are first, e.g. [a0, a1, a2, ...].
1140
1493
  Must be same size as b_coeffs (pad with 0's as necessary).
1141
- b_coeffs (sequence): numerator coefficients of difference equation of dimension of (n_order + 1).
1494
+ b_coeffs (Sequence[float]): Numerator coefficients of difference equation of dimension.
1142
1495
  Lower delays coefficients are first, e.g. [b0, b1, b2, ...].
1143
1496
  Must be same size as a_coeffs (pad with 0's as necessary).
1144
- clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1] (default=True).
1497
+ clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default: True.
1498
+
1499
+ Raises:
1500
+ TypeError: If `a_coeffs` is not of type Sequence[float].
1501
+ TypeError: If `b_coeffs` is not of type Sequence[float].
1502
+ ValueError: If `a_coeffs` and `b_coeffs` are of different sizes.
1503
+ TypeError: If `clamp` is not of type bool.
1504
+ RuntimeError: If input tensor is not in shape of <..., time>.
1505
+
1506
+ Supported Platforms:
1507
+ ``CPU``
1145
1508
 
1146
1509
  Examples:
1147
1510
  >>> import numpy as np
@@ -1178,7 +1541,7 @@ class LowpassBiquad(AudioTensorOperation):
1178
1541
  Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
1179
1542
 
1180
1543
  Note:
1181
- The dimension of the audio waveform to be processed needs to be (..., time).
1544
+ The shape of the audio waveform to be processed needs to be <..., time>.
1182
1545
 
1183
1546
  Args:
1184
1547
  sample_rate (int): Sampling rate (in Hz), which can't be zero.
@@ -1187,7 +1550,7 @@ class LowpassBiquad(AudioTensorOperation):
1187
1550
  in range of (0, 1]. Default: 0.707.
1188
1551
 
1189
1552
  Raises:
1190
- TypeError: If `sample_rate` is not of type integer.
1553
+ TypeError: If `sample_rate` is not of type int.
1191
1554
  ValueError: If `sample_rate` is 0.
1192
1555
  TypeError: If `cutoff_freq` is not of type float.
1193
1556
  TypeError: If `Q` is not of type float.
@@ -1223,7 +1586,13 @@ class Magphase(AudioTensorOperation):
1223
1586
  Separate a complex-valued spectrogram with shape (..., 2) into its magnitude and phase.
1224
1587
 
1225
1588
  Args:
1226
- power (float): Power of the norm, which must be non-negative (default=1.0).
1589
+ power (float): Power of the norm, which must be non-negative. Default: 1.0.
1590
+
1591
+ Raises:
1592
+ RuntimeError: If the shape of input audio waveform does not match (..., 2).
1593
+
1594
+ Supported Platforms:
1595
+ ``CPU``
1227
1596
 
1228
1597
  Examples:
1229
1598
  >>> import numpy as np
@@ -1245,13 +1614,21 @@ class Magphase(AudioTensorOperation):
1245
1614
 
1246
1615
  class MaskAlongAxis(AudioTensorOperation):
1247
1616
  """
1248
- Apply a mask along `axis`. Mask will be applied from indices `[mask_start, mask_start + mask_width)`.
1617
+ Apply a mask along `axis` . Mask will be applied from indices `[mask_start, mask_start + mask_width)` .
1249
1618
 
1250
1619
  Args:
1251
1620
  mask_start (int): Starting position of the mask, which must be non negative.
1252
- mask_width (int): The width of the mask, which must be non negative.
1621
+ mask_width (int): The width of the mask, which must be larger than 0.
1253
1622
  mask_value (float): Value to assign to the masked columns.
1254
- axis (int): Axis to apply masking on (1 for frequency and 2 for time).
1623
+ axis (int): Axis to apply mask on (1 for frequency and 2 for time).
1624
+
1625
+ Raises:
1626
+ ValueError: If `mask_start` is invalid (< 0).
1627
+ ValueError: If `mask_width` is invalid (< 1).
1628
+ ValueError: If `axis` is not of type int or not within [1, 2].
1629
+
1630
+ Supported Platforms:
1631
+ ``CPU``
1255
1632
 
1256
1633
  Examples:
1257
1634
  >>> import numpy as np
@@ -1276,15 +1653,27 @@ class MaskAlongAxis(AudioTensorOperation):
1276
1653
 
1277
1654
  class MaskAlongAxisIID(AudioTensorOperation):
1278
1655
  """
1279
- Apply a mask along `axis`. Mask will be applied from indices `[mask_start, mask_start + mask_width)`, where
1280
- `mask_width` is sampled from `uniform[0, mask_param]`, and `mask_start` from `uniform[0, max_length - mask_width]`,
1281
- `max_length` is the number of columns of the specified axis of the spectrogram.
1656
+ Apply a mask along `axis` . Mask will be applied from indices `[mask_start, mask_start + mask_width)` , where
1657
+ `mask_width` is sampled from `uniform[0, mask_param]` , and `mask_start` from
1658
+ `uniform[0, max_length - mask_width]` , `max_length` is the number of columns of the specified axis
1659
+ of the spectrogram.
1282
1660
 
1283
1661
  Args:
1284
1662
  mask_param (int): Number of columns to be masked, will be uniformly sampled from
1285
1663
  [0, mask_param], must be non negative.
1286
1664
  mask_value (float): Value to assign to the masked columns.
1287
- axis (int): Axis to apply masking on (1 for frequency and 2 for time).
1665
+ axis (int): Axis to apply mask on (1 for frequency and 2 for time).
1666
+
1667
+ Raises:
1668
+ TypeError: If `mask_param` is not of type int.
1669
+ ValueError: If `mask_param` is a negative value.
1670
+ TypeError: If `mask_value` is not of type float.
1671
+ TypeError: If `axis` is not of type int.
1672
+ ValueError: If `axis` is not in range of [1, 2].
1673
+ RuntimeError: If input tensor is not in shape of <..., freq, time>.
1674
+
1675
+ Supported Platforms:
1676
+ ``CPU``
1288
1677
 
1289
1678
  Examples:
1290
1679
  >>> import numpy as np
@@ -1318,15 +1707,32 @@ class MelScale(AudioTensorOperation):
1318
1707
  Convert normal STFT to STFT at the Mel scale.
1319
1708
 
1320
1709
  Args:
1321
- n_mels (int, optional): Number of mel filterbanks (default=128).
1322
- sample_rate (int, optional): Sample rate of audio signal (default=16000).
1323
- f_min (float, optional): Minimum frequency (default=0).
1324
- f_max (float, optional): Maximum frequency (default=None, will be set to sample_rate // 2).
1325
- n_stft (int, optional): Number of bins in STFT (default=201).
1710
+ n_mels (int, optional): Number of mel filterbanks. Default: 128.
1711
+ sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
1712
+ f_min (float, optional): Minimum frequency. Default: 0.0.
1713
+ f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
1714
+ n_stft (int, optional): Number of bins in STFT. Default: 201.
1326
1715
  norm (NormType, optional): Type of norm, value should be NormType.SLANEY or NormType.NONE.
1327
1716
  If norm is NormType.SLANEY, divide the triangular mel weight by the width of the mel band.
1328
- (default=NormType.NONE).
1329
- mel_type (MelType, optional): Type to use, value should be MelType.SLANEY or MelType.HTK (default=MelType.HTK).
1717
+ Default: NormType.NONE, no normalization.
1718
+ mel_type (MelType, optional): Type to use, value should be MelType.SLANEY or MelType.HTK. Default: MelType.HTK.
1719
+
1720
+ Raises:
1721
+ TypeError: If `n_mels` is not of type int.
1722
+ ValueError: If `n_mels` is not positive.
1723
+ TypeError: If `sample_rate` is not of type int.
1724
+ ValueError: If `sample_rate` is not positive.
1725
+ TypeError: If `f_min` is not of type float.
1726
+ ValueError: If `f_min` is greater than or equal to `f_max` .
1727
+ TypeError: If `f_max` is not of type float.
1728
+ ValueError: If `f_max` is a negative number.
1729
+ TypeError: If `n_stft` is not of type int.
1730
+ ValueError: If `n_stft` is not positive.
1731
+ TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
1732
+ TypeError: If `mel_type` is not of type :class:`mindspore.dataset.audio.MelType` .
1733
+
1734
+ Supported Platforms:
1735
+ ``CPU``
1330
1736
 
1331
1737
  Examples:
1332
1738
  >>> import numpy as np
@@ -1339,7 +1745,7 @@ class MelScale(AudioTensorOperation):
1339
1745
  """
1340
1746
 
1341
1747
  @check_mel_scale
1342
- def __init__(self, n_mels=128, sample_rate=16000, f_min=0, f_max=None, n_stft=201, norm=NormType.NONE,
1748
+ def __init__(self, n_mels=128, sample_rate=16000, f_min=0.0, f_max=None, n_stft=201, norm=NormType.NONE,
1343
1749
  mel_type=MelType.HTK):
1344
1750
  super().__init__()
1345
1751
  self.n_mels = n_mels
@@ -1355,12 +1761,215 @@ class MelScale(AudioTensorOperation):
1355
1761
  DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_type))
1356
1762
 
1357
1763
 
1764
class MelSpectrogram(AudioTensorOperation):
    r"""
    Compute the mel spectrogram of a raw audio signal.

    Args:
        sample_rate (int, optional): Sampling rate of audio signal (in Hz), which can't be less than 0.
            Default: 16000.
        n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0
            and less than twice of the last dimension size of the input. Default: 400.
        win_length (int, optional): Window size, which should be greater than 0 and no more than `n_fft` .
            Default: None, will be set to `n_fft` .
        hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
            Default: None, will be set to `win_length // 2` .
        f_min (float, optional): Minimum frequency, which can't be greater than `f_max` . Default: 0.0.
        f_max (float, optional): Maximum frequency, which can't be less than 0. Default: None, will be set
            to `sample_rate // 2` .
        pad (int, optional): Two-sided padding of the signal, which can't be less than 0. Default: 0.
        n_mels (int, optional): Number of mel filterbanks, which can't be less than 0. Default: 128.
        window (WindowType, optional): Window function that is applied/multiplied to each
            frame/window. Default: WindowType.HANN.
        power (float, optional): Exponent for the magnitude spectrogram, which must be
            greater than 0, e.g., 1 for energy, 2 for power, etc. Default: 2.0.
        normalized (bool, optional): Whether to normalize by magnitude after stft. Default: False.
        center (bool, optional): Whether to pad waveform on both sides. Default: True.
        pad_mode (BorderType, optional): Padding method used when `center` is True,
            can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE or BorderType.SYMMETRIC.
            Default: BorderType.REFLECT.
        onesided (bool, optional): Whether to return only half of the results to avoid redundancy.
            Default: True.
        norm (NormType, optional): If NormType.SLANEY, divide the triangular mel weights by the width of
            the mel band (area normalization). Default: NormType.NONE, no normalization.
        mel_scale (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK.
            Default: MelType.HTK.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        TypeError: If `n_fft` is not of type int.
        TypeError: If `n_mels` is not of type int.
        TypeError: If `f_min` is not of type float.
        TypeError: If `f_max` is not of type float.
        TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
        TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
        TypeError: If `mel_scale` is not of type :class:`mindspore.dataset.audio.MelType` .
        TypeError: If `power` is not of type float.
        TypeError: If `normalized` is not of type bool.
        TypeError: If `center` is not of type bool.
        TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
        TypeError: If `onesided` is not of type bool.
        TypeError: If `pad` is not of type int.
        TypeError: If `win_length` is not of type int.
        TypeError: If `hop_length` is not of type int.
        ValueError: If `sample_rate` is a negative number.
        ValueError: If `n_fft` is not positive.
        ValueError: If `n_mels` is a negative number.
        ValueError: If `f_min` is greater than `f_max` .
        ValueError: If `f_max` is a negative number.
        ValueError: If `f_min` is not less than `sample_rate // 2` when `f_max` is set to None.
        ValueError: If `power` is not positive.
        ValueError: If `pad` is a negative number.
        ValueError: If `win_length` is not positive.
        ValueError: If `hop_length` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>>
        >>> from mindspore.dataset.audio import WindowType, BorderType, NormType, MelType
        >>>
        >>> waveform = np.array([[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4]]])
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.MelSpectrogram(sample_rate=16000, n_fft=16, win_length=16, hop_length=8, f_min=0.0, \
        ...                                    f_max=5000.0, pad=0, n_mels=8, window=WindowType.HANN, power=2.0, \
        ...                                    normalized=False, center=True, pad_mode=BorderType.REFLECT, \
        ...                                    onesided=True, norm=NormType.SLANEY, mel_scale=MelType.HTK)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
    """

    @check_mel_spectrogram
    def __init__(self, sample_rate=16000, n_fft=400, win_length=None, hop_length=None, f_min=0.0, f_max=None, pad=0,
                 n_mels=128, window=WindowType.HANN, power=2.0, normalized=False, center=True,
                 pad_mode=BorderType.REFLECT, onesided=True, norm=NormType.NONE, mel_scale=MelType.HTK):
        super().__init__()
        # Resolve the None placeholders first: the window size falls back to n_fft, the hop
        # size to half of the resolved window size, and f_max to half of the sample rate.
        if win_length is None:
            win_length = n_fft
        if hop_length is None:
            hop_length = win_length // 2
        if f_max is None:
            f_max = sample_rate // 2

        self.sample_rate = sample_rate
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.f_min = f_min
        self.f_max = f_max
        self.pad = pad
        self.n_mels = n_mels
        self.window = window
        self.power = power
        self.normalized = normalized
        self.center = center
        self.pad_mode = pad_mode
        self.onesided = onesided
        self.norm = norm
        self.mel_scale = mel_scale

    def parse(self):
        # Translate the Python enums into their C++ counterparts before building the operation.
        window_type = DE_C_WINDOW_TYPE.get(self.window)
        border_type = DE_C_BORDER_TYPE.get(self.pad_mode)
        norm_type = DE_C_NORM_TYPE.get(self.norm)
        mel_type = DE_C_MEL_TYPE.get(self.mel_scale)
        return cde.MelSpectrogramOperation(self.sample_rate, self.n_fft, self.win_length, self.hop_length,
                                           self.f_min, self.f_max, self.pad, self.n_mels, window_type,
                                           self.power, self.normalized, self.center, border_type,
                                           self.onesided, norm_type, mel_type)
1868
+
1869
+
1870
class MFCC(AudioTensorOperation):
    """
    Create MFCC for a raw audio signal.

    Args:
        sample_rate (int, optional): Sampling rate of audio signal (in Hz), can't be less than 0. Default: 16000.
        n_mfcc (int, optional): Number of mfc coefficients to retain, can't be less than 0. Default: 40.
        dct_type (int, optional): Type of DCT (discrete cosine transform) to use, can only be 2. Default: 2.
        norm (NormMode, optional): Norm to use. Default: NormMode.ORTHO.
        log_mels (bool, optional): Whether to use log-mel spectrograms instead of db-scaled. Default: False.
        melkwargs (dict, optional): Arguments for :class:`mindspore.dataset.audio.MelSpectrogram`.
            The dict passed in is copied and never modified; keys that are missing are filled
            with the defaults below. Default: None, the default setting is a dict including

            - 'n_fft': 400
            - 'win_length': n_fft
            - 'hop_length': win_length // 2
            - 'f_min': 0.0
            - 'f_max': sample_rate // 2
            - 'pad': 0
            - 'n_mels': 128
            - 'window': WindowType.HANN
            - 'power': 2.0
            - 'normalized': False
            - 'center': True
            - 'pad_mode': BorderType.REFLECT
            - 'onesided': True
            - 'norm': NormType.NONE
            - 'mel_scale': MelType.HTK

    Raises:
        TypeError: If `sample_rate` is not of type int.
        TypeError: If `log_mels` is not of type bool.
        TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormMode` .
        TypeError: If `n_mfcc` is not of type int.
        TypeError: If `melkwargs` is not of type dict.
        ValueError: If `sample_rate` is a negative number.
        ValueError: If `n_mfcc` is a negative number.
        ValueError: If `dct_type` is not 2.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>>
        >>> waveform = np.array([[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
        ...                      [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]])
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.MFCC(4000, 1500, 2)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
    """

    @check_mfcc
    def __init__(self, sample_rate=16000, n_mfcc=40, dct_type=2, norm=NormMode.ORTHO, log_mels=False, melkwargs=None):
        super().__init__()
        self.sample_rate = sample_rate
        self.n_mfcc = n_mfcc
        self.dct_type = dct_type
        self.norm = norm
        self.log_mels = log_mels
        # Fill the defaults on a private shallow copy: calling setdefault() on the caller's
        # dict would leak derived values (e.g. win_length, hop_length, f_max) back to the caller
        # and make them stick if the same dict is reused later with different settings.
        self.melkwargs = dict(melkwargs) if melkwargs is not None else {}
        self.melkwargs.setdefault("n_fft", 400)
        # win_length and hop_length are derived from the (possibly user-supplied) earlier keys,
        # so the order of these three setdefault calls matters.
        self.melkwargs.setdefault("win_length", self.melkwargs.get("n_fft"))
        self.melkwargs.setdefault("hop_length", self.melkwargs.get("win_length") // 2)
        self.melkwargs.setdefault("f_min", 0.0)
        self.melkwargs.setdefault("f_max", sample_rate // 2)
        self.melkwargs.setdefault("pad", 0)
        self.melkwargs.setdefault("n_mels", 128)
        self.melkwargs.setdefault("window", WindowType.HANN)
        self.melkwargs.setdefault("power", 2.0)
        self.melkwargs.setdefault("normalized", False)
        self.melkwargs.setdefault("center", True)
        self.melkwargs.setdefault("pad_mode", BorderType.REFLECT)
        self.melkwargs.setdefault("onesided", True)
        self.melkwargs.setdefault("norm", NormType.NONE)
        self.melkwargs.setdefault("mel_scale", MelType.HTK)
        # The enum-valued entries are kept separately because parse() passes them translated
        # to their C++ counterparts alongside the dict. The mel filterbank norm is stored as
        # norm_mel so it does not clash with the DCT norm held in self.norm.
        self.window = self.melkwargs.get("window")
        self.pad_mode = self.melkwargs.get("pad_mode")
        self.norm_mel = self.melkwargs.get("norm")
        self.mel_scale = self.melkwargs.get("mel_scale")

    def parse(self):
        return cde.MFCCOperation(self.sample_rate, self.n_mfcc, self.dct_type, DE_C_NORM_MODE.get(self.norm),
                                 self.log_mels, self.melkwargs, DE_C_WINDOW_TYPE.get(self.window),
                                 DE_C_BORDER_TYPE.get(self.pad_mode), DE_C_NORM_TYPE.get(self.norm_mel),
                                 DE_C_MEL_TYPE.get(self.mel_scale))
1957
+
1958
+
1358
1959
  class MuLawDecoding(AudioTensorOperation):
1359
1960
  """
1360
- Decode mu-law encoded signal.
1961
+ Decode mu-law encoded signal, refer to `mu-law algorithm <https://en.wikipedia.org/wiki/M-law_algorithm>`_ .
1361
1962
 
1362
1963
  Args:
1363
- quantization_channels (int, optional): Number of channels, which must be positive (Default: 256).
1964
+ quantization_channels (int, optional): Number of channels, which must be positive. Default: 256.
1965
+
1966
+ Raises:
1967
+ TypeError: If `quantization_channels` is not of type int.
1968
+ ValueError: If `quantization_channels` is not a positive number.
1969
+ RuntimeError: If input tensor is not in shape of <..., time>.
1970
+
1971
+ Supported Platforms:
1972
+ ``CPU``
1364
1973
 
1365
1974
  Examples:
1366
1975
  >>> import numpy as np
@@ -1385,7 +1994,14 @@ class MuLawEncoding(AudioTensorOperation):
1385
1994
  Encode signal based on mu-law companding.
1386
1995
 
1387
1996
  Args:
1388
- quantization_channels (int, optional): Number of channels, which must be positive (Default: 256).
1997
+ quantization_channels (int, optional): Number of channels, which must be positive. Default: 256.
1998
+
1999
+ Raises:
2000
+ TypeError: If `quantization_channels` is not of type int.
2001
+ ValueError: If `quantization_channels` is not a positive number.
2002
+
2003
+ Supported Platforms:
2004
+ ``CPU``
1389
2005
 
1390
2006
  Examples:
1391
2007
  >>> import numpy as np
@@ -1407,12 +2023,24 @@ class MuLawEncoding(AudioTensorOperation):
1407
2023
 
1408
2024
  class Overdrive(AudioTensorOperation):
1409
2025
  """
1410
- Apply overdrive on input audio.
2026
+ Apply an overdrive effect to the audio waveform.
2027
+
2028
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
1411
2029
 
1412
2030
  Args:
1413
- gain (float, optional): Desired gain at the boost (or attenuation) in dB, in range of [0, 100] (default=20.0).
2031
+ gain (float, optional): Desired gain at the boost (or attenuation) in dB, in range of [0, 100]. Default: 20.0.
1414
2032
  color (float, optional): Controls the amount of even harmonic content in the over-driven output,
1415
- in range of [0, 100] (default=20.0).
2033
+ in range of [0, 100]. Default: 20.0.
2034
+
2035
+ Raises:
2036
+ TypeError: If `gain` is not of type float.
2037
+ ValueError: If `gain` is not in range of [0, 100].
2038
+ TypeError: If `color` is not of type float.
2039
+ ValueError: If `color` is not in range of [0, 100].
2040
+ RuntimeError: If input tensor is not in shape of <..., time>.
2041
+
2042
+ Supported Platforms:
2043
+ ``CPU``
1416
2044
 
1417
2045
  Examples:
1418
2046
  >>> import numpy as np
@@ -1437,18 +2065,37 @@ class Phaser(AudioTensorOperation):
1437
2065
  """
1438
2066
  Apply a phasing effect to the audio.
1439
2067
 
2068
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
2069
+
1440
2070
  Args:
1441
2071
  sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
1442
- gain_in (float, optional): Desired input gain at the boost (or attenuation) in dB.
1443
- Allowed range of values is [0, 1] (default=0.4).
1444
- gain_out (float, optional): Desired output gain at the boost (or attenuation) in dB.
1445
- Allowed range of values is [0, 1e9] (default=0.74).
1446
- delay_ms (float, optional): Desired delay in milli seconds. Allowed range of values is [0, 5] (default=3.0).
1447
- decay (float, optional): Desired decay relative to gain-in. Allowed range of values is [0, 0.99] (default=0.4).
1448
- mod_speed (float, optional): Modulation speed in Hz. Allowed range of values is [0.1, 2] (default=0.5).
2072
+ gain_in (float, optional): Desired input gain at the boost (or attenuation) in dB,
2073
+ in range of [0.0, 1.0]. Default: 0.4.
2074
+ gain_out (float, optional): Desired output gain at the boost (or attenuation) in dB,
2075
+ in range of [0.0, 1e9]. Default: 0.74.
2076
+ delay_ms (float, optional): Desired delay in milliseconds, in range of [0.0, 5.0]. Default: 3.0.
2077
+ decay (float, optional): Desired decay relative to gain-in, in range of [0.0, 0.99]. Default: 0.4.
2078
+ mod_speed (float, optional): Modulation speed in Hz, in range of [0.1, 2.0]. Default: 0.5.
1449
2079
  sinusoidal (bool, optional): If True, use sinusoidal modulation (preferable for multiple instruments).
1450
- If False, use triangular modulation (gives single instruments a sharper
1451
- phasing effect) (default=True).
2080
+ If False, use triangular modulation (gives single instruments a sharper phasing effect). Default: True.
2081
+
2082
+ Raises:
2083
+ TypeError: If `sample_rate` is not of type int.
2084
+ TypeError: If `gain_in` is not of type float.
2085
+ ValueError: If `gain_in` is not in range of [0.0, 1.0].
2086
+ TypeError: If `gain_out` is not of type float.
2087
+ ValueError: If `gain_out` is not in range of [0.0, 1e9].
2088
+ TypeError: If `delay_ms` is not of type float.
2089
+ ValueError: If `delay_ms` is not in range of [0.0, 5.0].
2090
+ TypeError: If `decay` is not of type float.
2091
+ ValueError: If `decay` is not in range of [0.0, 0.99].
2092
+ TypeError: If `mod_speed` is not of type float.
2093
+ ValueError: If `mod_speed` is not in range of [0.1, 2.0].
2094
+ TypeError: If `sinusoidal` is not of type bool.
2095
+ RuntimeError: If input tensor is not in shape of <..., time>.
2096
+
2097
+ Supported Platforms:
2098
+ ``CPU``
1452
2099
 
1453
2100
  Examples:
1454
2101
  >>> import numpy as np
@@ -1478,11 +2125,20 @@ class Phaser(AudioTensorOperation):
1478
2125
 
1479
2126
  class PhaseVocoder(AudioTensorOperation):
1480
2127
  """
1481
- Given a STFT tensor, speed up in time without modifying pitch by a factor of rate.
2128
+ Given a STFT spectrogram, speed up in time without modifying pitch by a factor of rate.
1482
2129
 
1483
2130
  Args:
1484
2131
  rate (float): Speed-up factor.
1485
- phase_advance (numpy.ndarray): Expected phase advance in each bin in shape of (freq, 1).
2132
+ phase_advance (numpy.ndarray): Expected phase advance in each bin, in shape of (freq, 1).
2133
+
2134
+ Raises:
2135
+ TypeError: If `rate` is not of type float.
2136
+ ValueError: If `rate` is not a positive number.
2137
+ TypeError: If `phase_advance` is not of type :class:`numpy.ndarray` .
2138
+ RuntimeError: If input tensor is not in shape of <..., freq, num_frame, complex=2>.
2139
+
2140
+ Supported Platforms:
2141
+ ``CPU``
1486
2142
 
1487
2143
  Examples:
1488
2144
  >>> import numpy as np
@@ -1504,6 +2160,68 @@ class PhaseVocoder(AudioTensorOperation):
1504
2160
  return cde.PhaseVocoderOperation(self.rate, self.phase_advance)
1505
2161
 
1506
2162
 
2163
+ class PitchShift(AudioTensorOperation):
2164
+ """
2165
+ Shift the pitch of a waveform by `n_steps` steps.
2166
+
2167
+ Args:
2168
+ sample_rate (int): Sampling rate of waveform (in Hz).
2169
+ n_steps (int): The number of steps to shift the waveform by.
2170
+ bins_per_octave (int, optional): The number of steps per octave. Default: 12.
2171
+ n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 512.
2172
+ win_length (int, optional): Window size. Default: None, will be set to `n_fft` .
2173
+ hop_length (int, optional): Length of hop between STFT windows. Default: None,
2174
+ will be set to `win_length // 4` .
2175
+ window (WindowType, optional): Window tensor that is applied/multiplied to each frame/window.
2176
+ Default: WindowType.HANN.
2177
+
2178
+ Raises:
2179
+ TypeError: If `sample_rate` is not of type int.
2180
+ TypeError: If `n_steps` is not of type int.
2181
+ TypeError: If `bins_per_octave` is not of type int.
2182
+ TypeError: If `n_fft` is not of type int.
2183
+ TypeError: If `win_length` is not of type int.
2184
+ TypeError: If `hop_length` is not of type int.
2185
+ TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
2186
+ ValueError: If `sample_rate` is a negative number.
2187
+ ValueError: If `bins_per_octave` is 0.
2188
+ ValueError: If `n_fft` is a negative number.
2189
+ ValueError: If `win_length` is not positive.
2190
+ ValueError: If `hop_length` is not positive.
2191
+
2192
+ Supported Platforms:
2193
+ ``CPU``
2194
+
2195
+ Examples:
2196
+ >>> import numpy as np
2197
+ >>>
2198
+ >>> import mindspore.dataset as ds
2199
+ >>> import mindspore.dataset.audio as audio
2200
+ >>> from mindspore.dataset.audio import WindowType
2201
+ >>>
2202
+ >>> waveform = np.random.random([1, 1, 300])
2203
+ >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
2204
+ >>> transforms = [audio.PitchShift(sample_rate=16000, n_steps=4)]
2205
+ >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
2206
+ """
2207
+
2208
+ @check_pitch_shift
2209
+ def __init__(self, sample_rate, n_steps, bins_per_octave=12, n_fft=512, win_length=None,
2210
+ hop_length=None, window=WindowType.HANN):
2211
+ super().__init__()
2212
+ self.sample_rate = sample_rate
2213
+ self.n_steps = n_steps
2214
+ self.bins_per_octave = bins_per_octave
2215
+ self.n_fft = n_fft
2216
+ self.win_length = win_length if win_length is not None else n_fft
2217
+ self.hop_length = hop_length if hop_length is not None else self.win_length // 4
2218
+ self.window = window
2219
+
2220
+ def parse(self):
2221
+ return cde.PitchShiftOperation(self.sample_rate, self.n_steps, self.bins_per_octave, self.n_fft,
2222
+ self.win_length, self.hop_length, DE_C_WINDOW_TYPE.get(self.window))
2223
+
2224
+
1507
2225
  DE_C_RESAMPLE_METHOD = {ResampleMethod.SINC_INTERPOLATION: cde.ResampleMethod.DE_RESAMPLE_SINC_INTERPOLATION,
1508
2226
  ResampleMethod.KAISER_WINDOW: cde.ResampleMethod.DE_RESAMPLE_KAISER_WINDOW}
1509
2227
 
@@ -1513,16 +2231,30 @@ class Resample(AudioTensorOperation):
1513
2231
  Resample a signal from one frequency to another. A resample method can be given.
1514
2232
 
1515
2233
  Args:
1516
- orig_freq (float, optional): The original frequency of the signal, which must be positive (default=16000).
1517
- new_freq (float, optional): The desired frequency, which must be positive (default=16000).
1518
- resample_method (ResampleMethod, optional): The resample method, which can be
1519
- ResampleMethod.SINC_INTERPOLATION and ResampleMethod.KAISER_WINDOW
1520
- (default=ResampleMethod.SINC_INTERPOLATION).
1521
- lowpass_filter_width (int, optional): Controls the shaperness of the filter, more means sharper but less
1522
- efficient, which must be positive (default=6).
2234
+ orig_freq (float, optional): The original frequency of the signal, must be positive. Default: 16000.
2235
+ new_freq (float, optional): The desired frequency, must be positive. Default: 16000.
2236
+ resample_method (ResampleMethod, optional): The resample method to use, can be ResampleMethod.SINC_INTERPOLATION
2237
+ or ResampleMethod.KAISER_WINDOW. Default: ResampleMethod.SINC_INTERPOLATION.
2238
+ lowpass_filter_width (int, optional): Controls the sharpness of the filter, more means sharper but less
2239
+ efficient, must be positive. Default: 6.
1523
2240
  rolloff (float, optional): The roll-off frequency of the filter, as a fraction of the Nyquist. Lower values
1524
- reduce anti-aliasing, but also reduce some of the highest frequencies, range: (0, 1] (default=0.99).
1525
- beta (float, optional): The shape parameter used for kaiser window (default=None, will use 14.769656459379492).
2241
+ reduce anti-aliasing, but also reduce some of the highest frequencies, in range of (0, 1]. Default: 0.99.
2242
+ beta (float, optional): The shape parameter used for kaiser window. Default: None, will use 14.769656459379492.
2243
+
2244
+ Raises:
2245
+ TypeError: If `orig_freq` is not of type float.
2246
+ ValueError: If `orig_freq` is not a positive number.
2247
+ TypeError: If `new_freq` is not of type float.
2248
+ ValueError: If `new_freq` is not a positive number.
2249
+ TypeError: If `resample_method` is not of type :class:`mindspore.dataset.audio.ResampleMethod` .
2250
+ TypeError: If `lowpass_filter_width` is not of type int.
2251
+ ValueError: If `lowpass_filter_width` is not a positive number.
2252
+ TypeError: If `rolloff` is not of type float.
2253
+ ValueError: If `rolloff` is not in range of (0, 1].
2254
+ RuntimeError: If input tensor is not in shape of <..., time>.
2255
+
2256
+ Supported Platforms:
2257
+ ``CPU``
1526
2258
 
1527
2259
  Examples:
1528
2260
  >>> import numpy as np
@@ -1555,12 +2287,21 @@ class Resample(AudioTensorOperation):
1555
2287
 
1556
2288
  class RiaaBiquad(AudioTensorOperation):
1557
2289
  """
1558
- Apply RIAA vinyl playback equalization. Similar to SoX implementation.
2290
+ Apply RIAA vinyl playback equalization.
2291
+
2292
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
1559
2293
 
1560
2294
  Args:
1561
2295
  sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz),
1562
2296
  can only be one of 44100, 48000, 88200, 96000.
1563
2297
 
2298
+ Raises:
2299
+ TypeError: If `sample_rate` is not of type int.
2300
+ ValueError: If `sample_rate` is not any of [44100, 48000, 88200, 96000].
2301
+
2302
+ Supported Platforms:
2303
+ ``CPU``
2304
+
1564
2305
  Examples:
1565
2306
  >>> import numpy as np
1566
2307
  >>>
@@ -1584,12 +2325,23 @@ class SlidingWindowCmn(AudioTensorOperation):
1584
2325
  Apply sliding-window cepstral mean (and optionally variance) normalization per utterance.
1585
2326
 
1586
2327
  Args:
1587
- cmn_window (int, optional): Window in frames for running average CMN computation (default=600).
2328
+ cmn_window (int, optional): Window in frames for running average CMN computation. Default: 600.
1588
2329
  min_cmn_window (int, optional): Minimum CMN window used at start of decoding (adds latency only at start).
1589
- Only applicable if center is False, ignored if center is True (default=100).
2330
+ Only applicable if center is False, ignored if center is True. Default: 100.
1590
2331
  center (bool, optional): If True, use a window centered on the current frame. If False, window is
1591
- to the left. (default=False).
1592
- norm_vars (bool, optional): If True, normalize variance to one. (default=False).
2332
+ to the left. Default: False.
2333
+ norm_vars (bool, optional): If True, normalize variance to one. Default: False.
2334
+
2335
+ Raises:
2336
+ TypeError: If `cmn_window` is not of type int.
2337
+ ValueError: If `cmn_window` is a negative number.
2338
+ TypeError: If `min_cmn_window` is not of type int.
2339
+ ValueError: If `min_cmn_window` is a negative number.
2340
+ TypeError: If `center` is not of type bool.
2341
+ TypeError: If `norm_vars` is not of type bool.
2342
+
2343
+ Supported Platforms:
2344
+ ``CPU``
1593
2345
 
1594
2346
  Examples:
1595
2347
  >>> import numpy as np
@@ -1621,17 +2373,35 @@ DE_C_WINDOW_TYPE = {WindowType.BARTLETT: cde.WindowType.DE_WINDOW_TYPE_BARTLETT,
1621
2373
 
1622
2374
  class SpectralCentroid(TensorOperation):
1623
2375
  """
1624
- Create a spectral centroid from an audio signal.
2376
+ Compute the spectral centroid for each channel along the time axis.
1625
2377
 
1626
2378
  Args:
1627
- sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
1628
- n_fft (int, optional): Size of FFT, creates n_fft // 2 + 1 bins (default=400).
1629
- win_length (int, optional): Window size (default=None, will use n_fft).
1630
- hop_length (int, optional): Length of hop between STFT windows (default=None, will use win_length // 2).
1631
- pad (int, optional): Two sided padding of signal (default=0).
2379
+ sample_rate (int): Sampling rate of audio signal, e.g. 44100 (Hz).
2380
+ n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 400.
2381
+ win_length (int, optional): Window size. Default: None, will use `n_fft` .
2382
+ hop_length (int, optional): Length of hop between STFT windows. Default: None, will use `win_length // 2` .
2383
+ pad (int, optional): Two sided padding of signal. Default: 0.
1632
2384
  window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
1633
- which can be WindowType.BARTLETT, WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN
1634
- or WindowType.KAISER (default=WindowType.HANN).
2385
+ can be WindowType.BARTLETT, WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN
2386
+ or WindowType.KAISER. Default: WindowType.HANN.
2387
+
2388
+ Raises:
2389
+ TypeError: If `sample_rate` is not of type int.
2390
+ ValueError: If `sample_rate` is a negative number.
2391
+ TypeError: If `n_fft` is not of type int.
2392
+ ValueError: If `n_fft` is not a positive number.
2393
+ TypeError: If `win_length` is not of type int.
2394
+ ValueError: If `win_length` is not a positive number.
2395
+ ValueError: If `win_length` is greater than `n_fft` .
2396
+ TypeError: If `hop_length` is not of type int.
2397
+ ValueError: If `hop_length` is not a positive number.
2398
+ TypeError: If `pad` is not of type int.
2399
+ ValueError: If `pad` is a negative number.
2400
+ TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
2401
+ RuntimeError: If input tensor is not in shape of <..., time>.
2402
+
2403
+ Supported Platforms:
2404
+ ``CPU``
1635
2405
 
1636
2406
  Examples:
1637
2407
  >>> import numpy as np
@@ -1662,21 +2432,43 @@ class Spectrogram(TensorOperation):
1662
2432
  Create a spectrogram from an audio signal.
1663
2433
 
1664
2434
  Args:
1665
- n_fft (int, optional): Size of FFT, creates n_fft // 2 + 1 bins (default=400).
1666
- win_length (int, optional): Window size (default=None, will use n_fft).
1667
- hop_length (int, optional): Length of hop between STFT windows (default=None, will use win_length // 2).
1668
- pad (int): Two sided padding of signal (default=0).
2435
+ n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 400.
2436
+ win_length (int, optional): Window size. Default: None, will use `n_fft` .
2437
+ hop_length (int, optional): Length of hop between STFT windows. Default: None, will use `win_length // 2` .
2438
+ pad (int, optional): Two sided padding of signal. Default: 0.
1669
2439
  window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
1670
- which can be WindowType.BARTLETT, WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN
1671
- or WindowType.KAISER (default=WindowType.HANN). Currently kaiser window is not supported on macOS.
1672
- power (float, optional): Exponent for the magnitude spectrogram, which must be greater
1673
- than or equal to 0, e.g., 1 for energy, 2 for power, etc. (default=2.0).
1674
- normalized (bool, optional): Whether to normalize by magnitude after stft (default=False).
1675
- center (bool, optional): Whether to pad waveform on both sides (default=True).
1676
- pad_mode (BorderType, optional): Controls the padding method used when center is True,
1677
- which can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE, BorderType.SYMMETRIC
1678
- (default=BorderType.REFLECT).
1679
- onesided (bool, optional): Controls whether to return half of results to avoid redundancy (default=True).
2440
+ can be WindowType.BARTLETT, WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN
2441
+ or WindowType.KAISER. Currently, Kaiser window is not supported on macOS. Default: WindowType.HANN.
2442
+ power (float, optional): Exponent for the magnitude spectrogram, must be non-negative,
2443
+ e.g., 1 for energy, 2 for power, etc. Default: 2.0.
2444
+ normalized (bool, optional): Whether to normalize by magnitude after stft. Default: False.
2445
+ center (bool, optional): Whether to pad waveform on both sides. Default: True.
2446
+ pad_mode (BorderType, optional): Controls the padding method used when `center` is True,
2447
+ can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE or BorderType.SYMMETRIC.
2448
+ Default: BorderType.REFLECT.
2449
+ onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: True.
2450
+
2451
+ Raises:
2452
+ TypeError: If `n_fft` is not of type int.
2453
+ ValueError: If `n_fft` is not a positive number.
2454
+ TypeError: If `win_length` is not of type int.
2455
+ ValueError: If `win_length` is not a positive number.
2456
+ ValueError: If `win_length` is greater than `n_fft` .
2457
+ TypeError: If `hop_length` is not of type int.
2458
+ ValueError: If `hop_length` is not a positive number.
2459
+ TypeError: If `pad` is not of type int.
2460
+ ValueError: If `pad` is a negative number.
2461
+ TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
2462
+ TypeError: If `power` is not of type float.
2463
+ ValueError: If `power` is a negative number.
2464
+ TypeError: If `normalized` is not of type bool.
2465
+ TypeError: If `center` is not of type bool.
2466
+ TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
2467
+ TypeError: If `onesided` is not of type bool.
2468
+ RuntimeError: If input tensor is not in shape of <..., time>.
2469
+
2470
+ Supported Platforms:
2471
+ ``CPU``
1680
2472
 
1681
2473
  Examples:
1682
2474
  >>> import numpy as np
@@ -1713,7 +2505,7 @@ class TimeMasking(AudioTensorOperation):
1713
2505
  Apply masking to a spectrogram in the time domain.
1714
2506
 
1715
2507
  Note:
1716
- The dimension of the audio waveform to be processed needs to be (..., freq, time).
2508
+ The shape of the audio waveform to be processed needs to be <..., freq, time>.
1717
2509
 
1718
2510
  Args:
1719
2511
  iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: False.
@@ -1769,20 +2561,20 @@ class TimeStretch(AudioTensorOperation):
1769
2561
  Stretch Short Time Fourier Transform (STFT) in time without modifying pitch for a given rate.
1770
2562
 
1771
2563
  Note:
1772
- The dimension of the audio waveform to be processed needs to be (..., freq, time, complex=2).
2564
+ The shape of the audio waveform to be processed needs to be <..., freq, time, complex=2>.
1773
2565
  The first dimension represents the real part while the second represents the imaginary.
1774
2566
 
1775
2567
  Args:
1776
2568
  hop_length (int, optional): Length of hop between STFT windows, i.e. the number of samples
1777
- between consecutive frames. Default: None, will use `n_freq - 1`.
2569
+ between consecutive frames. Default: None, will use `n_freq - 1` .
1778
2570
  n_freq (int, optional): Number of filter banks from STFT. Default: 201.
1779
2571
  fixed_rate (float, optional): Rate to speed up or slow down by. Default: None, will keep
1780
2572
  the original rate.
1781
2573
 
1782
2574
  Raises:
1783
- TypeError: If `hop_length` is not of type integer.
2575
+ TypeError: If `hop_length` is not of type int.
1784
2576
  ValueError: If `hop_length` is not a positive number.
1785
- TypeError: If `n_freq` is not of type integer.
2577
+ TypeError: If `n_freq` is not of type int.
1786
2578
  ValueError: If `n_freq` is not a positive number.
1787
2579
  TypeError: If `fixed_rate` is not of type float.
1788
2580
  ValueError: If `fixed_rate` is not a positive number.
@@ -1822,13 +2614,28 @@ class TimeStretch(AudioTensorOperation):
1822
2614
 
1823
2615
  class TrebleBiquad(AudioTensorOperation):
1824
2616
  """
1825
- Design a treble tone-control effect. Similar to SoX implementation.
2617
+ Design a treble tone-control effect.
2618
+
2619
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
1826
2620
 
1827
2621
  Args:
1828
- sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero.
2622
+ sample_rate (int): Sampling rate (in Hz), which can't be zero.
1829
2623
  gain (float): Desired gain at the boost (or attenuation) in dB.
1830
- central_freq (float, optional): Central frequency (in Hz) (default=3000).
1831
- Q(float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1] (default=0.707).
2624
+ central_freq (float, optional): Central frequency (in Hz). Default: 3000.
2625
+ Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
2626
+ in range of (0, 1]. Default: 0.707.
2627
+
2628
+ Raises:
2629
+ TypeError: If `sample_rate` is not of type int.
2630
+ ValueError: If `sample_rate` is 0.
2631
+ TypeError: If `gain` is not of type float.
2632
+ TypeError: If `central_freq` is not of type float.
2633
+ TypeError: If `Q` is not of type float.
2634
+ ValueError: If `Q` is not in range of (0, 1].
2635
+ RuntimeError: If input tensor is not in shape of <..., time>.
2636
+
2637
+ Supported Platforms:
2638
+ ``CPU``
1832
2639
 
1833
2640
  Examples:
1834
2641
  >>> import numpy as np
@@ -1853,37 +2660,82 @@ class TrebleBiquad(AudioTensorOperation):
1853
2660
 
1854
2661
  class Vad(AudioTensorOperation):
1855
2662
  """
1856
- Attempt to trim silent background sounds from the end of the voice recording.
2663
+ Voice activity detector.
2664
+
2665
+ Attempt to trim silence and quiet background sounds from the ends of recordings of speech.
2666
+
2667
+ Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
1857
2668
 
1858
2669
  Args:
1859
- sample_rate (int): Sample rate of audio signal.
1860
- trigger_level (float, optional): The measurement level used to trigger activity detection (default=7.0).
1861
- trigger_time (float, optional): The time constant (in seconds) used to help ignore short sounds (default=0.25).
1862
- search_time (float, optional): The amount of audio (in seconds) to search for quieter/shorter sounds to include
1863
- prior to the detected trigger point (default=1.0).
1864
- allowed_gap (float, optional): The allowed gap (in seconds) between quiteter/shorter sounds to include prior to
1865
- the detected trigger point (default=0.25).
2670
+ sample_rate (int): Sampling rate of audio signal.
2671
+ trigger_level (float, optional): The measurement level used to trigger activity detection. Default: 7.0.
2672
+ trigger_time (float, optional): The time constant (in seconds) used to help ignore short bursts of
2673
+ sounds. Default: 0.25.
2674
+ search_time (float, optional): The amount of audio (in seconds) to search for quieter/shorter bursts of audio
2675
+ to include prior to the detected trigger point. Default: 1.0.
2676
+ allowed_gap (float, optional): The allowed gap (in seconds) between quieter/shorter bursts of audio to include
2677
+ prior to the detected trigger point. Default: 0.25.
1866
2678
  pre_trigger_time (float, optional): The amount of audio (in seconds) to preserve before the trigger point and
1867
- any found quieter/shorter bursts (default=0.0).
1868
- boot_time (float, optional): The time for the initial noise estimate (default=0.35).
1869
- noise_up_time (float, optional): Time constant used by the adaptive noise estimator, when the noise level is
1870
- increasing (default=0.1).
1871
- noise_down_time (float, optional): Time constant used by the adaptive noise estimator, when the noise level is
1872
- decreasing (default=0.01).
1873
- noise_reduction_amount (float, optional): The amount of noise reduction used in the detection algorithm
1874
- (default=1.35).
1875
- measure_freq (float, optional): The frequency of the algorithms processing (default=20.0).
1876
- measure_duration (float, optional): The duration of measurement (default=None, use twice the measurement
1877
- period).
1878
- measure_smooth_time (float, optional): The time constant used to smooth spectral measurements (default=0.4).
1879
- hp_filter_freq (float, optional): The "Brick-wall" frequency of high-pass filter applied at the input to the
1880
- detector algorithm (default=50.0).
1881
- lp_filter_freq (float, optional): The "Brick-wall" frequency of low-pass filter applied at the input to the
1882
- detector algorithm (default=6000.0).
1883
- hp_lifter_freq (float, optional): The "Brick-wall" frequency of high-pass lifter applied at the input to the
1884
- detector algorithm (default=150.0).
1885
- lp_lifter_freq (float, optional): The "Brick-wall" frequency of low-pass lifter applied at the input to the
1886
- detector algorithm (default=2000.0).
2679
+ any found quieter/shorter bursts. Default: 0.0.
2680
+ boot_time (float, optional): The time for the initial noise estimate. Default: 0.35.
2681
+ noise_up_time (float, optional): Time constant used by the adaptive noise estimator for when the noise level is
2682
+ increasing. Default: 0.1.
2683
+ noise_down_time (float, optional): Time constant used by the adaptive noise estimator for when the noise level
2684
+ is decreasing. Default: 0.01.
2685
+ noise_reduction_amount (float, optional): Amount of noise reduction to use in the detection algorithm.
2686
+ Default: 1.35.
2687
+ measure_freq (float, optional): Frequency of the algorithm's processing/measurements. Default: 20.0.
2688
+ measure_duration (float, optional): The duration of measurement. Default: None, will use twice the measurement
2689
+ period.
2690
+ measure_smooth_time (float, optional): Time constant used to smooth spectral measurements. Default: 0.4.
2691
+ hp_filter_freq (float, optional): The 'Brick-wall' frequency of high-pass filter applied at the input to the
2692
+ detector algorithm. Default: 50.0.
2693
+ lp_filter_freq (float, optional): The 'Brick-wall' frequency of low-pass filter applied at the input to the
2694
+ detector algorithm. Default: 6000.0.
2695
+ hp_lifter_freq (float, optional): The 'Brick-wall' frequency of high-pass lifter used in the
2696
+ detector algorithm. Default: 150.0.
2697
+ lp_lifter_freq (float, optional): The 'Brick-wall' frequency of low-pass lifter used in the
2698
+ detector algorithm. Default: 2000.0.
2699
+
2700
+ Raises:
2701
+ TypeError: If `sample_rate` is not of type int.
2702
+ ValueError: If `sample_rate` is not a positive number.
2703
+ TypeError: If `trigger_level` is not of type float.
2704
+ TypeError: If `trigger_time` is not of type float.
2705
+ ValueError: If `trigger_time` is a negative number.
2706
+ TypeError: If `search_time` is not of type float.
2707
+ ValueError: If `search_time` is a negative number.
2708
+ TypeError: If `allowed_gap` is not of type float.
2709
+ ValueError: If `allowed_gap` is a negative number.
2710
+ TypeError: If `pre_trigger_time` is not of type float.
2711
+ ValueError: If `pre_trigger_time` is a negative number.
2712
+ TypeError: If `boot_time` is not of type float.
2713
+ ValueError: If `boot_time` is a negative number.
2714
+ TypeError: If `noise_up_time` is not of type float.
2715
+ ValueError: If `noise_up_time` is a negative number.
2716
+ TypeError: If `noise_down_time` is not of type float.
2717
+ ValueError: If `noise_down_time` is a negative number.
2718
+ ValueError: If `noise_up_time` is less than `noise_down_time` .
2719
+ TypeError: If `noise_reduction_amount` is not of type float.
2720
+ ValueError: If `noise_reduction_amount` is a negative number.
2721
+ TypeError: If `measure_freq` is not of type float.
2722
+ ValueError: If `measure_freq` is not a positive number.
2723
+ TypeError: If `measure_duration` is not of type float.
2724
+ ValueError: If `measure_duration` is a negative number.
2725
+ TypeError: If `measure_smooth_time` is not of type float.
2726
+ ValueError: If `measure_smooth_time` is a negative number.
2727
+ TypeError: If `hp_filter_freq` is not of type float.
2728
+ ValueError: If `hp_filter_freq` is not a positive number.
2729
+ TypeError: If `lp_filter_freq` is not of type float.
2730
+ ValueError: If `lp_filter_freq` is not a positive number.
2731
+ TypeError: If `hp_lifter_freq` is not of type float.
2732
+ ValueError: If `hp_lifter_freq` is not a positive number.
2733
+ TypeError: If `lp_lifter_freq` is not of type float.
2734
+ ValueError: If `lp_lifter_freq` is not a positive number.
2735
+ RuntimeError: If input tensor is not in shape of <..., time>.
2736
+
2737
+ Supported Platforms:
2738
+ ``CPU``
1887
2739
 
1888
2740
  Examples:
1889
2741
  >>> import numpy as np
@@ -1933,15 +2785,25 @@ DE_C_GAIN_TYPE = {GainType.AMPLITUDE: cde.GainType.DE_GAIN_TYPE_AMPLITUDE,
1933
2785
 
1934
2786
  class Vol(AudioTensorOperation):
1935
2787
  """
1936
- Apply amplification or attenuation to the whole waveform.
2788
+ Adjust volume of waveform.
1937
2789
 
1938
2790
  Args:
1939
- gain (float): Value of gain adjustment.
1940
- If gain_type = amplitude, gain stands for nonnegative amplitude ratio.
1941
- If gain_type = power, gain stands for power.
1942
- If gain_type = db, gain stands for decibels.
1943
- gain_type (GainType, optional): Type of gain, contains the following three enumeration values
1944
- GainType.AMPLITUDE, GainType.POWER and GainType.DB (default=GainType.AMPLITUDE).
2791
+ gain (float): Gain at the boost (or attenuation).
2792
+ If `gain_type` is GainType.AMPLITUDE, it is a non-negative amplitude ratio.
2793
+ If `gain_type` is GainType.POWER, it is a power (voltage squared).
2794
+ If `gain_type` is GainType.DB, it is in decibels.
2795
+ gain_type (GainType, optional): Type of gain, can be GainType.AMPLITUDE, GainType.POWER
2796
+ or GainType.DB. Default: GainType.AMPLITUDE.
2797
+
2798
+ Raises:
2799
+ TypeError: If `gain` is not of type float.
2800
+ TypeError: If `gain_type` is not of type :class:`mindspore.dataset.audio.GainType` .
2801
+ ValueError: If `gain` is a negative number when `gain_type` is GainType.AMPLITUDE.
2802
+ ValueError: If `gain` is not a positive number when `gain_type` is GainType.POWER.
2803
+ RuntimeError: If input tensor is not in shape of <..., time>.
2804
+
2805
+ Supported Platforms:
2806
+ ``CPU``
1945
2807
 
1946
2808
  Examples:
1947
2809
  >>> import numpy as np