mindspore 2.4.0__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (1387):
  1. mindspore/.commit_id +1 -0
  2. mindspore/__init__.py +53 -0
  3. mindspore/_c_dataengine.cpython-310-darwin.so +0 -0
  4. mindspore/_c_expression.cpython-310-darwin.so +0 -0
  5. mindspore/_c_mindrecord.cpython-310-darwin.so +0 -0
  6. mindspore/_check_jit_forbidden_api.py +106 -0
  7. mindspore/_checkparam.py +1419 -0
  8. mindspore/_extends/__init__.py +23 -0
  9. mindspore/_extends/builtin_operations.py +224 -0
  10. mindspore/_extends/graph_kernel/__init__.py +17 -0
  11. mindspore/_extends/graph_kernel/model/__init__.py +19 -0
  12. mindspore/_extends/graph_kernel/model/graph_parallel.py +311 -0
  13. mindspore/_extends/graph_kernel/model/graph_split.py +1348 -0
  14. mindspore/_extends/graph_kernel/model/model.py +553 -0
  15. mindspore/_extends/graph_kernel/model/model_builder.py +216 -0
  16. mindspore/_extends/graph_kernel/parallel_estimate.py +60 -0
  17. mindspore/_extends/graph_kernel/splitter.py +140 -0
  18. mindspore/_extends/graph_kernel/utils.py +28 -0
  19. mindspore/_extends/parallel_compile/__init__.py +19 -0
  20. mindspore/_extends/parallel_compile/akg_compiler/__init__.py +19 -0
  21. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +269 -0
  22. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +529 -0
  23. mindspore/_extends/parallel_compile/akg_compiler/compiler.py +56 -0
  24. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
  25. mindspore/_extends/parallel_compile/akg_compiler/get_file_path.py +36 -0
  26. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +556 -0
  27. mindspore/_extends/parallel_compile/akg_compiler/util.py +159 -0
  28. mindspore/_extends/parse/__init__.py +49 -0
  29. mindspore/_extends/parse/compile_config.py +299 -0
  30. mindspore/_extends/parse/namespace.py +136 -0
  31. mindspore/_extends/parse/parser.py +1448 -0
  32. mindspore/_extends/parse/resources.py +213 -0
  33. mindspore/_extends/parse/standard_method.py +4475 -0
  34. mindspore/_extends/parse/trope.py +97 -0
  35. mindspore/_extends/pijit/__init__.py +23 -0
  36. mindspore/_extends/pijit/pijit_func_white_list.py +669 -0
  37. mindspore/_extends/remote/__init__.py +19 -0
  38. mindspore/_extends/remote/kernel_build_server.py +199 -0
  39. mindspore/_extends/remote/kernel_build_server_akg.py +55 -0
  40. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  41. mindspore/_extends/remote/kernel_build_server_ascend.py +75 -0
  42. mindspore/_extends/utils.py +68 -0
  43. mindspore/_install_custom.py +43 -0
  44. mindspore/_profiler.py +30 -0
  45. mindspore/amp.py +433 -0
  46. mindspore/boost/__init__.py +42 -0
  47. mindspore/boost/adasum.py +319 -0
  48. mindspore/boost/base.py +535 -0
  49. mindspore/boost/boost.py +400 -0
  50. mindspore/boost/boost_cell_wrapper.py +790 -0
  51. mindspore/boost/dim_reduce.py +323 -0
  52. mindspore/boost/grad_accumulation.py +79 -0
  53. mindspore/boost/grad_freeze.py +382 -0
  54. mindspore/boost/group_loss_scale_manager.py +166 -0
  55. mindspore/boost/less_batch_normalization.py +174 -0
  56. mindspore/common/__init__.py +86 -0
  57. mindspore/common/_auto_dynamic.py +68 -0
  58. mindspore/common/_decorator.py +50 -0
  59. mindspore/common/_jit_fallback_utils.py +110 -0
  60. mindspore/common/_monad.py +25 -0
  61. mindspore/common/_pijit_context.py +190 -0
  62. mindspore/common/_register_for_adapter.py +74 -0
  63. mindspore/common/_register_for_recompute.py +48 -0
  64. mindspore/common/_register_for_tensor.py +46 -0
  65. mindspore/common/_stub_tensor.py +210 -0
  66. mindspore/common/_tensor_overload.py +139 -0
  67. mindspore/common/_utils.py +122 -0
  68. mindspore/common/api.py +2064 -0
  69. mindspore/common/auto_dynamic_shape.py +507 -0
  70. mindspore/common/dtype.py +422 -0
  71. mindspore/common/dump.py +130 -0
  72. mindspore/common/file_system.py +48 -0
  73. mindspore/common/generator.py +254 -0
  74. mindspore/common/hook_handle.py +143 -0
  75. mindspore/common/initializer.py +880 -0
  76. mindspore/common/jit_config.py +98 -0
  77. mindspore/common/lazy_inline.py +240 -0
  78. mindspore/common/mindir_util.py +111 -0
  79. mindspore/common/mutable.py +234 -0
  80. mindspore/common/no_inline.py +54 -0
  81. mindspore/common/np_dtype.py +25 -0
  82. mindspore/common/parameter.py +1081 -0
  83. mindspore/common/recompute.py +292 -0
  84. mindspore/common/seed.py +260 -0
  85. mindspore/common/sparse_tensor.py +1175 -0
  86. mindspore/common/symbol.py +122 -0
  87. mindspore/common/tensor.py +5039 -0
  88. mindspore/communication/__init__.py +37 -0
  89. mindspore/communication/_comm_helper.py +501 -0
  90. mindspore/communication/_hccl_management.py +297 -0
  91. mindspore/communication/comm_func.py +1395 -0
  92. mindspore/communication/management.py +673 -0
  93. mindspore/config/op_info.config +533 -0
  94. mindspore/context.py +2077 -0
  95. mindspore/dataset/__init__.py +90 -0
  96. mindspore/dataset/audio/__init__.py +61 -0
  97. mindspore/dataset/audio/transforms.py +3690 -0
  98. mindspore/dataset/audio/utils.py +386 -0
  99. mindspore/dataset/audio/validators.py +1172 -0
  100. mindspore/dataset/callback/__init__.py +20 -0
  101. mindspore/dataset/callback/ds_callback.py +368 -0
  102. mindspore/dataset/callback/validators.py +32 -0
  103. mindspore/dataset/core/__init__.py +13 -0
  104. mindspore/dataset/core/config.py +1095 -0
  105. mindspore/dataset/core/datatypes.py +101 -0
  106. mindspore/dataset/core/py_util_helpers.py +65 -0
  107. mindspore/dataset/core/validator_helpers.py +781 -0
  108. mindspore/dataset/debug/__init__.py +21 -0
  109. mindspore/dataset/debug/debug_hook.py +97 -0
  110. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  111. mindspore/dataset/engine/__init__.py +124 -0
  112. mindspore/dataset/engine/cache_admin.py +47 -0
  113. mindspore/dataset/engine/cache_client.py +129 -0
  114. mindspore/dataset/engine/datasets.py +4582 -0
  115. mindspore/dataset/engine/datasets_audio.py +911 -0
  116. mindspore/dataset/engine/datasets_standard_format.py +543 -0
  117. mindspore/dataset/engine/datasets_text.py +2161 -0
  118. mindspore/dataset/engine/datasets_user_defined.py +1184 -0
  119. mindspore/dataset/engine/datasets_vision.py +4816 -0
  120. mindspore/dataset/engine/iterators.py +371 -0
  121. mindspore/dataset/engine/obs/__init__.py +23 -0
  122. mindspore/dataset/engine/obs/config_loader.py +68 -0
  123. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +508 -0
  124. mindspore/dataset/engine/obs/util.py +482 -0
  125. mindspore/dataset/engine/offload.py +596 -0
  126. mindspore/dataset/engine/queue.py +304 -0
  127. mindspore/dataset/engine/samplers.py +895 -0
  128. mindspore/dataset/engine/serializer_deserializer.py +159 -0
  129. mindspore/dataset/engine/validators.py +2895 -0
  130. mindspore/dataset/text/__init__.py +51 -0
  131. mindspore/dataset/text/transforms.py +1703 -0
  132. mindspore/dataset/text/utils.py +715 -0
  133. mindspore/dataset/text/validators.py +642 -0
  134. mindspore/dataset/transforms/__init__.py +45 -0
  135. mindspore/dataset/transforms/c_transforms.py +638 -0
  136. mindspore/dataset/transforms/py_transforms.py +393 -0
  137. mindspore/dataset/transforms/py_transforms_util.py +255 -0
  138. mindspore/dataset/transforms/transforms.py +1260 -0
  139. mindspore/dataset/transforms/validators.py +410 -0
  140. mindspore/dataset/utils/__init__.py +19 -0
  141. mindspore/dataset/utils/browse_dataset.py +190 -0
  142. mindspore/dataset/utils/line_reader.py +126 -0
  143. mindspore/dataset/vision/__init__.py +65 -0
  144. mindspore/dataset/vision/c_transforms.py +2641 -0
  145. mindspore/dataset/vision/py_transforms.py +2120 -0
  146. mindspore/dataset/vision/py_transforms_util.py +1660 -0
  147. mindspore/dataset/vision/transforms.py +7295 -0
  148. mindspore/dataset/vision/utils.py +863 -0
  149. mindspore/dataset/vision/validators.py +1483 -0
  150. mindspore/default_config.py +2 -0
  151. mindspore/experimental/__init__.py +20 -0
  152. mindspore/experimental/es/__init__.py +22 -0
  153. mindspore/experimental/es/embedding_service.py +883 -0
  154. mindspore/experimental/es/embedding_service_layer.py +581 -0
  155. mindspore/experimental/llm_boost/__init__.py +21 -0
  156. mindspore/experimental/llm_boost/atb/__init__.py +23 -0
  157. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  158. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  159. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  160. mindspore/experimental/llm_boost/register.py +129 -0
  161. mindspore/experimental/llm_boost/utils.py +31 -0
  162. mindspore/experimental/map_parameter.py +309 -0
  163. mindspore/experimental/optim/__init__.py +40 -0
  164. mindspore/experimental/optim/adadelta.py +161 -0
  165. mindspore/experimental/optim/adagrad.py +168 -0
  166. mindspore/experimental/optim/adam.py +193 -0
  167. mindspore/experimental/optim/adamax.py +170 -0
  168. mindspore/experimental/optim/adamw.py +290 -0
  169. mindspore/experimental/optim/asgd.py +153 -0
  170. mindspore/experimental/optim/lr_scheduler.py +1371 -0
  171. mindspore/experimental/optim/nadam.py +157 -0
  172. mindspore/experimental/optim/optimizer.py +262 -0
  173. mindspore/experimental/optim/radam.py +194 -0
  174. mindspore/experimental/optim/rmsprop.py +154 -0
  175. mindspore/experimental/optim/rprop.py +164 -0
  176. mindspore/experimental/optim/sgd.py +156 -0
  177. mindspore/hal/__init__.py +40 -0
  178. mindspore/hal/_ascend.py +57 -0
  179. mindspore/hal/_base.py +57 -0
  180. mindspore/hal/_cpu.py +56 -0
  181. mindspore/hal/_gpu.py +57 -0
  182. mindspore/hal/contiguous_tensors_handle.py +175 -0
  183. mindspore/hal/device.py +356 -0
  184. mindspore/hal/event.py +179 -0
  185. mindspore/hal/memory.py +326 -0
  186. mindspore/hal/stream.py +357 -0
  187. mindspore/include/OWNERS +7 -0
  188. mindspore/include/api/allocator.h +97 -0
  189. mindspore/include/api/callback/callback.h +93 -0
  190. mindspore/include/api/callback/ckpt_saver.h +41 -0
  191. mindspore/include/api/callback/loss_monitor.h +33 -0
  192. mindspore/include/api/callback/lr_scheduler.h +51 -0
  193. mindspore/include/api/callback/time_monitor.h +34 -0
  194. mindspore/include/api/callback/train_accuracy.h +37 -0
  195. mindspore/include/api/cell.h +90 -0
  196. mindspore/include/api/cfg.h +82 -0
  197. mindspore/include/api/context.h +602 -0
  198. mindspore/include/api/data_type.h +47 -0
  199. mindspore/include/api/delegate.h +178 -0
  200. mindspore/include/api/delegate_api.h +75 -0
  201. mindspore/include/api/dual_abi_helper.h +208 -0
  202. mindspore/include/api/format.h +28 -0
  203. mindspore/include/api/graph.h +46 -0
  204. mindspore/include/api/kernel.h +58 -0
  205. mindspore/include/api/kernel_api.h +168 -0
  206. mindspore/include/api/metrics/accuracy.h +36 -0
  207. mindspore/include/api/metrics/metrics.h +41 -0
  208. mindspore/include/api/model.h +438 -0
  209. mindspore/include/api/model_group.h +91 -0
  210. mindspore/include/api/model_parallel_runner.h +168 -0
  211. mindspore/include/api/serialization.h +185 -0
  212. mindspore/include/api/status.h +192 -0
  213. mindspore/include/api/types.h +431 -0
  214. mindspore/include/api/visible.h +41 -0
  215. mindspore/include/c_api/context_c.h +179 -0
  216. mindspore/include/c_api/data_type_c.h +52 -0
  217. mindspore/include/c_api/format_c.h +46 -0
  218. mindspore/include/c_api/model_c.h +347 -0
  219. mindspore/include/c_api/status_c.h +79 -0
  220. mindspore/include/c_api/tensor_c.h +146 -0
  221. mindspore/include/c_api/types_c.h +67 -0
  222. mindspore/include/dataset/config.h +163 -0
  223. mindspore/include/dataset/constants.h +363 -0
  224. mindspore/include/dataset/execute.h +196 -0
  225. mindspore/include/dataset/text.h +1092 -0
  226. mindspore/include/dataset/transforms.h +638 -0
  227. mindspore/include/dataset/vision.h +2129 -0
  228. mindspore/include/dataset/vision_ascend.h +206 -0
  229. mindspore/include/dataset/vision_lite.h +625 -0
  230. mindspore/lib/libavcodec.59.dylib +0 -0
  231. mindspore/lib/libavdevice.59.dylib +0 -0
  232. mindspore/lib/libavfilter.8.dylib +0 -0
  233. mindspore/lib/libavformat.59.dylib +0 -0
  234. mindspore/lib/libavutil.57.dylib +0 -0
  235. mindspore/lib/libdnnl.2.dylib +0 -0
  236. mindspore/lib/libicudata.69.dylib +0 -0
  237. mindspore/lib/libicui18n.69.dylib +0 -0
  238. mindspore/lib/libicuuc.69.dylib +0 -0
  239. mindspore/lib/libmindspore_address_sorting.15.dylib +0 -0
  240. mindspore/lib/libmindspore_backend.dylib +0 -0
  241. mindspore/lib/libmindspore_common.dylib +0 -0
  242. mindspore/lib/libmindspore_core.dylib +0 -0
  243. mindspore/lib/libmindspore_glog.0.dylib +0 -0
  244. mindspore/lib/libmindspore_gpr.15.dylib +0 -0
  245. mindspore/lib/libmindspore_grpc++.1.dylib +0 -0
  246. mindspore/lib/libmindspore_grpc.15.dylib +0 -0
  247. mindspore/lib/libmindspore_np_dtype.dylib +0 -0
  248. mindspore/lib/libmindspore_ops.dylib +0 -0
  249. mindspore/lib/libmindspore_upb.15.dylib +0 -0
  250. mindspore/lib/libnnacl.dylib +0 -0
  251. mindspore/lib/libopencv_core.4.5.dylib +0 -0
  252. mindspore/lib/libopencv_imgcodecs.4.5.dylib +0 -0
  253. mindspore/lib/libopencv_imgproc.4.5.dylib +0 -0
  254. mindspore/lib/libps_cache.dylib +0 -0
  255. mindspore/lib/libswresample.4.dylib +0 -0
  256. mindspore/lib/libswscale.6.dylib +0 -0
  257. mindspore/lib/libtinyxml2.8.dylib +0 -0
  258. mindspore/log.py +633 -0
  259. mindspore/mindrecord/__init__.py +43 -0
  260. mindspore/mindrecord/common/__init__.py +17 -0
  261. mindspore/mindrecord/common/constant.py +20 -0
  262. mindspore/mindrecord/common/enums.py +44 -0
  263. mindspore/mindrecord/common/exceptions.py +311 -0
  264. mindspore/mindrecord/config.py +809 -0
  265. mindspore/mindrecord/filereader.py +174 -0
  266. mindspore/mindrecord/filewriter.py +722 -0
  267. mindspore/mindrecord/mindpage.py +210 -0
  268. mindspore/mindrecord/shardheader.py +141 -0
  269. mindspore/mindrecord/shardindexgenerator.py +74 -0
  270. mindspore/mindrecord/shardreader.py +117 -0
  271. mindspore/mindrecord/shardsegment.py +128 -0
  272. mindspore/mindrecord/shardutils.py +185 -0
  273. mindspore/mindrecord/shardwriter.py +237 -0
  274. mindspore/mindrecord/tools/__init__.py +17 -0
  275. mindspore/mindrecord/tools/cifar10.py +140 -0
  276. mindspore/mindrecord/tools/cifar100.py +153 -0
  277. mindspore/mindrecord/tools/cifar100_to_mr.py +185 -0
  278. mindspore/mindrecord/tools/cifar10_to_mr.py +177 -0
  279. mindspore/mindrecord/tools/csv_to_mr.py +200 -0
  280. mindspore/mindrecord/tools/imagenet_to_mr.py +206 -0
  281. mindspore/mindrecord/tools/mnist_to_mr.py +259 -0
  282. mindspore/mindrecord/tools/tfrecord_to_mr.py +360 -0
  283. mindspore/mint/__init__.py +1586 -0
  284. mindspore/mint/distributed/__init__.py +31 -0
  285. mindspore/mint/distributed/distributed.py +254 -0
  286. mindspore/mint/linalg/__init__.py +22 -0
  287. mindspore/mint/nn/__init__.py +757 -0
  288. mindspore/mint/nn/functional.py +679 -0
  289. mindspore/mint/nn/layer/__init__.py +39 -0
  290. mindspore/mint/nn/layer/activation.py +133 -0
  291. mindspore/mint/nn/layer/normalization.py +477 -0
  292. mindspore/mint/nn/layer/pooling.py +110 -0
  293. mindspore/mint/optim/__init__.py +24 -0
  294. mindspore/mint/optim/adamw.py +206 -0
  295. mindspore/mint/special/__init__.py +63 -0
  296. mindspore/multiprocessing/__init__.py +73 -0
  297. mindspore/nn/__init__.py +47 -0
  298. mindspore/nn/cell.py +2787 -0
  299. mindspore/nn/dynamic_lr.py +482 -0
  300. mindspore/nn/grad/__init__.py +21 -0
  301. mindspore/nn/grad/cell_grad.py +196 -0
  302. mindspore/nn/layer/__init__.py +63 -0
  303. mindspore/nn/layer/activation.py +1822 -0
  304. mindspore/nn/layer/basic.py +1629 -0
  305. mindspore/nn/layer/channel_shuffle.py +90 -0
  306. mindspore/nn/layer/combined.py +248 -0
  307. mindspore/nn/layer/container.py +734 -0
  308. mindspore/nn/layer/conv.py +1505 -0
  309. mindspore/nn/layer/dense.py +204 -0
  310. mindspore/nn/layer/embedding.py +869 -0
  311. mindspore/nn/layer/image.py +661 -0
  312. mindspore/nn/layer/math.py +1069 -0
  313. mindspore/nn/layer/normalization.py +1273 -0
  314. mindspore/nn/layer/padding.py +880 -0
  315. mindspore/nn/layer/pooling.py +2302 -0
  316. mindspore/nn/layer/rnn_cells.py +388 -0
  317. mindspore/nn/layer/rnns.py +849 -0
  318. mindspore/nn/layer/thor_layer.py +963 -0
  319. mindspore/nn/layer/timedistributed.py +155 -0
  320. mindspore/nn/layer/transformer.py +823 -0
  321. mindspore/nn/learning_rate_schedule.py +512 -0
  322. mindspore/nn/loss/__init__.py +36 -0
  323. mindspore/nn/loss/loss.py +2924 -0
  324. mindspore/nn/metrics.py +53 -0
  325. mindspore/nn/optim/__init__.py +45 -0
  326. mindspore/nn/optim/_dist_optimizer_registry.py +111 -0
  327. mindspore/nn/optim/ada_grad.py +217 -0
  328. mindspore/nn/optim/adadelta.py +206 -0
  329. mindspore/nn/optim/adafactor.py +448 -0
  330. mindspore/nn/optim/adam.py +1297 -0
  331. mindspore/nn/optim/adamax.py +220 -0
  332. mindspore/nn/optim/adasum.py +548 -0
  333. mindspore/nn/optim/asgd.py +216 -0
  334. mindspore/nn/optim/ftrl.py +401 -0
  335. mindspore/nn/optim/lamb.py +296 -0
  336. mindspore/nn/optim/lars.py +202 -0
  337. mindspore/nn/optim/lazyadam.py +533 -0
  338. mindspore/nn/optim/momentum.py +239 -0
  339. mindspore/nn/optim/optimizer.py +1034 -0
  340. mindspore/nn/optim/proximal_ada_grad.py +242 -0
  341. mindspore/nn/optim/rmsprop.py +264 -0
  342. mindspore/nn/optim/rprop.py +251 -0
  343. mindspore/nn/optim/sgd.py +237 -0
  344. mindspore/nn/optim/tft_wrapper.py +127 -0
  345. mindspore/nn/optim/thor.py +1310 -0
  346. mindspore/nn/probability/__init__.py +22 -0
  347. mindspore/nn/probability/bijector/__init__.py +35 -0
  348. mindspore/nn/probability/bijector/bijector.py +337 -0
  349. mindspore/nn/probability/bijector/exp.py +65 -0
  350. mindspore/nn/probability/bijector/gumbel_cdf.py +144 -0
  351. mindspore/nn/probability/bijector/invert.py +126 -0
  352. mindspore/nn/probability/bijector/power_transform.py +196 -0
  353. mindspore/nn/probability/bijector/scalar_affine.py +167 -0
  354. mindspore/nn/probability/bijector/softplus.py +189 -0
  355. mindspore/nn/probability/bnn_layers/__init__.py +29 -0
  356. mindspore/nn/probability/bnn_layers/_util.py +46 -0
  357. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +112 -0
  358. mindspore/nn/probability/bnn_layers/conv_variational.py +267 -0
  359. mindspore/nn/probability/bnn_layers/dense_variational.py +302 -0
  360. mindspore/nn/probability/bnn_layers/layer_distribution.py +123 -0
  361. mindspore/nn/probability/distribution/__init__.py +56 -0
  362. mindspore/nn/probability/distribution/_utils/__init__.py +34 -0
  363. mindspore/nn/probability/distribution/_utils/custom_ops.py +96 -0
  364. mindspore/nn/probability/distribution/_utils/utils.py +362 -0
  365. mindspore/nn/probability/distribution/bernoulli.py +334 -0
  366. mindspore/nn/probability/distribution/beta.py +391 -0
  367. mindspore/nn/probability/distribution/categorical.py +435 -0
  368. mindspore/nn/probability/distribution/cauchy.py +383 -0
  369. mindspore/nn/probability/distribution/distribution.py +827 -0
  370. mindspore/nn/probability/distribution/exponential.py +350 -0
  371. mindspore/nn/probability/distribution/gamma.py +391 -0
  372. mindspore/nn/probability/distribution/geometric.py +335 -0
  373. mindspore/nn/probability/distribution/gumbel.py +257 -0
  374. mindspore/nn/probability/distribution/half_normal.py +133 -0
  375. mindspore/nn/probability/distribution/laplace.py +128 -0
  376. mindspore/nn/probability/distribution/log_normal.py +272 -0
  377. mindspore/nn/probability/distribution/logistic.py +379 -0
  378. mindspore/nn/probability/distribution/normal.py +336 -0
  379. mindspore/nn/probability/distribution/poisson.py +288 -0
  380. mindspore/nn/probability/distribution/student_t.py +149 -0
  381. mindspore/nn/probability/distribution/transformed_distribution.py +235 -0
  382. mindspore/nn/probability/distribution/uniform.py +375 -0
  383. mindspore/nn/reinforcement/__init__.py +24 -0
  384. mindspore/nn/reinforcement/_batch_read_write.py +142 -0
  385. mindspore/nn/reinforcement/_tensors_queue.py +152 -0
  386. mindspore/nn/reinforcement/tensor_array.py +145 -0
  387. mindspore/nn/sparse/__init__.py +23 -0
  388. mindspore/nn/sparse/sparse.py +147 -0
  389. mindspore/nn/wrap/__init__.py +49 -0
  390. mindspore/nn/wrap/cell_wrapper.py +968 -0
  391. mindspore/nn/wrap/grad_reducer.py +608 -0
  392. mindspore/nn/wrap/loss_scale.py +694 -0
  393. mindspore/numpy/__init__.py +121 -0
  394. mindspore/numpy/array_creations.py +2731 -0
  395. mindspore/numpy/array_ops.py +2629 -0
  396. mindspore/numpy/dtypes.py +185 -0
  397. mindspore/numpy/fft.py +966 -0
  398. mindspore/numpy/logic_ops.py +936 -0
  399. mindspore/numpy/math_ops.py +5911 -0
  400. mindspore/numpy/utils.py +214 -0
  401. mindspore/numpy/utils_const.py +565 -0
  402. mindspore/ops/__init__.py +56 -0
  403. mindspore/ops/_constants.py +30 -0
  404. mindspore/ops/_grad_experimental/__init__.py +31 -0
  405. mindspore/ops/_grad_experimental/grad_array_ops.py +830 -0
  406. mindspore/ops/_grad_experimental/grad_base.py +143 -0
  407. mindspore/ops/_grad_experimental/grad_comm_ops.py +714 -0
  408. mindspore/ops/_grad_experimental/grad_debug_ops.py +31 -0
  409. mindspore/ops/_grad_experimental/grad_implementations.py +203 -0
  410. mindspore/ops/_grad_experimental/grad_inner_ops.py +79 -0
  411. mindspore/ops/_grad_experimental/grad_math_ops.py +802 -0
  412. mindspore/ops/_grad_experimental/grad_nn_ops.py +231 -0
  413. mindspore/ops/_grad_experimental/grad_quant_ops.py +238 -0
  414. mindspore/ops/_grad_experimental/grad_sparse.py +342 -0
  415. mindspore/ops/_grad_experimental/grad_sparse_ops.py +399 -0
  416. mindspore/ops/_grad_experimental/taylor_rule.py +220 -0
  417. mindspore/ops/_op_impl/__init__.py +23 -0
  418. mindspore/ops/_op_impl/_custom_op/__init__.py +39 -0
  419. mindspore/ops/_op_impl/_custom_op/_basic.py +158 -0
  420. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +279 -0
  421. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +156 -0
  422. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +109 -0
  423. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +125 -0
  424. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +105 -0
  425. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +124 -0
  426. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +116 -0
  427. mindspore/ops/_op_impl/_custom_op/correction_mul.py +89 -0
  428. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +196 -0
  429. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +366 -0
  430. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +162 -0
  431. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +136 -0
  432. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +206 -0
  433. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +88 -0
  434. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +128 -0
  435. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +199 -0
  436. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +88 -0
  437. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +156 -0
  438. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +184 -0
  439. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +143 -0
  440. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +169 -0
  441. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +548 -0
  442. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +881 -0
  443. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +278 -0
  444. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +200 -0
  445. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +334 -0
  446. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +255 -0
  447. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +222 -0
  448. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +644 -0
  449. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +488 -0
  450. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +87 -0
  451. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +129 -0
  452. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +121 -0
  453. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +352 -0
  454. mindspore/ops/_op_impl/aicpu/__init__.py +441 -0
  455. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  456. mindspore/ops/_op_impl/aicpu/acos.py +32 -0
  457. mindspore/ops/_op_impl/aicpu/acos_grad.py +33 -0
  458. mindspore/ops/_op_impl/aicpu/acosh.py +34 -0
  459. mindspore/ops/_op_impl/aicpu/acosh_grad.py +35 -0
  460. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  461. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  462. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  463. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  464. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  465. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  466. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  467. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  468. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  469. mindspore/ops/_op_impl/aicpu/add_n.py +41 -0
  470. mindspore/ops/_op_impl/aicpu/add_v2.py +40 -0
  471. mindspore/ops/_op_impl/aicpu/addcdiv.py +41 -0
  472. mindspore/ops/_op_impl/aicpu/addcmul.py +47 -0
  473. mindspore/ops/_op_impl/aicpu/adjust_contrastv2.py +32 -0
  474. mindspore/ops/_op_impl/aicpu/adjust_hue.py +31 -0
  475. mindspore/ops/_op_impl/aicpu/adjust_saturation.py +32 -0
  476. mindspore/ops/_op_impl/aicpu/affine_grid.py +33 -0
  477. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  478. mindspore/ops/_op_impl/aicpu/angle.py +31 -0
  479. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  480. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  481. mindspore/ops/_op_impl/aicpu/argmax_with_value.py +43 -0
  482. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  483. mindspore/ops/_op_impl/aicpu/asin.py +32 -0
  484. mindspore/ops/_op_impl/aicpu/asin_grad.py +33 -0
  485. mindspore/ops/_op_impl/aicpu/asinh.py +34 -0
  486. mindspore/ops/_op_impl/aicpu/asinh_grad.py +35 -0
  487. mindspore/ops/_op_impl/aicpu/atanh.py +34 -0
  488. mindspore/ops/_op_impl/aicpu/avgpool_grad_v1.py +37 -0
  489. mindspore/ops/_op_impl/aicpu/avgpool_v1.py +36 -0
  490. mindspore/ops/_op_impl/aicpu/bartlett_window.py +36 -0
  491. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  492. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  493. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  494. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  495. mindspore/ops/_op_impl/aicpu/betainc.py +31 -0
  496. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  497. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +42 -0
  498. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  499. mindspore/ops/_op_impl/aicpu/blackman_window.py +36 -0
  500. mindspore/ops/_op_impl/aicpu/broadcast_to.py +58 -0
  501. mindspore/ops/_op_impl/aicpu/bucketize.py +34 -0
  502. mindspore/ops/_op_impl/aicpu/cache_swap_table.py +102 -0
  503. mindspore/ops/_op_impl/aicpu/cast.py +225 -0
  504. mindspore/ops/_op_impl/aicpu/cauchy.py +33 -0
  505. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  506. mindspore/ops/_op_impl/aicpu/check_numerics.py +33 -0
  507. mindspore/ops/_op_impl/aicpu/cholesky.py +32 -0
  508. mindspore/ops/_op_impl/aicpu/cholesky_inverse.py +31 -0
  509. mindspore/ops/_op_impl/aicpu/cholesky_solve.py +33 -0
  510. mindspore/ops/_op_impl/aicpu/choleskygrad.py +32 -0
  511. mindspore/ops/_op_impl/aicpu/coalesce.py +37 -0
  512. mindspore/ops/_op_impl/aicpu/col2im.py +38 -0
  513. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  514. mindspore/ops/_op_impl/aicpu/compare_and_bitpack.py +37 -0
  515. mindspore/ops/_op_impl/aicpu/complex.py +32 -0
  516. mindspore/ops/_op_impl/aicpu/complex_abs.py +31 -0
  517. mindspore/ops/_op_impl/aicpu/compute_accidental_hits.py +44 -0
  518. mindspore/ops/_op_impl/aicpu/concat.py +57 -0
  519. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  520. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  521. mindspore/ops/_op_impl/aicpu/conj.py +42 -0
  522. mindspore/ops/_op_impl/aicpu/conjugate_transpose.py +58 -0
  523. mindspore/ops/_op_impl/aicpu/cos.py +34 -0
  524. mindspore/ops/_op_impl/aicpu/cosh.py +34 -0
  525. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  526. mindspore/ops/_op_impl/aicpu/crop_and_resize.py +69 -0
  527. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_boxes.py +68 -0
  528. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  529. mindspore/ops/_op_impl/aicpu/cross.py +42 -0
  530. mindspore/ops/_op_impl/aicpu/csr_sparse_matrix_to_dense.py +48 -0
  531. mindspore/ops/_op_impl/aicpu/csr_sparse_matrix_to_sparse_tensor.py +51 -0
  532. mindspore/ops/_op_impl/aicpu/ctc_greedy_decoder.py +35 -0
  533. mindspore/ops/_op_impl/aicpu/ctc_loss_v2.py +43 -0
  534. mindspore/ops/_op_impl/aicpu/ctc_loss_v2_grad.py +45 -0
  535. mindspore/ops/_op_impl/aicpu/ctcloss.py +38 -0
  536. mindspore/ops/_op_impl/aicpu/cummax.py +41 -0
  537. mindspore/ops/_op_impl/aicpu/cumprod.py +58 -0
  538. mindspore/ops/_op_impl/aicpu/cumsum.py +58 -0
  539. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  540. mindspore/ops/_op_impl/aicpu/data_format_vec_permute.py +32 -0
  541. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  542. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
  543. mindspore/ops/_op_impl/aicpu/dense_to_csr_sparse_matrix.py +49 -0
  544. mindspore/ops/_op_impl/aicpu/dense_to_dense_set_operation.py +45 -0
  545. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  546. mindspore/ops/_op_impl/aicpu/depth_to_space.py +44 -0
  547. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  548. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  549. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  550. mindspore/ops/_op_impl/aicpu/digamma.py +31 -0
  551. mindspore/ops/_op_impl/aicpu/div.py +41 -0
  552. mindspore/ops/_op_impl/aicpu/div_no_nan.py +35 -0
  553. mindspore/ops/_op_impl/aicpu/dropout2d.py +42 -0
  554. mindspore/ops/_op_impl/aicpu/dropout3d.py +42 -0
  555. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +41 -0
  556. mindspore/ops/_op_impl/aicpu/dropout_genmask_v3.py +32 -0
  557. mindspore/ops/_op_impl/aicpu/dynamic_stitch.py +42 -0
  558. mindspore/ops/_op_impl/aicpu/edit_distance.py +56 -0
  559. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  560. mindspore/ops/_op_impl/aicpu/embedding_lookup.py +102 -0
  561. mindspore/ops/_op_impl/aicpu/end_of_sequence.py +30 -0
  562. mindspore/ops/_op_impl/aicpu/environ_create.py +28 -0
  563. mindspore/ops/_op_impl/aicpu/environ_destroy_all.py +28 -0
  564. mindspore/ops/_op_impl/aicpu/environ_get.py +41 -0
  565. mindspore/ops/_op_impl/aicpu/environ_set.py +40 -0
  566. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  567. mindspore/ops/_op_impl/aicpu/equal.py +41 -0
  568. mindspore/ops/_op_impl/aicpu/exp.py +37 -0
  569. mindspore/ops/_op_impl/aicpu/expand.py +45 -0
  570. mindspore/ops/_op_impl/aicpu/expand_dims.py +42 -0
  571. mindspore/ops/_op_impl/aicpu/expm1.py +34 -0
  572. mindspore/ops/_op_impl/aicpu/extract_glimpse.py +35 -0
  573. mindspore/ops/_op_impl/aicpu/eye.py +44 -0
  574. mindspore/ops/_op_impl/aicpu/fft_with_size.py +47 -0
  575. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +39 -0
  576. mindspore/ops/_op_impl/aicpu/fill_v2.py +58 -0
  577. mindspore/ops/_op_impl/aicpu/flatten.py +43 -0
  578. mindspore/ops/_op_impl/aicpu/floor_div.py +38 -0
  579. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  580. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  581. mindspore/ops/_op_impl/aicpu/fractional_avg_pool.py +41 -0
  582. mindspore/ops/_op_impl/aicpu/fractional_avg_pool_grad.py +41 -0
  583. mindspore/ops/_op_impl/aicpu/fractional_max_pool.py +41 -0
  584. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_grad_with_fixed_ksize.py +43 -0
  585. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +65 -0
  586. mindspore/ops/_op_impl/aicpu/fractional_max_pool_grad.py +42 -0
  587. mindspore/ops/_op_impl/aicpu/fractional_max_pool_grad_with_fixed_ksize.py +42 -0
  588. mindspore/ops/_op_impl/aicpu/fractional_max_pool_with_fixed_ksize.py +49 -0
  589. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  590. mindspore/ops/_op_impl/aicpu/fused_sparse_adam.py +46 -0
  591. mindspore/ops/_op_impl/aicpu/fused_sparse_ftrl.py +41 -0
  592. mindspore/ops/_op_impl/aicpu/fused_sparse_lazy_adam.py +46 -0
  593. mindspore/ops/_op_impl/aicpu/fused_sparse_proximal_adagrad.py +39 -0
  594. mindspore/ops/_op_impl/aicpu/gamma.py +38 -0
  595. mindspore/ops/_op_impl/aicpu/gather.py +46 -0
  596. mindspore/ops/_op_impl/aicpu/gather_d.py +79 -0
  597. mindspore/ops/_op_impl/aicpu/gather_d_grad_v2.py +79 -0
  598. mindspore/ops/_op_impl/aicpu/gather_grad.py +54 -0
  599. mindspore/ops/_op_impl/aicpu/gather_nd.py +56 -0
  600. mindspore/ops/_op_impl/aicpu/gcd.py +32 -0
  601. mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
  602. mindspore/ops/_op_impl/aicpu/geqrf.py +32 -0
  603. mindspore/ops/_op_impl/aicpu/get_next.py +39 -0
  604. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  605. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  606. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  607. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  608. mindspore/ops/_op_impl/aicpu/grid_sampler_2d.py +35 -0
  609. mindspore/ops/_op_impl/aicpu/grid_sampler_2d_grad.py +38 -0
  610. mindspore/ops/_op_impl/aicpu/grid_sampler_3d.py +34 -0
  611. mindspore/ops/_op_impl/aicpu/grid_sampler_3d_grad.py +38 -0
  612. mindspore/ops/_op_impl/aicpu/hamming_window.py +57 -0
  613. mindspore/ops/_op_impl/aicpu/hard_sigmoid.py +32 -0
  614. mindspore/ops/_op_impl/aicpu/hard_sigmoid_grad.py +33 -0
  615. mindspore/ops/_op_impl/aicpu/heaviside.py +40 -0
  616. mindspore/ops/_op_impl/aicpu/histogram.py +35 -0
  617. mindspore/ops/_op_impl/aicpu/hsv_to_rgb.py +32 -0
  618. mindspore/ops/_op_impl/aicpu/hypot.py +32 -0
  619. mindspore/ops/_op_impl/aicpu/identity.py +42 -0
  620. mindspore/ops/_op_impl/aicpu/identity_n.py +41 -0
  621. mindspore/ops/_op_impl/aicpu/igamma.py +30 -0
  622. mindspore/ops/_op_impl/aicpu/igammac.py +30 -0
  623. mindspore/ops/_op_impl/aicpu/igammagrada.py +30 -0
  624. mindspore/ops/_op_impl/aicpu/im2col.py +43 -0
  625. mindspore/ops/_op_impl/aicpu/imag.py +31 -0
  626. mindspore/ops/_op_impl/aicpu/index_fill.py +54 -0
  627. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  628. mindspore/ops/_op_impl/aicpu/init_data_set_queue.py +27 -0
  629. mindspore/ops/_op_impl/aicpu/inplace_index_add.py +39 -0
  630. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  631. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  632. mindspore/ops/_op_impl/aicpu/is_finite.py +40 -0
  633. mindspore/ops/_op_impl/aicpu/is_inf.py +31 -0
  634. mindspore/ops/_op_impl/aicpu/is_nan.py +31 -0
  635. mindspore/ops/_op_impl/aicpu/kldivloss.py +34 -0
  636. mindspore/ops/_op_impl/aicpu/kldivlossgrad.py +35 -0
  637. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  638. mindspore/ops/_op_impl/aicpu/lcm.py +32 -0
  639. mindspore/ops/_op_impl/aicpu/left_shift.py +38 -0
  640. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  641. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  642. mindspore/ops/_op_impl/aicpu/lgamma.py +33 -0
  643. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +57 -0
  644. mindspore/ops/_op_impl/aicpu/linspace.py +33 -0
  645. mindspore/ops/_op_impl/aicpu/list_diff.py +50 -0
  646. mindspore/ops/_op_impl/aicpu/log.py +37 -0
  647. mindspore/ops/_op_impl/aicpu/log1p.py +34 -0
  648. mindspore/ops/_op_impl/aicpu/log_matrix_determinant.py +31 -0
  649. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  650. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +37 -0
  651. mindspore/ops/_op_impl/aicpu/logical_xor.py +30 -0
  652. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  653. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  654. mindspore/ops/_op_impl/aicpu/logspace.py +36 -0
  655. mindspore/ops/_op_impl/aicpu/lower_bound.py +47 -0
  656. mindspore/ops/_op_impl/aicpu/lstsq.py +34 -0
  657. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  658. mindspore/ops/_op_impl/aicpu/lu_solve.py +32 -0
  659. mindspore/ops/_op_impl/aicpu/lu_unpack.py +114 -0
  660. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +49 -0
  661. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  662. mindspore/ops/_op_impl/aicpu/masked_scatter.py +40 -0
  663. mindspore/ops/_op_impl/aicpu/masked_select.py +31 -0
  664. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +35 -0
  665. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  666. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  667. mindspore/ops/_op_impl/aicpu/matrix_determinant.py +30 -0
  668. mindspore/ops/_op_impl/aicpu/matrix_diag_part_v3.py +54 -0
  669. mindspore/ops/_op_impl/aicpu/matrix_diag_v3.py +56 -0
  670. mindspore/ops/_op_impl/aicpu/matrix_exp.py +34 -0
  671. mindspore/ops/_op_impl/aicpu/matrix_inverse.py +31 -0
  672. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  673. mindspore/ops/_op_impl/aicpu/matrix_power.py +37 -0
  674. mindspore/ops/_op_impl/aicpu/matrix_set_diag_v3.py +54 -0
  675. mindspore/ops/_op_impl/aicpu/matrix_solve.py +35 -0
  676. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  677. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  678. mindspore/ops/_op_impl/aicpu/max_pool3d_grad_with_argmax.py +60 -0
  679. mindspore/ops/_op_impl/aicpu/max_pool3d_with_argmax.py +59 -0
  680. mindspore/ops/_op_impl/aicpu/max_unpool2d.py +57 -0
  681. mindspore/ops/_op_impl/aicpu/max_unpool2d_grad.py +58 -0
  682. mindspore/ops/_op_impl/aicpu/max_unpool3d.py +57 -0
  683. mindspore/ops/_op_impl/aicpu/max_unpool3d_grad.py +58 -0
  684. mindspore/ops/_op_impl/aicpu/maximum_grad_grad.py +40 -0
  685. mindspore/ops/_op_impl/aicpu/maxpool_grad_v1.py +46 -0
  686. mindspore/ops/_op_impl/aicpu/maxpool_v1.py +42 -0
  687. mindspore/ops/_op_impl/aicpu/median.py +39 -0
  688. mindspore/ops/_op_impl/aicpu/median_grad.py +45 -0
  689. mindspore/ops/_op_impl/aicpu/meshgrid.py +41 -0
  690. mindspore/ops/_op_impl/aicpu/minimum_grad_grad.py +40 -0
  691. mindspore/ops/_op_impl/aicpu/mirror_pad.py +50 -0
  692. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +48 -0
  693. mindspore/ops/_op_impl/aicpu/mul.py +43 -0
  694. mindspore/ops/_op_impl/aicpu/mul_no_nan.py +42 -0
  695. mindspore/ops/_op_impl/aicpu/multi_margin_loss.py +37 -0
  696. mindspore/ops/_op_impl/aicpu/multi_margin_loss_grad.py +41 -0
  697. mindspore/ops/_op_impl/aicpu/multilabel_margin_loss_grad.py +37 -0
  698. mindspore/ops/_op_impl/aicpu/multinomial.py +47 -0
  699. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  700. mindspore/ops/_op_impl/aicpu/mvlgamma.py +32 -0
  701. mindspore/ops/_op_impl/aicpu/mvlgamma_grad.py +33 -0
  702. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  703. mindspore/ops/_op_impl/aicpu/neg.py +36 -0
  704. mindspore/ops/_op_impl/aicpu/nextafter.py +32 -0
  705. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  706. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  707. mindspore/ops/_op_impl/aicpu/no_repeat_ngram.py +34 -0
  708. mindspore/ops/_op_impl/aicpu/non_deterministic_ints.py +33 -0
  709. mindspore/ops/_op_impl/aicpu/non_max_suppression.py +36 -0
  710. mindspore/ops/_op_impl/aicpu/non_max_suppression_with_overlaps.py +35 -0
  711. mindspore/ops/_op_impl/aicpu/non_zero.py +43 -0
  712. mindspore/ops/_op_impl/aicpu/not_equal.py +39 -0
  713. mindspore/ops/_op_impl/aicpu/nth_element.py +39 -0
  714. mindspore/ops/_op_impl/aicpu/nuclear_norm.py +33 -0
  715. mindspore/ops/_op_impl/aicpu/one_hot.py +116 -0
  716. mindspore/ops/_op_impl/aicpu/ones_like.py +39 -0
  717. mindspore/ops/_op_impl/aicpu/orgqr.py +34 -0
  718. mindspore/ops/_op_impl/aicpu/pad_and_shift.py +33 -0
  719. mindspore/ops/_op_impl/aicpu/pad_v3.py +61 -0
  720. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +59 -0
  721. mindspore/ops/_op_impl/aicpu/padding.py +41 -0
  722. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +54 -0
  723. mindspore/ops/_op_impl/aicpu/pdist_grad.py +33 -0
  724. mindspore/ops/_op_impl/aicpu/poisson.py +37 -0
  725. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  726. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  727. mindspore/ops/_op_impl/aicpu/pow.py +39 -0
  728. mindspore/ops/_op_impl/aicpu/print_tensor.py +39 -0
  729. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +113 -0
  730. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  731. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  732. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  733. mindspore/ops/_op_impl/aicpu/ragged_range.py +49 -0
  734. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  735. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  736. mindspore/ops/_op_impl/aicpu/random_categorical.py +68 -0
  737. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +36 -0
  738. mindspore/ops/_op_impl/aicpu/random_gamma.py +38 -0
  739. mindspore/ops/_op_impl/aicpu/random_poisson.py +134 -0
  740. mindspore/ops/_op_impl/aicpu/random_shuffle.py +47 -0
  741. mindspore/ops/_op_impl/aicpu/randperm.py +38 -0
  742. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  743. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  744. mindspore/ops/_op_impl/aicpu/range_v2.py +35 -0
  745. mindspore/ops/_op_impl/aicpu/real.py +31 -0
  746. mindspore/ops/_op_impl/aicpu/real_div.py +40 -0
  747. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  748. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  749. mindspore/ops/_op_impl/aicpu/reduce_mean.py +57 -0
  750. mindspore/ops/_op_impl/aicpu/reduce_prod.py +57 -0
  751. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  752. mindspore/ops/_op_impl/aicpu/relu_grad_v3.py +41 -0
  753. mindspore/ops/_op_impl/aicpu/relu_v3.py +38 -0
  754. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +96 -0
  755. mindspore/ops/_op_impl/aicpu/reshape.py +42 -0
  756. mindspore/ops/_op_impl/aicpu/resize_area.py +40 -0
  757. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +20 -0
  758. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +19 -0
  759. mindspore/ops/_op_impl/aicpu/resize_bilinear.py +32 -0
  760. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +32 -0
  761. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +36 -0
  762. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +35 -0
  763. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  764. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  765. mindspore/ops/_op_impl/aicpu/reverse_sequence.py +55 -0
  766. mindspore/ops/_op_impl/aicpu/reversev2.py +54 -0
  767. mindspore/ops/_op_impl/aicpu/rgb_to_hsv.py +32 -0
  768. mindspore/ops/_op_impl/aicpu/right_shift.py +38 -0
  769. mindspore/ops/_op_impl/aicpu/rnnt_loss.py +35 -0
  770. mindspore/ops/_op_impl/aicpu/round.py +34 -0
  771. mindspore/ops/_op_impl/aicpu/rsqrt.py +33 -0
  772. mindspore/ops/_op_impl/aicpu/rsqrt_grad.py +36 -0
  773. mindspore/ops/_op_impl/aicpu/sample_distorted_bounding_box_v2.py +49 -0
  774. mindspore/ops/_op_impl/aicpu/scale_and_translate.py +52 -0
  775. mindspore/ops/_op_impl/aicpu/scale_and_translate_grad.py +36 -0
  776. mindspore/ops/_op_impl/aicpu/scatter.py +79 -0
  777. mindspore/ops/_op_impl/aicpu/scatter_add_with_axis.py +53 -0
  778. mindspore/ops/_op_impl/aicpu/scatter_elements.py +39 -0
  779. mindspore/ops/_op_impl/aicpu/scatter_nd.py +59 -0
  780. mindspore/ops/_op_impl/aicpu/scatter_nd_max.py +54 -0
  781. mindspore/ops/_op_impl/aicpu/scatter_nd_min.py +54 -0
  782. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +59 -0
  783. mindspore/ops/_op_impl/aicpu/search_sorted.py +44 -0
  784. mindspore/ops/_op_impl/aicpu/segment_max.py +52 -0
  785. mindspore/ops/_op_impl/aicpu/segment_mean.py +56 -0
  786. mindspore/ops/_op_impl/aicpu/segment_min.py +52 -0
  787. mindspore/ops/_op_impl/aicpu/segment_prod.py +56 -0
  788. mindspore/ops/_op_impl/aicpu/segment_sum.py +56 -0
  789. mindspore/ops/_op_impl/aicpu/select.py +45 -0
  790. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  791. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  792. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  793. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  794. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  795. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  796. mindspore/ops/_op_impl/aicpu/set_size.py +38 -0
  797. mindspore/ops/_op_impl/aicpu/sign.py +36 -0
  798. mindspore/ops/_op_impl/aicpu/sin.py +34 -0
  799. mindspore/ops/_op_impl/aicpu/sinc.py +43 -0
  800. mindspore/ops/_op_impl/aicpu/sinh.py +34 -0
  801. mindspore/ops/_op_impl/aicpu/slice.py +59 -0
  802. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  803. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  804. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  805. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  806. mindspore/ops/_op_impl/aicpu/space_to_depth.py +44 -0
  807. mindspore/ops/_op_impl/aicpu/sparse_addmm.py +87 -0
  808. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +80 -0
  809. mindspore/ops/_op_impl/aicpu/sparse_apply_centered_rms_prop.py +105 -0
  810. mindspore/ops/_op_impl/aicpu/sparse_apply_momentum.py +80 -0
  811. mindspore/ops/_op_impl/aicpu/sparse_apply_proximal_gradient_descent.py +79 -0
  812. mindspore/ops/_op_impl/aicpu/sparse_concat.py +59 -0
  813. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  814. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_add.py +58 -0
  815. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_div.py +58 -0
  816. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_mul.py +58 -0
  817. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  818. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  819. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  820. mindspore/ops/_op_impl/aicpu/sparse_matrix_nnz.py +81 -0
  821. mindspore/ops/_op_impl/aicpu/sparse_matrix_transpose.py +116 -0
  822. mindspore/ops/_op_impl/aicpu/sparse_reorder.py +56 -0
  823. mindspore/ops/_op_impl/aicpu/sparse_reshape.py +34 -0
  824. mindspore/ops/_op_impl/aicpu/sparse_segment_mean_grad.py +36 -0
  825. mindspore/ops/_op_impl/aicpu/sparse_segment_mean_with_num_segments.py +44 -0
  826. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n.py +43 -0
  827. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n_grad.py +38 -0
  828. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n_with_num_segments.py +44 -0
  829. mindspore/ops/_op_impl/aicpu/sparse_segment_sum.py +49 -0
  830. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  831. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  832. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  833. mindspore/ops/_op_impl/aicpu/sparse_softmax.py +33 -0
  834. mindspore/ops/_op_impl/aicpu/sparse_softmax_cross_entropy_with_logits_v2.py +35 -0
  835. mindspore/ops/_op_impl/aicpu/sparse_sparse_maximum.py +53 -0
  836. mindspore/ops/_op_impl/aicpu/sparse_sparse_minimum.py +53 -0
  837. mindspore/ops/_op_impl/aicpu/sparse_tensor_dense_add.py +84 -0
  838. mindspore/ops/_op_impl/aicpu/sparse_tensor_dense_mat_mul.py +190 -0
  839. mindspore/ops/_op_impl/aicpu/sparse_tensor_to_csr_sparse_matrix.py +51 -0
  840. mindspore/ops/_op_impl/aicpu/sparse_to_dense_v2.py +73 -0
  841. mindspore/ops/_op_impl/aicpu/split.py +45 -0
  842. mindspore/ops/_op_impl/aicpu/sqrt.py +34 -0
  843. mindspore/ops/_op_impl/aicpu/sqrt_grad.py +35 -0
  844. mindspore/ops/_op_impl/aicpu/square.py +35 -0
  845. mindspore/ops/_op_impl/aicpu/squared_difference.py +37 -0
  846. mindspore/ops/_op_impl/aicpu/squeeze.py +42 -0
  847. mindspore/ops/_op_impl/aicpu/sspaddmm.py +97 -0
  848. mindspore/ops/_op_impl/aicpu/stack.py +45 -0
  849. mindspore/ops/_op_impl/aicpu/stack_push_pop.py +87 -0
  850. mindspore/ops/_op_impl/aicpu/standard_laplace.py +34 -0
  851. mindspore/ops/_op_impl/aicpu/standard_normal.py +34 -0
  852. mindspore/ops/_op_impl/aicpu/stateless_dropout_genmask.py +37 -0
  853. mindspore/ops/_op_impl/aicpu/stft.py +70 -0
  854. mindspore/ops/_op_impl/aicpu/strided_slice.py +43 -0
  855. mindspore/ops/_op_impl/aicpu/strided_slice_grad.py +50 -0
  856. mindspore/ops/_op_impl/aicpu/sub.py +41 -0
  857. mindspore/ops/_op_impl/aicpu/sub_and_filter.py +36 -0
  858. mindspore/ops/_op_impl/aicpu/tan.py +34 -0
  859. mindspore/ops/_op_impl/aicpu/tanh.py +34 -0
  860. mindspore/ops/_op_impl/aicpu/tanh_grad.py +35 -0
  861. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  862. mindspore/ops/_op_impl/aicpu/tile.py +56 -0
  863. mindspore/ops/_op_impl/aicpu/topk.py +34 -0
  864. mindspore/ops/_op_impl/aicpu/trace.py +40 -0
  865. mindspore/ops/_op_impl/aicpu/tracegrad.py +41 -0
  866. mindspore/ops/_op_impl/aicpu/trans_data.py +35 -0
  867. mindspore/ops/_op_impl/aicpu/transpose.py +58 -0
  868. mindspore/ops/_op_impl/aicpu/tridiagonal_matmul.py +42 -0
  869. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  870. mindspore/ops/_op_impl/aicpu/tril.py +42 -0
  871. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  872. mindspore/ops/_op_impl/aicpu/triplet_margin_loss.py +62 -0
  873. mindspore/ops/_op_impl/aicpu/triu.py +43 -0
  874. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  875. mindspore/ops/_op_impl/aicpu/truncated_normal.py +39 -0
  876. mindspore/ops/_op_impl/aicpu/uniform.py +36 -0
  877. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +41 -0
  878. mindspore/ops/_op_impl/aicpu/uniform_int.py +36 -0
  879. mindspore/ops/_op_impl/aicpu/uniform_real.py +33 -0
  880. mindspore/ops/_op_impl/aicpu/unique.py +31 -0
  881. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +47 -0
  882. mindspore/ops/_op_impl/aicpu/unique_with_pad.py +32 -0
  883. mindspore/ops/_op_impl/aicpu/unravel_index.py +32 -0
  884. mindspore/ops/_op_impl/aicpu/unsorted_segment_prod.py +53 -0
  885. mindspore/ops/_op_impl/aicpu/unsorted_segment_sum.py +57 -0
  886. mindspore/ops/_op_impl/aicpu/unstack.py +45 -0
  887. mindspore/ops/_op_impl/aicpu/update_cache.py +44 -0
  888. mindspore/ops/_op_impl/aicpu/upper_bound.py +47 -0
  889. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +42 -0
  890. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +49 -0
  891. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +40 -0
  892. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +50 -0
  893. mindspore/ops/_op_impl/aicpu/xdivy.py +35 -0
  894. mindspore/ops/_op_impl/aicpu/xlogy.py +33 -0
  895. mindspore/ops/_op_impl/aicpu/zeros_like.py +42 -0
  896. mindspore/ops/_op_impl/aicpu/zeta.py +31 -0
  897. mindspore/ops/_op_impl/akg/__init__.py +19 -0
  898. mindspore/ops/_op_impl/akg/ascend/__init__.py +48 -0
  899. mindspore/ops/_op_impl/akg/ascend/abs.py +35 -0
  900. mindspore/ops/_op_impl/akg/ascend/add.py +42 -0
  901. mindspore/ops/_op_impl/akg/ascend/add_n.py +37 -0
  902. mindspore/ops/_op_impl/akg/ascend/batchmatmul.py +33 -0
  903. mindspore/ops/_op_impl/akg/ascend/cast.py +46 -0
  904. mindspore/ops/_op_impl/akg/ascend/equal.py +35 -0
  905. mindspore/ops/_op_impl/akg/ascend/exp.py +35 -0
  906. mindspore/ops/_op_impl/akg/ascend/expand_dims.py +33 -0
  907. mindspore/ops/_op_impl/akg/ascend/greater.py +34 -0
  908. mindspore/ops/_op_impl/akg/ascend/greater_equal.py +35 -0
  909. mindspore/ops/_op_impl/akg/ascend/less.py +31 -0
  910. mindspore/ops/_op_impl/akg/ascend/less_equal.py +35 -0
  911. mindspore/ops/_op_impl/akg/ascend/load_im2col.py +33 -0
  912. mindspore/ops/_op_impl/akg/ascend/log.py +34 -0
  913. mindspore/ops/_op_impl/akg/ascend/maximum.py +36 -0
  914. mindspore/ops/_op_impl/akg/ascend/minimum.py +39 -0
  915. mindspore/ops/_op_impl/akg/ascend/mul.py +41 -0
  916. mindspore/ops/_op_impl/akg/ascend/neg.py +37 -0
  917. mindspore/ops/_op_impl/akg/ascend/pow.py +35 -0
  918. mindspore/ops/_op_impl/akg/ascend/prod_force_se_a.py +33 -0
  919. mindspore/ops/_op_impl/akg/ascend/real_div.py +36 -0
  920. mindspore/ops/_op_impl/akg/ascend/reciprocal.py +32 -0
  921. mindspore/ops/_op_impl/akg/ascend/reduce_max.py +32 -0
  922. mindspore/ops/_op_impl/akg/ascend/reduce_min.py +32 -0
  923. mindspore/ops/_op_impl/akg/ascend/reduce_sum.py +37 -0
  924. mindspore/ops/_op_impl/akg/ascend/rsqrt.py +35 -0
  925. mindspore/ops/_op_impl/akg/ascend/select.py +37 -0
  926. mindspore/ops/_op_impl/akg/ascend/sqrt.py +35 -0
  927. mindspore/ops/_op_impl/akg/ascend/square.py +35 -0
  928. mindspore/ops/_op_impl/akg/ascend/sub.py +42 -0
  929. mindspore/ops/_op_impl/akg/cpu/__init__.py +23 -0
  930. mindspore/ops/_op_impl/akg/cpu/coo2csr.py +29 -0
  931. mindspore/ops/_op_impl/akg/cpu/csr2coo.py +29 -0
  932. mindspore/ops/_op_impl/akg/cpu/csr_gather.py +33 -0
  933. mindspore/ops/_op_impl/akg/cpu/csr_mm.py +34 -0
  934. mindspore/ops/_op_impl/akg/cpu/csr_mul.py +33 -0
  935. mindspore/ops/_op_impl/akg/cpu/csr_mv.py +33 -0
  936. mindspore/ops/_op_impl/akg/cpu/csr_reduce_sum.py +31 -0
  937. mindspore/ops/_op_impl/akg/gpu/__init__.py +24 -0
  938. mindspore/ops/_op_impl/akg/gpu/coo2csr.py +29 -0
  939. mindspore/ops/_op_impl/akg/gpu/csr2coo.py +29 -0
  940. mindspore/ops/_op_impl/akg/gpu/csr_div.py +36 -0
  941. mindspore/ops/_op_impl/akg/gpu/csr_gather.py +33 -0
  942. mindspore/ops/_op_impl/akg/gpu/csr_mm.py +37 -0
  943. mindspore/ops/_op_impl/akg/gpu/csr_mul.py +36 -0
  944. mindspore/ops/_op_impl/akg/gpu/csr_mv.py +36 -0
  945. mindspore/ops/_op_impl/akg/gpu/csr_reduce_sum.py +33 -0
  946. mindspore/ops/_op_impl/cpu/__init__.py +78 -0
  947. mindspore/ops/_op_impl/cpu/adam.py +49 -0
  948. mindspore/ops/_op_impl/cpu/adam_weight_decay.py +47 -0
  949. mindspore/ops/_op_impl/cpu/arg_max.py +30 -0
  950. mindspore/ops/_op_impl/cpu/arg_max_with_value.py +31 -0
  951. mindspore/ops/_op_impl/cpu/arg_min_with_value.py +31 -0
  952. mindspore/ops/_op_impl/cpu/buffer_append.py +28 -0
  953. mindspore/ops/_op_impl/cpu/buffer_get.py +28 -0
  954. mindspore/ops/_op_impl/cpu/buffer_sample.py +28 -0
  955. mindspore/ops/_op_impl/cpu/cast.py +171 -0
  956. mindspore/ops/_op_impl/cpu/concat_offset.py +38 -0
  957. mindspore/ops/_op_impl/cpu/conv2d.py +30 -0
  958. mindspore/ops/_op_impl/cpu/conv3d.py +30 -0
  959. mindspore/ops/_op_impl/cpu/div.py +32 -0
  960. mindspore/ops/_op_impl/cpu/dropout.py +31 -0
  961. mindspore/ops/_op_impl/cpu/dropout_grad.py +30 -0
  962. mindspore/ops/_op_impl/cpu/dynamic_shape.py +42 -0
  963. mindspore/ops/_op_impl/cpu/dynamic_stitch.py +41 -0
  964. mindspore/ops/_op_impl/cpu/equal_count.py +30 -0
  965. mindspore/ops/_op_impl/cpu/gather_d.py +49 -0
  966. mindspore/ops/_op_impl/cpu/gather_d_grad.py +38 -0
  967. mindspore/ops/_op_impl/cpu/gather_d_grad_v2.py +40 -0
  968. mindspore/ops/_op_impl/cpu/gather_v2.py +40 -0
  969. mindspore/ops/_op_impl/cpu/hsigmoid.py +33 -0
  970. mindspore/ops/_op_impl/cpu/hsigmoid_grad.py +34 -0
  971. mindspore/ops/_op_impl/cpu/hswish.py +32 -0
  972. mindspore/ops/_op_impl/cpu/hswish_grad.py +33 -0
  973. mindspore/ops/_op_impl/cpu/identity_n.py +40 -0
  974. mindspore/ops/_op_impl/cpu/is_finite.py +39 -0
  975. mindspore/ops/_op_impl/cpu/l2loss.py +30 -0
  976. mindspore/ops/_op_impl/cpu/layer_norm.py +36 -0
  977. mindspore/ops/_op_impl/cpu/layer_norm_grad.py +38 -0
  978. mindspore/ops/_op_impl/cpu/maximum.py +35 -0
  979. mindspore/ops/_op_impl/cpu/maximum_grad.py +47 -0
  980. mindspore/ops/_op_impl/cpu/minimum.py +40 -0
  981. mindspore/ops/_op_impl/cpu/minimum_grad.py +51 -0
  982. mindspore/ops/_op_impl/cpu/mirror_pad.py +36 -0
  983. mindspore/ops/_op_impl/cpu/mirror_pad_grad.py +36 -0
  984. mindspore/ops/_op_impl/cpu/mul.py +32 -0
  985. mindspore/ops/_op_impl/cpu/one_hot.py +31 -0
  986. mindspore/ops/_op_impl/cpu/pad.py +32 -0
  987. mindspore/ops/_op_impl/cpu/pow.py +32 -0
  988. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +42 -0
  989. mindspore/ops/_op_impl/cpu/pyexecute.py +29 -0
  990. mindspore/ops/_op_impl/cpu/pyfunc.py +29 -0
  991. mindspore/ops/_op_impl/cpu/range.py +34 -0
  992. mindspore/ops/_op_impl/cpu/real_div.py +33 -0
  993. mindspore/ops/_op_impl/cpu/reduce_all.py +29 -0
  994. mindspore/ops/_op_impl/cpu/reduce_any.py +29 -0
  995. mindspore/ops/_op_impl/cpu/reduce_max.py +32 -0
  996. mindspore/ops/_op_impl/cpu/reduce_mean.py +40 -0
  997. mindspore/ops/_op_impl/cpu/reduce_min.py +32 -0
  998. mindspore/ops/_op_impl/cpu/reduce_prod.py +40 -0
  999. mindspore/ops/_op_impl/cpu/reduce_std.py +31 -0
  1000. mindspore/ops/_op_impl/cpu/reduce_sum.py +41 -0
  1001. mindspore/ops/_op_impl/cpu/space_to_batch_nd.py +38 -0
  1002. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  1003. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  1004. mindspore/ops/_op_impl/cpu/split.py +34 -0
  1005. mindspore/ops/_op_impl/cpu/sspaddmm.py +95 -0
  1006. mindspore/ops/_op_impl/cpu/stack.py +38 -0
  1007. mindspore/ops/_op_impl/cpu/sub.py +32 -0
  1008. mindspore/ops/_op_impl/cpu/tensor_copy_slices.py +41 -0
  1009. mindspore/ops/_op_impl/cpu/tile.py +37 -0
  1010. mindspore/ops/_op_impl/cpu/top_k.py +31 -0
  1011. mindspore/ops/_op_impl/cpu/transpose.py +39 -0
  1012. mindspore/ops/_primitive_cache.py +90 -0
  1013. mindspore/ops/_register_for_op.py +73 -0
  1014. mindspore/ops/_utils/__init__.py +20 -0
  1015. mindspore/ops/_utils/utils.py +147 -0
  1016. mindspore/ops/_vmap/__init__.py +25 -0
  1017. mindspore/ops/_vmap/vmap_array_ops.py +2149 -0
  1018. mindspore/ops/_vmap/vmap_base.py +533 -0
  1019. mindspore/ops/_vmap/vmap_convolution_ops.py +441 -0
  1020. mindspore/ops/_vmap/vmap_debug_ops.py +50 -0
  1021. mindspore/ops/_vmap/vmap_grad_math_ops.py +274 -0
  1022. mindspore/ops/_vmap/vmap_grad_nn_ops.py +806 -0
  1023. mindspore/ops/_vmap/vmap_image_ops.py +194 -0
  1024. mindspore/ops/_vmap/vmap_math_ops.py +993 -0
  1025. mindspore/ops/_vmap/vmap_nn_ops.py +2250 -0
  1026. mindspore/ops/_vmap/vmap_other_ops.py +105 -0
  1027. mindspore/ops/_vmap/vmap_random_ops.py +122 -0
  1028. mindspore/ops/_vmap/vmap_sparse_ops.py +89 -0
  1029. mindspore/ops/auto_generate/__init__.py +31 -0
  1030. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +309 -0
  1031. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +252 -0
  1032. mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
  1033. mindspore/ops/auto_generate/gen_extend_func.py +1701 -0
  1034. mindspore/ops/auto_generate/gen_ops_def.py +8482 -0
  1035. mindspore/ops/auto_generate/gen_ops_prim.py +16704 -0
  1036. mindspore/ops/auto_generate/pyboost_inner_prim.py +549 -0
  1037. mindspore/ops/composite/__init__.py +71 -0
  1038. mindspore/ops/composite/base.py +1318 -0
  1039. mindspore/ops/composite/env_ops.py +41 -0
  1040. mindspore/ops/composite/math_ops.py +125 -0
  1041. mindspore/ops/composite/multitype_ops/__init__.py +77 -0
  1042. mindspore/ops/composite/multitype_ops/_compile_utils.py +1459 -0
  1043. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +897 -0
  1044. mindspore/ops/composite/multitype_ops/add_impl.py +606 -0
  1045. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +56 -0
  1046. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +56 -0
  1047. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +56 -0
  1048. mindspore/ops/composite/multitype_ops/div_impl.py +189 -0
  1049. mindspore/ops/composite/multitype_ops/equal_impl.py +335 -0
  1050. mindspore/ops/composite/multitype_ops/floordiv_impl.py +88 -0
  1051. mindspore/ops/composite/multitype_ops/getitem_impl.py +400 -0
  1052. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +109 -0
  1053. mindspore/ops/composite/multitype_ops/greater_impl.py +110 -0
  1054. mindspore/ops/composite/multitype_ops/in_impl.py +196 -0
  1055. mindspore/ops/composite/multitype_ops/left_shift_impl.py +37 -0
  1056. mindspore/ops/composite/multitype_ops/less_equal_impl.py +111 -0
  1057. mindspore/ops/composite/multitype_ops/less_impl.py +112 -0
  1058. mindspore/ops/composite/multitype_ops/logic_not_impl.py +113 -0
  1059. mindspore/ops/composite/multitype_ops/logical_and_impl.py +60 -0
  1060. mindspore/ops/composite/multitype_ops/logical_or_impl.py +61 -0
  1061. mindspore/ops/composite/multitype_ops/mod_impl.py +86 -0
  1062. mindspore/ops/composite/multitype_ops/mul_impl.py +294 -0
  1063. mindspore/ops/composite/multitype_ops/negative_impl.py +79 -0
  1064. mindspore/ops/composite/multitype_ops/not_equal_impl.py +290 -0
  1065. mindspore/ops/composite/multitype_ops/not_in_impl.py +196 -0
  1066. mindspore/ops/composite/multitype_ops/ones_like_impl.py +96 -0
  1067. mindspore/ops/composite/multitype_ops/pow_impl.py +87 -0
  1068. mindspore/ops/composite/multitype_ops/right_shift_impl.py +37 -0
  1069. mindspore/ops/composite/multitype_ops/setitem_impl.py +884 -0
  1070. mindspore/ops/composite/multitype_ops/sub_impl.py +116 -0
  1071. mindspore/ops/composite/multitype_ops/uadd_impl.py +29 -0
  1072. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +228 -0
  1073. mindspore/ops/deprecated.py +315 -0
  1074. mindspore/ops/function/__init__.py +782 -0
  1075. mindspore/ops/function/array_func.py +7226 -0
  1076. mindspore/ops/function/clip_func.py +384 -0
  1077. mindspore/ops/function/debug_func.py +181 -0
  1078. mindspore/ops/function/fft_func.py +44 -0
  1079. mindspore/ops/function/grad/__init__.py +34 -0
  1080. mindspore/ops/function/grad/grad_func.py +1425 -0
  1081. mindspore/ops/function/image_func.py +292 -0
  1082. mindspore/ops/function/linalg_func.py +416 -0
  1083. mindspore/ops/function/math_func.py +12228 -0
  1084. mindspore/ops/function/nn_func.py +8609 -0
  1085. mindspore/ops/function/other_func.py +115 -0
  1086. mindspore/ops/function/parameter_func.py +134 -0
  1087. mindspore/ops/function/random_func.py +1715 -0
  1088. mindspore/ops/function/reshard_func.py +104 -0
  1089. mindspore/ops/function/sparse_func.py +884 -0
  1090. mindspore/ops/function/sparse_unary_func.py +2422 -0
  1091. mindspore/ops/function/spectral_func.py +150 -0
  1092. mindspore/ops/function/vmap_func.py +117 -0
  1093. mindspore/ops/functional.py +464 -0
  1094. mindspore/ops/op_info_register.py +1572 -0
  1095. mindspore/ops/operations/__init__.py +722 -0
  1096. mindspore/ops/operations/_csr_ops.py +403 -0
  1097. mindspore/ops/operations/_custom_grad.py +181 -0
  1098. mindspore/ops/operations/_embedding_cache_ops.py +307 -0
  1099. mindspore/ops/operations/_grad_ops.py +2978 -0
  1100. mindspore/ops/operations/_infer_ops.py +19 -0
  1101. mindspore/ops/operations/_inner_ops.py +2544 -0
  1102. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  1103. mindspore/ops/operations/_ms_kernel.py +601 -0
  1104. mindspore/ops/operations/_ocr_ops.py +379 -0
  1105. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  1106. mindspore/ops/operations/_pyfunc_registry.py +58 -0
  1107. mindspore/ops/operations/_quant_ops.py +1844 -0
  1108. mindspore/ops/operations/_rl_inner_ops.py +1231 -0
  1109. mindspore/ops/operations/_scalar_ops.py +106 -0
  1110. mindspore/ops/operations/_sequence_ops.py +1155 -0
  1111. mindspore/ops/operations/_sparse_grad_ops.py +56 -0
  1112. mindspore/ops/operations/_tensor_array.py +359 -0
  1113. mindspore/ops/operations/_thor_ops.py +807 -0
  1114. mindspore/ops/operations/array_ops.py +6124 -0
  1115. mindspore/ops/operations/comm_ops.py +1985 -0
  1116. mindspore/ops/operations/control_ops.py +127 -0
  1117. mindspore/ops/operations/custom_ops.py +1129 -0
  1118. mindspore/ops/operations/debug_ops.py +678 -0
  1119. mindspore/ops/operations/image_ops.py +1041 -0
  1120. mindspore/ops/operations/inner_ops.py +697 -0
  1121. mindspore/ops/operations/linalg_ops.py +95 -0
  1122. mindspore/ops/operations/manually_defined/__init__.py +24 -0
  1123. mindspore/ops/operations/manually_defined/_inner.py +73 -0
  1124. mindspore/ops/operations/manually_defined/ops_def.py +2271 -0
  1125. mindspore/ops/operations/math_ops.py +5095 -0
  1126. mindspore/ops/operations/nn_ops.py +9575 -0
  1127. mindspore/ops/operations/other_ops.py +874 -0
  1128. mindspore/ops/operations/random_ops.py +1288 -0
  1129. mindspore/ops/operations/reshard_ops.py +53 -0
  1130. mindspore/ops/operations/rl_ops.py +288 -0
  1131. mindspore/ops/operations/sparse_ops.py +2753 -0
  1132. mindspore/ops/operations/spectral_ops.py +111 -0
  1133. mindspore/ops/primitive.py +1046 -0
  1134. mindspore/ops/signature.py +54 -0
  1135. mindspore/ops/vm_impl_registry.py +91 -0
  1136. mindspore/ops_generate/__init__.py +27 -0
  1137. mindspore/ops_generate/arg_dtype_cast.py +252 -0
  1138. mindspore/ops_generate/arg_handler.py +197 -0
  1139. mindspore/ops_generate/gen_aclnn_implement.py +263 -0
  1140. mindspore/ops_generate/gen_constants.py +36 -0
  1141. mindspore/ops_generate/gen_ops.py +1099 -0
  1142. mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
  1143. mindspore/ops_generate/gen_pyboost_func.py +1052 -0
  1144. mindspore/ops_generate/gen_utils.py +209 -0
  1145. mindspore/ops_generate/op_proto.py +145 -0
  1146. mindspore/ops_generate/pyboost_utils.py +367 -0
  1147. mindspore/ops_generate/template.py +261 -0
  1148. mindspore/parallel/__init__.py +30 -0
  1149. mindspore/parallel/_auto_parallel_context.py +1486 -0
  1150. mindspore/parallel/_cell_wrapper.py +174 -0
  1151. mindspore/parallel/_cost_model_context.py +700 -0
  1152. mindspore/parallel/_dp_allreduce_fusion.py +159 -0
  1153. mindspore/parallel/_offload_context.py +275 -0
  1154. mindspore/parallel/_parallel_serialization.py +561 -0
  1155. mindspore/parallel/_ps_context.py +242 -0
  1156. mindspore/parallel/_recovery_context.py +110 -0
  1157. mindspore/parallel/_tensor.py +730 -0
  1158. mindspore/parallel/_transformer/__init__.py +35 -0
  1159. mindspore/parallel/_transformer/layers.py +765 -0
  1160. mindspore/parallel/_transformer/loss.py +251 -0
  1161. mindspore/parallel/_transformer/moe.py +693 -0
  1162. mindspore/parallel/_transformer/op_parallel_config.py +222 -0
  1163. mindspore/parallel/_transformer/transformer.py +3119 -0
  1164. mindspore/parallel/_utils.py +612 -0
  1165. mindspore/parallel/algo_parameter_config.py +400 -0
  1166. mindspore/parallel/checkpoint_transform.py +650 -0
  1167. mindspore/parallel/cluster/__init__.py +15 -0
  1168. mindspore/parallel/cluster/process_entity/__init__.py +18 -0
  1169. mindspore/parallel/cluster/process_entity/_api.py +352 -0
  1170. mindspore/parallel/cluster/process_entity/_utils.py +101 -0
  1171. mindspore/parallel/cluster/run.py +136 -0
  1172. mindspore/parallel/mpi/__init__.py +14 -0
  1173. mindspore/parallel/mpi/_mpi_config.py +116 -0
  1174. mindspore/parallel/parameter_broadcast.py +151 -0
  1175. mindspore/parallel/shard.py +481 -0
  1176. mindspore/parallel/transform_safetensors.py +993 -0
  1177. mindspore/profiler/__init__.py +28 -0
  1178. mindspore/profiler/common/__init__.py +14 -0
  1179. mindspore/profiler/common/constant.py +29 -0
  1180. mindspore/profiler/common/exceptions/__init__.py +14 -0
  1181. mindspore/profiler/common/exceptions/error_code.py +83 -0
  1182. mindspore/profiler/common/exceptions/exceptions.py +286 -0
  1183. mindspore/profiler/common/process_pool.py +41 -0
  1184. mindspore/profiler/common/registry.py +47 -0
  1185. mindspore/profiler/common/singleton.py +28 -0
  1186. mindspore/profiler/common/struct_type.py +118 -0
  1187. mindspore/profiler/common/util.py +472 -0
  1188. mindspore/profiler/common/validator/__init__.py +14 -0
  1189. mindspore/profiler/common/validator/validate_path.py +84 -0
  1190. mindspore/profiler/dynamic_profiler.py +694 -0
  1191. mindspore/profiler/envprofiling.py +254 -0
  1192. mindspore/profiler/parser/__init__.py +14 -0
  1193. mindspore/profiler/parser/aicpu_data_parser.py +272 -0
  1194. mindspore/profiler/parser/ascend_analysis/__init__.py +14 -0
  1195. mindspore/profiler/parser/ascend_analysis/constant.py +71 -0
  1196. mindspore/profiler/parser/ascend_analysis/file_manager.py +180 -0
  1197. mindspore/profiler/parser/ascend_analysis/function_event.py +185 -0
  1198. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +136 -0
  1199. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +131 -0
  1200. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +104 -0
  1201. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  1202. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +123 -0
  1203. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
  1204. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +75 -0
  1205. mindspore/profiler/parser/ascend_cluster_generator.py +116 -0
  1206. mindspore/profiler/parser/ascend_communicate_generator.py +314 -0
  1207. mindspore/profiler/parser/ascend_flops_generator.py +116 -0
  1208. mindspore/profiler/parser/ascend_fpbp_generator.py +82 -0
  1209. mindspore/profiler/parser/ascend_hccl_generator.py +271 -0
  1210. mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
  1211. mindspore/profiler/parser/ascend_memory_generator.py +185 -0
  1212. mindspore/profiler/parser/ascend_msprof_exporter.py +282 -0
  1213. mindspore/profiler/parser/ascend_msprof_generator.py +187 -0
  1214. mindspore/profiler/parser/ascend_op_generator.py +334 -0
  1215. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  1216. mindspore/profiler/parser/ascend_timeline_generator.py +545 -0
  1217. mindspore/profiler/parser/base_timeline_generator.py +483 -0
  1218. mindspore/profiler/parser/container.py +229 -0
  1219. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +697 -0
  1220. mindspore/profiler/parser/flops_parser.py +531 -0
  1221. mindspore/profiler/parser/framework_enum.py +111 -0
  1222. mindspore/profiler/parser/framework_parser.py +464 -0
  1223. mindspore/profiler/parser/framework_struct.py +61 -0
  1224. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  1225. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  1226. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  1227. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  1228. mindspore/profiler/parser/hccl_parser.py +573 -0
  1229. mindspore/profiler/parser/hwts_log_parser.py +122 -0
  1230. mindspore/profiler/parser/integrator.py +526 -0
  1231. mindspore/profiler/parser/memory_usage_parser.py +277 -0
  1232. mindspore/profiler/parser/minddata_analyzer.py +800 -0
  1233. mindspore/profiler/parser/minddata_parser.py +186 -0
  1234. mindspore/profiler/parser/minddata_pipeline_parser.py +299 -0
  1235. mindspore/profiler/parser/op_intermediate_parser.py +149 -0
  1236. mindspore/profiler/parser/optime_parser.py +250 -0
  1237. mindspore/profiler/parser/profiler_info.py +213 -0
  1238. mindspore/profiler/parser/step_trace_parser.py +666 -0
  1239. mindspore/profiler/profiler.py +153 -0
  1240. mindspore/profiler/profiling.py +1922 -0
  1241. mindspore/rewrite/__init__.py +28 -0
  1242. mindspore/rewrite/api/__init__.py +17 -0
  1243. mindspore/rewrite/api/node.py +519 -0
  1244. mindspore/rewrite/api/node_type.py +53 -0
  1245. mindspore/rewrite/api/pattern_engine.py +490 -0
  1246. mindspore/rewrite/api/scoped_value.py +181 -0
  1247. mindspore/rewrite/api/symbol_tree.py +497 -0
  1248. mindspore/rewrite/ast_helpers/__init__.py +25 -0
  1249. mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
  1250. mindspore/rewrite/ast_helpers/ast_finder.py +404 -0
  1251. mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
  1252. mindspore/rewrite/ast_helpers/ast_modifier.py +605 -0
  1253. mindspore/rewrite/ast_helpers/ast_replacer.py +79 -0
  1254. mindspore/rewrite/common/__init__.py +19 -0
  1255. mindspore/rewrite/common/config.py +24 -0
  1256. mindspore/rewrite/common/error_log.py +39 -0
  1257. mindspore/rewrite/common/event.py +28 -0
  1258. mindspore/rewrite/common/namer.py +271 -0
  1259. mindspore/rewrite/common/namespace.py +118 -0
  1260. mindspore/rewrite/common/observable.py +44 -0
  1261. mindspore/rewrite/common/observer.py +54 -0
  1262. mindspore/rewrite/node/__init__.py +22 -0
  1263. mindspore/rewrite/node/call_function.py +95 -0
  1264. mindspore/rewrite/node/cell_container.py +139 -0
  1265. mindspore/rewrite/node/control_flow.py +113 -0
  1266. mindspore/rewrite/node/node.py +1428 -0
  1267. mindspore/rewrite/node/node_manager.py +283 -0
  1268. mindspore/rewrite/node/node_topological_manager.py +223 -0
  1269. mindspore/rewrite/parsers/__init__.py +29 -0
  1270. mindspore/rewrite/parsers/arguments_parser.py +63 -0
  1271. mindspore/rewrite/parsers/assign_parser.py +852 -0
  1272. mindspore/rewrite/parsers/attribute_parser.py +57 -0
  1273. mindspore/rewrite/parsers/class_def_parser.py +289 -0
  1274. mindspore/rewrite/parsers/constant_parser.py +104 -0
  1275. mindspore/rewrite/parsers/container_parser.py +88 -0
  1276. mindspore/rewrite/parsers/expr_parser.py +55 -0
  1277. mindspore/rewrite/parsers/for_parser.py +61 -0
  1278. mindspore/rewrite/parsers/function_def_parser.py +84 -0
  1279. mindspore/rewrite/parsers/if_parser.py +85 -0
  1280. mindspore/rewrite/parsers/module_parser.py +117 -0
  1281. mindspore/rewrite/parsers/parser.py +43 -0
  1282. mindspore/rewrite/parsers/parser_register.py +86 -0
  1283. mindspore/rewrite/parsers/return_parser.py +37 -0
  1284. mindspore/rewrite/parsers/while_parser.py +59 -0
  1285. mindspore/rewrite/sparsify/__init__.py +0 -0
  1286. mindspore/rewrite/sparsify/sparse_transformer.py +457 -0
  1287. mindspore/rewrite/sparsify/sparsify.py +112 -0
  1288. mindspore/rewrite/sparsify/utils.py +179 -0
  1289. mindspore/rewrite/symbol_tree/__init__.py +20 -0
  1290. mindspore/rewrite/symbol_tree/symbol_tree.py +1819 -0
  1291. mindspore/rewrite/symbol_tree/symbol_tree_builder.py +76 -0
  1292. mindspore/rewrite/symbol_tree/symbol_tree_dumper.py +142 -0
  1293. mindspore/run_check/__init__.py +20 -0
  1294. mindspore/run_check/_check_version.py +507 -0
  1295. mindspore/run_check/run_check.py +66 -0
  1296. mindspore/safeguard/__init__.py +18 -0
  1297. mindspore/safeguard/rewrite_obfuscation.py +875 -0
  1298. mindspore/scipy/__init__.py +18 -0
  1299. mindspore/scipy/fft.py +264 -0
  1300. mindspore/scipy/linalg.py +919 -0
  1301. mindspore/scipy/ops.py +165 -0
  1302. mindspore/scipy/ops_grad.py +115 -0
  1303. mindspore/scipy/ops_wrapper.py +74 -0
  1304. mindspore/scipy/optimize/__init__.py +20 -0
  1305. mindspore/scipy/optimize/_bfgs.py +230 -0
  1306. mindspore/scipy/optimize/_lagrange.py +201 -0
  1307. mindspore/scipy/optimize/_lbfgs.py +146 -0
  1308. mindspore/scipy/optimize/gradient_optimization_algorithm.py +168 -0
  1309. mindspore/scipy/optimize/line_search.py +370 -0
  1310. mindspore/scipy/optimize/linear_sum_assignment.py +78 -0
  1311. mindspore/scipy/optimize/minimize.py +200 -0
  1312. mindspore/scipy/utils.py +156 -0
  1313. mindspore/scipy/utils_const.py +246 -0
  1314. mindspore/train/__init__.py +48 -0
  1315. mindspore/train/_utils.py +465 -0
  1316. mindspore/train/amp.py +935 -0
  1317. mindspore/train/anf_ir_pb2.py +1517 -0
  1318. mindspore/train/callback/__init__.py +44 -0
  1319. mindspore/train/callback/_backup_and_restore.py +117 -0
  1320. mindspore/train/callback/_callback.py +613 -0
  1321. mindspore/train/callback/_checkpoint.py +814 -0
  1322. mindspore/train/callback/_cluster_monitor.py +201 -0
  1323. mindspore/train/callback/_dataset_graph.py +150 -0
  1324. mindspore/train/callback/_early_stop.py +239 -0
  1325. mindspore/train/callback/_flops_collector.py +239 -0
  1326. mindspore/train/callback/_history.py +92 -0
  1327. mindspore/train/callback/_lambda_callback.py +80 -0
  1328. mindspore/train/callback/_landscape.py +1049 -0
  1329. mindspore/train/callback/_loss_monitor.py +107 -0
  1330. mindspore/train/callback/_lr_scheduler_callback.py +76 -0
  1331. mindspore/train/callback/_on_request_exit.py +298 -0
  1332. mindspore/train/callback/_reduce_lr_on_plateau.py +226 -0
  1333. mindspore/train/callback/_summary_collector.py +1184 -0
  1334. mindspore/train/callback/_tft_register.py +352 -0
  1335. mindspore/train/callback/_time_monitor.py +141 -0
  1336. mindspore/train/checkpoint_pb2.py +233 -0
  1337. mindspore/train/data_sink.py +219 -0
  1338. mindspore/train/dataset_helper.py +692 -0
  1339. mindspore/train/lineage_pb2.py +1260 -0
  1340. mindspore/train/loss_scale_manager.py +213 -0
  1341. mindspore/train/memory_profiling_pb2.py +298 -0
  1342. mindspore/train/metrics/__init__.py +175 -0
  1343. mindspore/train/metrics/accuracy.py +133 -0
  1344. mindspore/train/metrics/auc.py +129 -0
  1345. mindspore/train/metrics/bleu_score.py +170 -0
  1346. mindspore/train/metrics/confusion_matrix.py +700 -0
  1347. mindspore/train/metrics/cosine_similarity.py +109 -0
  1348. mindspore/train/metrics/dice.py +116 -0
  1349. mindspore/train/metrics/error.py +175 -0
  1350. mindspore/train/metrics/fbeta.py +167 -0
  1351. mindspore/train/metrics/hausdorff_distance.py +333 -0
  1352. mindspore/train/metrics/loss.py +97 -0
  1353. mindspore/train/metrics/mean_surface_distance.py +189 -0
  1354. mindspore/train/metrics/metric.py +373 -0
  1355. mindspore/train/metrics/occlusion_sensitivity.py +225 -0
  1356. mindspore/train/metrics/perplexity.py +133 -0
  1357. mindspore/train/metrics/precision.py +160 -0
  1358. mindspore/train/metrics/recall.py +159 -0
  1359. mindspore/train/metrics/roc.py +223 -0
  1360. mindspore/train/metrics/root_mean_square_surface_distance.py +191 -0
  1361. mindspore/train/metrics/topk.py +167 -0
  1362. mindspore/train/mind_ir_pb2.py +1908 -0
  1363. mindspore/train/model.py +2252 -0
  1364. mindspore/train/node_strategy_pb2.py +653 -0
  1365. mindspore/train/print_pb2.py +184 -0
  1366. mindspore/train/profiling_parallel_pb2.py +151 -0
  1367. mindspore/train/serialization.py +3325 -0
  1368. mindspore/train/summary/__init__.py +23 -0
  1369. mindspore/train/summary/_lineage_adapter.py +41 -0
  1370. mindspore/train/summary/_summary_adapter.py +496 -0
  1371. mindspore/train/summary/_writer_pool.py +207 -0
  1372. mindspore/train/summary/enums.py +56 -0
  1373. mindspore/train/summary/summary_record.py +581 -0
  1374. mindspore/train/summary/writer.py +167 -0
  1375. mindspore/train/summary_pb2.py +1165 -0
  1376. mindspore/train/train_thor/__init__.py +20 -0
  1377. mindspore/train/train_thor/convert_utils.py +268 -0
  1378. mindspore/train/train_thor/dataset_helper.py +192 -0
  1379. mindspore/train/train_thor/model_thor.py +257 -0
  1380. mindspore/utils/__init__.py +21 -0
  1381. mindspore/utils/utils.py +60 -0
  1382. mindspore/version.py +1 -0
  1383. mindspore-2.4.0.dist-info/METADATA +352 -0
  1384. mindspore-2.4.0.dist-info/RECORD +1387 -0
  1385. mindspore-2.4.0.dist-info/WHEEL +5 -0
  1386. mindspore-2.4.0.dist-info/entry_points.txt +3 -0
  1387. mindspore-2.4.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,3690 @@
1
+ # Copyright 2021-2022 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """
16
+ The module audio.transforms is inherited from _c_dataengine and is
17
+ implemented based on C++. It's a high performance module to process
18
+ audio. Users can apply suitable augmentations on audio data to improve
19
+ their training models.
20
+ """
21
+
22
+ import numpy as np
23
+
24
+ import mindspore._c_dataengine as cde
25
+ from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation, NormMode, \
26
+ NormType, ResampleMethod, ScaleType, WindowType
27
+ from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \
28
+ check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \
29
+ check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \
30
+ check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_griffin_lim, \
31
+ check_highpass_biquad, check_inverse_mel_scale, check_inverse_spectrogram, check_lfcc, check_lfilter, \
32
+ check_lowpass_biquad, check_magphase, check_mask_along_axis, check_mask_along_axis_iid, check_masking, \
33
+ check_mel_scale, check_mel_spectrogram, check_mfcc, check_mu_law_coding, check_overdrive, check_phase_vocoder, \
34
+ check_phaser, check_pitch_shift, check_resample, check_riaa_biquad, check_sliding_window_cmn, \
35
+ check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, check_vad, check_vol
36
+ from ..transforms.py_transforms_util import Implementation
37
+ from ..transforms.transforms import TensorOperation
38
+
39
+
40
class AudioTensorOperation(TensorOperation):
    """
    Common parent class for the audio tensor operations defined in this module.

    Subclasses are expected to override :meth:`parse`.
    """

    def __init__(self):
        super().__init__()
        # Audio operations are tagged with the C implementation.
        self.implementation = Implementation.C

    def __call__(self, *input_tensor_list):
        """
        Check that every positional input is a NumPy array, then delegate to the parent call.

        Raises:
            TypeError: If any input is not a ``numpy.ndarray``; the message names the
                type of the first offending input.
        """
        rejected_types = [type(candidate) for candidate in input_tensor_list
                          if not isinstance(candidate, np.ndarray)]
        if rejected_types:
            raise TypeError("Input should be NumPy audio, got {}.".format(rejected_types[0]))
        return super().__call__(*input_tensor_list)

    def parse(self):
        """Placeholder that concrete audio operations must override."""
        raise NotImplementedError("AudioTensorOperation has to implement parse() method.")
57
+
58
+
59
class AllpassBiquad(AudioTensorOperation):
    r"""
    Apply a two-pole all-pass filter, defined by its central frequency and bandwidth, to an audio waveform.

    An all-pass filter alters the relationship between frequency and phase of the audio while
    leaving the relationship between frequency and amplitude untouched. Its system function is:

    .. math::
        H(s) = \frac{s^2 - \frac{s}{Q} + 1}{s^2 + \frac{s}{Q} + 1}

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Note:
        The shape of the audio waveform to be processed needs to be <..., time>.

    Args:
        sample_rate (int): Sampling rate (in Hz), which can't be zero.
        central_freq (float): Central frequency (in Hz).
        Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
            in range of (0, 1]. Default: ``0.707``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `central_freq` is not of type float.
        TypeError: If `Q` is not of type float.
        ValueError: If `Q` is not in range of (0, 1].
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode.
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.AllpassBiquad(44100, 200.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.AllpassBiquad(44100, 200.0)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_allpass_biquad
    def __init__(self, sample_rate, central_freq, Q=0.707):
        super().__init__()
        self.sample_rate = sample_rate
        self.central_freq = central_freq
        # The public argument `Q` is stored under a descriptive attribute name.
        self.quality_factor = Q

    def parse(self):
        """Build the ``cde.AllpassBiquadOperation`` from the stored parameters."""
        operation_args = (self.sample_rate, self.central_freq, self.quality_factor)
        return cde.AllpassBiquadOperation(*operation_args)
126
+
127
+
128
# Lookup table from the Python-level ScaleType members to the corresponding
# ScaleType values exposed by the _c_dataengine extension module.
DE_C_SCALE_TYPE = {
    ScaleType.POWER: cde.ScaleType.DE_SCALE_TYPE_POWER,
    ScaleType.MAGNITUDE: cde.ScaleType.DE_SCALE_TYPE_MAGNITUDE,
}
130
+
131
+
132
class AmplitudeToDB(AudioTensorOperation):
    r"""
    Turn the input audio waveform from the amplitude/power scale to decibel scale.

    Note:
        The shape of the audio waveform to be processed needs to be <..., freq, time>.

    Args:
        stype (ScaleType, optional): Scale of the input waveform, which can be
            ``ScaleType.POWER`` or ``ScaleType.MAGNITUDE``. Default: ``ScaleType.POWER``.
        ref_value (float, optional): Multiplier reference value for generating
            `db_multiplier` , which must be greater than zero. Default: ``1.0``. The formula is

            :math:`\text{db_multiplier} = \log_{10}(\max(\text{ref_value}, amin))` .

        amin (float, optional): Lower bound to clamp the input waveform, which must
            be greater than zero. Default: ``1e-10``.
        top_db (float, optional): Minimum cut-off decibels, which must be greater than zero.
            Default: ``80.0``.

    Raises:
        TypeError: If `stype` is not of type :class:`mindspore.dataset.audio.ScaleType` .
        TypeError: If `ref_value` is not of type float.
        ValueError: If `ref_value` is not a positive number.
        TypeError: If `amin` is not of type float.
        ValueError: If `amin` is not a positive number.
        TypeError: If `top_db` is not of type float.
        ValueError: If `top_db` is not a positive number.
        RuntimeError: If input tensor is not in shape of <..., freq, time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 400 // 2 + 1, 30])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.AmplitudeToDB(stype=audio.ScaleType.POWER)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (201, 30) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([400 // 2 + 1, 30])  # 1 sample
        >>> output = audio.AmplitudeToDB(stype=audio.ScaleType.POWER)(waveform)
        >>> print(output.shape, output.dtype)
        (201, 30) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_amplitude_to_db
    def __init__(self, stype=ScaleType.POWER, ref_value=1.0, amin=1e-10, top_db=80.0):
        super().__init__()
        self.stype = stype
        self.ref_value = ref_value
        self.amin = amin
        self.top_db = top_db

    def parse(self):
        """Build the ``cde.AmplitudeToDBOperation`` from the stored parameters."""
        # `.get` is kept deliberately: an unmapped `stype` is forwarded as None rather than
        # raising KeyError here, exactly as before.
        c_scale_type = DE_C_SCALE_TYPE.get(self.stype)
        return cde.AmplitudeToDBOperation(c_scale_type, self.ref_value, self.amin, self.top_db)
200
+
201
+
202
class Angle(AudioTensorOperation):
    """
    Compute the angle of each element in a sequence of complex numbers.

    Note:
        The shape of the audio waveform to be processed needs to be <..., complex=2>.
        Within the trailing axis of size 2, the first entry is the real part and the
        second entry is the imaginary part.

    Raises:
        RuntimeError: If input tensor is not in shape of <..., complex=2>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16, 2])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Angle()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16, 2])  # 1 sample
        >>> output = audio.Angle()(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    def parse(self):
        """Build the parameterless ``cde.AngleOperation``."""
        angle_operation = cde.AngleOperation()
        return angle_operation
244
+
245
+
246
class BandBiquad(AudioTensorOperation):
    """
    Apply a two-pole band-pass filter to an audio waveform.

    Around the center frequency the frequency response falls off logarithmically, and
    the bandwidth sets the slope of that fall-off. Frequencies at the band edge end up
    at half of their original amplitudes.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Note:
        The shape of the audio waveform to be processed needs to be <..., time>.

    Args:
        sample_rate (int): Sampling rate (in Hz), which can't be zero.
        central_freq (float): Central frequency (in Hz).
        Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
            in range of (0, 1]. Default: ``0.707``.
        noise (bool, optional): If ``True``, uses the alternate mode for un-pitched audio (e.g. percussion).
            If ``False``, uses mode oriented to pitched audio, i.e. voice, singing, or instrumental music.
            Default: ``False``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `central_freq` is not of type float.
        TypeError: If `Q` is not of type float.
        ValueError: If `Q` is not in range of (0, 1].
        TypeError: If `noise` is not of type bool.
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.BandBiquad(44100, 200.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.BandBiquad(44100, 200.0)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_band_biquad
    def __init__(self, sample_rate, central_freq, Q=0.707, noise=False):
        super().__init__()
        self.sample_rate = sample_rate
        self.central_freq = central_freq
        # The public argument `Q` is stored under a descriptive attribute name.
        self.quality_factor = Q
        self.noise = noise

    def parse(self):
        """Build the ``cde.BandBiquadOperation`` from the stored parameters."""
        operation_args = (self.sample_rate, self.central_freq, self.quality_factor, self.noise)
        return cde.BandBiquadOperation(*operation_args)
316
+
317
+
318
+ class BandpassBiquad(AudioTensorOperation):
319
+ r"""
320
+ Design two-pole Butterworth band-pass filter for audio waveform.
321
+
322
+ The frequency response of the Butterworth filter is maximally flat (i.e. has no ripples)
323
+ in the passband and rolls off towards zero in the stopband.
324
+
325
+ The system function of Butterworth band-pass filter is:
326
+
327
+ .. math::
328
+ H(s) = \begin{cases}
329
+ \frac{s}{s^2 + \frac{s}{Q} + 1}, &\text{if const_skirt_gain=True}; \cr
330
+ \frac{\frac{s}{Q}}{s^2 + \frac{s}{Q} + 1}, &\text{if const_skirt_gain=False}.
331
+ \end{cases}
332
+
333
+ Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.
334
+
335
+ Note:
336
+ The shape of the audio waveform to be processed needs to be <..., time>.
337
+
338
+ Args:
339
+ sample_rate (int): Sampling rate (in Hz), which can't be zero.
340
+ central_freq (float): Central frequency (in Hz).
341
+ Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
342
+ in range of (0, 1]. Default: ``0.707``.
343
+ const_skirt_gain (bool, optional) : If ``True``, uses a constant skirt gain (peak gain = Q);
344
+ If ``False``, uses a constant 0dB peak gain. Default: ``False``.
345
+
346
+ Raises:
347
+ TypeError: If `sample_rate` is not of type int.
348
+ ValueError: If `sample_rate` is 0.
349
+ TypeError: If `central_freq` is not of type float.
350
+ TypeError: If `Q` is not of type float.
351
+ ValueError: If `Q` is not in range of (0, 1].
352
+ TypeError: If `const_skirt_gain` is not of type bool.
353
+ RuntimeError: If input tensor is not in shape of <..., time>.
354
+
355
+ Supported Platforms:
356
+ ``CPU``
357
+
358
+ Examples:
359
+ >>> import numpy as np
360
+ >>> import mindspore.dataset as ds
361
+ >>> import mindspore.dataset.audio as audio
362
+ >>>
363
+ >>> # Use the transform in dataset pipeline mode
364
+ >>> waveform = np.random.random([5, 16]) # 5 samples
365
+ >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
366
+ >>> transforms = [audio.BandpassBiquad(44100, 200.0)]
367
+ >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
368
+ >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
369
+ ... print(item["audio"].shape, item["audio"].dtype)
370
+ ... break
371
+ (16,) float64
372
+ >>>
373
+ >>> # Use the transform in eager mode
374
+ >>> waveform = np.random.random([16]) # 1 sample
375
+ >>> output = audio.BandpassBiquad(44100, 200.0)(waveform)
376
+ >>> print(output.shape, output.dtype)
377
+ (16,) float64
378
+
379
+ Tutorial Examples:
380
+ - `Illustration of audio transforms
381
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
382
+ """
383
+
384
+ @check_bandpass_biquad
385
def __init__(self, sample_rate, central_freq, Q=0.707, const_skirt_gain=False):
    # Initialise the base operation before recording any configuration.
    super().__init__()
    # Keep the filter settings on the instance; parse() reads them back later.
    self.sample_rate, self.central_freq = sample_rate, central_freq
    self.quality_factor, self.const_skirt_gain = Q, const_skirt_gain
391
+
392
def parse(self):
    """Return the ``cde.BandpassBiquadOperation`` built from the stored settings."""
    settings = (self.sample_rate, self.central_freq, self.quality_factor, self.const_skirt_gain)
    return cde.BandpassBiquadOperation(*settings)
395
+
396
+
397
class BandrejectBiquad(AudioTensorOperation):
    r"""
    Design a two-pole Butterworth band-reject filter for audio waveform.

    The frequency response of the Butterworth filter is maximally flat (i.e. has no ripples)
    in the passband and rolls off towards zero in the stopband.

    The system function of the Butterworth band-reject filter is:

    .. math::
        H(s) = \frac{s^2 + 1}{s^2 + \frac{s}{Q} + 1}

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Note:
        The shape of the audio waveform to be processed needs to be <..., time>.

    Args:
        sample_rate (int): Sampling rate (in Hz), which can't be zero.
        central_freq (float): Central frequency (in Hz).
        Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
            in range of (0, 1]. Default: ``0.707``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `central_freq` is not of type float.
        TypeError: If `Q` is not of type float.
        ValueError: If `Q` is not in range of (0, 1].
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.BandrejectBiquad(44100, 200.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.BandrejectBiquad(44100, 200.0)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_bandreject_biquad
    def __init__(self, sample_rate, central_freq, Q=0.707):
        super().__init__()
        # Record the filter settings; `Q` is stored under a descriptive attribute name.
        self.sample_rate, self.central_freq = sample_rate, central_freq
        self.quality_factor = Q

    def parse(self):
        """Return the ``cde.BandrejectBiquadOperation`` built from the stored settings."""
        settings = (self.sample_rate, self.central_freq, self.quality_factor)
        return cde.BandrejectBiquadOperation(*settings)
466
+
467
+
468
class BassBiquad(AudioTensorOperation):
    r"""
    Design a bass tone-control effect, also known as a two-pole low-shelf filter, for audio waveform.

    A low-shelf filter passes all frequencies, but increases or reduces frequencies below the shelf
    frequency by a specified amount. The system function is:

    .. math::
        H(s) = A\frac{s^2 + \frac{\sqrt{A}}{Q}s + A}{As^2 + \frac{\sqrt{A}}{Q}s + 1}

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Note:
        The shape of the audio waveform to be processed needs to be <..., time>.

    Args:
        sample_rate (int): Sampling rate (in Hz), which can't be zero.
        gain (float): Desired gain at the boost (or attenuation) in dB.
        central_freq (float, optional): Central frequency (in Hz). Default: ``100.0``.
        Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
            in range of (0, 1]. Default: ``0.707``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `gain` is not of type float.
        TypeError: If `central_freq` is not of type float.
        TypeError: If `Q` is not of type float.
        ValueError: If `Q` is not in range of (0, 1].
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.BassBiquad(44100, 100.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.BassBiquad(44100, 200.0)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_bass_biquad
    def __init__(self, sample_rate, gain, central_freq=100.0, Q=0.707):
        super().__init__()
        # Record the shelf-filter settings; `Q` is stored under a descriptive attribute name.
        self.sample_rate, self.gain = sample_rate, gain
        self.central_freq, self.quality_factor = central_freq, Q

    def parse(self):
        """Return the ``cde.BassBiquadOperation`` built from the stored settings."""
        settings = (self.sample_rate, self.gain, self.central_freq, self.quality_factor)
        return cde.BassBiquadOperation(*settings)
538
+
539
+
540
# NOTE(review): the neighbouring audio transforms in this file derive from
# AudioTensorOperation, while this class derives from TensorOperation - confirm this is intentional.
class Biquad(TensorOperation):
    """
    Perform a biquad filter of input audio.
    Mathematical formulas refer to: `Digital_biquad_filter <https://en.wikipedia.org/wiki/Digital_biquad_filter>`_ .

    Args:
        b0 (float): Numerator coefficient of current input, x[n].
        b1 (float): Numerator coefficient of input one time step ago x[n-1].
        b2 (float): Numerator coefficient of input two time steps ago x[n-2].
        a0 (float): Denominator coefficient of current output y[n], the value can't be 0, typically 1.
        a1 (float): Denominator coefficient of output one time step ago y[n-1].
        a2 (float): Denominator coefficient of output two time steps ago y[n-2].

    Raises:
        TypeError: If `b0` is not of type float.
        TypeError: If `b1` is not of type float.
        TypeError: If `b2` is not of type float.
        TypeError: If `a0` is not of type float.
        TypeError: If `a1` is not of type float.
        TypeError: If `a2` is not of type float.
        ValueError: If `a0` is 0.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Biquad(0.01, 0.02, 0.13, 1, 0.12, 0.3)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.Biquad(0.01, 0.02, 0.13, 1, 0.12, 0.3)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_biquad
    def __init__(self, b0, b1, b2, a0, a1, a2):
        super().__init__()
        # Numerator (feed-forward) coefficients.
        self.b0, self.b1, self.b2 = b0, b1, b2
        # Denominator (feedback) coefficients.
        self.a0, self.a1, self.a2 = a0, a1, a2

    def parse(self):
        """Return the ``cde.BiquadOperation`` built from the six stored coefficients."""
        coefficients = (self.b0, self.b1, self.b2, self.a0, self.a1, self.a2)
        return cde.BiquadOperation(*coefficients)
603
+
604
+
605
class ComplexNorm(AudioTensorOperation):
    """
    Compute the norm of a complex number sequence.

    Note:
        The shape of the audio waveform to be processed needs to be <..., complex=2>.
        The first dimension represents the real part while the second represents the imaginary.

    Args:
        power (float, optional): Power of the norm, which must be non-negative. Default: ``1.0``.

    Raises:
        TypeError: If `power` is not of type float.
        ValueError: If `power` is a negative number.
        RuntimeError: If input tensor is not in shape of <..., complex=2>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16, 2])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.ComplexNorm()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16, 2])  # 1 sample
        >>> output = audio.ComplexNorm()(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_complex_norm
    def __init__(self, power=1.0):
        super().__init__()
        # Exponent handed to the norm operation in parse().
        self.power = power

    def parse(self):
        """Return the ``cde.ComplexNormOperation`` built from the stored power."""
        operation = cde.ComplexNormOperation(self.power)
        return operation
657
+
658
+
659
# Lookup table from the Python-side BorderType members to their cde.BorderType
# counterparts; ComputeDeltas.parse() consults it when building the operation.
DE_C_BORDER_TYPE = {
    BorderType.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
    BorderType.EDGE: cde.BorderType.DE_BORDER_EDGE,
    BorderType.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
    BorderType.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC,
}
665
+
666
+
667
class ComputeDeltas(AudioTensorOperation):
    r"""
    Compute delta coefficients, also known as differential coefficients, of a spectrogram.

    Delta coefficients help to understand the dynamics of the power spectrum. They can be
    computed using the following formula.

    .. math::
        d_{t}=\frac{{\textstyle\sum_{n=1}^{N}}n(c_{t+n}-c_{t-n})}{2{\textstyle\sum_{n=1}^{N}}n^{2}}

    where :math:`d_{t}` is the deltas at time :math:`t` , :math:`c_{t}` is the spectrogram coefficients
    at time :math:`t` , :math:`N` is :math:`(\text{win_length} - 1) // 2` .

    Args:
        win_length (int, optional): The window length used for computing delta, must be no less than 3. Default: ``5``.
        pad_mode (BorderType, optional): Mode parameter passed to padding, can be ``BorderType.CONSTANT``,
            ``BorderType.EDGE``, ``BorderType.REFLECT`` or ``BorderType.SYMMETRIC``. Default: ``BorderType.EDGE``.

            - ``BorderType.CONSTANT``, pad with a constant value.
            - ``BorderType.EDGE``, pad with the last value on the edge.
            - ``BorderType.REFLECT``, reflect the value on the edge while omitting the last one.
              For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [3, 2, 1, 2, 3, 4, 3, 2].
            - ``BorderType.SYMMETRIC``, reflect the value on the edge while repeating the last one.
              For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [2, 1, 1, 2, 3, 4, 4, 3].

    Raises:
        TypeError: If `win_length` is not of type int.
        ValueError: If `win_length` is less than 3.
        TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
        RuntimeError: If input tensor is not in shape of <..., freq, time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 400 // 2 + 1, 30])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.ComputeDeltas(win_length=7, pad_mode=audio.BorderType.EDGE)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (201, 30) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([400 // 2 + 1, 30])  # 1 sample
        >>> output = audio.ComputeDeltas(win_length=7, pad_mode=audio.BorderType.EDGE)(waveform)
        >>> print(output.shape, output.dtype)
        (201, 30) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_compute_deltas
    def __init__(self, win_length=5, pad_mode=BorderType.EDGE):
        super().__init__()
        # Note the attribute is `win_len`, not `win_length`.
        self.win_len, self.pad_mode = win_length, pad_mode

    def parse(self):
        """Return the ``cde.ComputeDeltasOperation`` built from the stored settings."""
        window = self.win_len
        # .get() yields None for a pad mode that is missing from the lookup table.
        c_pad_mode = DE_C_BORDER_TYPE.get(self.pad_mode)
        return cde.ComputeDeltasOperation(window, c_pad_mode)
735
+
736
+
737
class Contrast(AudioTensorOperation):
    """
    Apply contrast effect for audio waveform.

    Comparable with compression, this effect modifies an audio signal to make it sound louder.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Note:
        The shape of the audio waveform to be processed needs to be <..., time>.

    Args:
        enhancement_amount (float, optional): Controls the amount of the enhancement,
            in range of [0, 100]. Default: ``75.0``. Note that `enhancement_amount` equal
            to 0 still gives a significant contrast enhancement.

    Raises:
        TypeError: If `enhancement_amount` is not of type float.
        ValueError: If `enhancement_amount` is not in range [0, 100].
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Contrast()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.Contrast()(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_contrast
    def __init__(self, enhancement_amount=75.0):
        super().__init__()
        # Strength of the effect, handed to the operation in parse().
        self.enhancement_amount = enhancement_amount

    def parse(self):
        """Return the ``cde.ContrastOperation`` built from the stored enhancement amount."""
        operation = cde.ContrastOperation(self.enhancement_amount)
        return operation
794
+
795
+
796
class DBToAmplitude(AudioTensorOperation):
    """
    Turn a waveform from the decibel scale to the power/amplitude scale.

    Args:
        ref (float): Reference which the output will be scaled by.
        power (float): If power equals 1, will compute DB to power. If 0.5, will compute DB to amplitude.

    Raises:
        TypeError: If `ref` is not of type float.
        TypeError: If `power` is not of type float.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.DBToAmplitude(0.5, 0.5)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.DBToAmplitude(0.5, 0.5)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_db_to_amplitude
    def __init__(self, ref, power):
        super().__init__()
        # Both values are handed to the operation unchanged in parse().
        self.ref, self.power = ref, power

    def parse(self):
        """Return the ``cde.DBToAmplitudeOperation`` built from the stored settings."""
        settings = (self.ref, self.power)
        return cde.DBToAmplitudeOperation(*settings)
845
+
846
+
847
class DCShift(AudioTensorOperation):
    """
    Apply a DC shift to the audio. This can be useful to remove DC offset from audio.

    Args:
        shift (float): The amount to shift the audio, the value must be in the range [-2.0, 2.0].
        limiter_gain (float, optional): Used only on peaks to prevent clipping,
            the value should be much less than 1, such as ``0.05`` or ``0.02``. Default: ``None``,
            will be set to `shift` .

    Raises:
        TypeError: If `shift` is not of type float.
        ValueError: If `shift` is not in range [-2.0, 2.0].
        TypeError: If `limiter_gain` is not of type float.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.DCShift(0.5, 0.02)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.DCShift(0.5, 0.02)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_dc_shift
    def __init__(self, shift, limiter_gain=None):
        super().__init__()
        self.shift = shift
        # Only an omitted (None) limiter gain falls back to `shift`. A truthiness
        # test would also discard an explicitly supplied 0.0 and silently replace it.
        self.limiter_gain = shift if limiter_gain is None else limiter_gain

    def parse(self):
        """Return the ``cde.DCShiftOperation`` built from the stored shift and limiter gain."""
        return cde.DCShiftOperation(self.shift, self.limiter_gain)
899
+
900
+
901
class DeemphBiquad(AudioTensorOperation):
    """
    Apply Compact Disc (IEC 60908) de-emphasis (a treble attenuation shelving filter) to the audio waveform.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        sample_rate (int): Sampling rate of the waveform, must be 44100 or 48000 (Hz).

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is not 44100 or 48000.
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 8])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.DeemphBiquad(44100)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (8,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([8])  # 1 sample
        >>> output = audio.DeemphBiquad(44100)(waveform)
        >>> print(output.shape, output.dtype)
        (8,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_deemph_biquad
    def __init__(self, sample_rate):
        super().__init__()
        # The only setting of this transform; handed to the operation in parse().
        self.sample_rate = sample_rate

    def parse(self):
        """Return the ``cde.DeemphBiquadOperation`` built from the stored sample rate."""
        operation = cde.DeemphBiquadOperation(self.sample_rate)
        return operation
951
+
952
+
953
class DetectPitchFrequency(AudioTensorOperation):
    """
    Detect pitch frequency.

    It is implemented using normalized cross-correlation function and median smoothing.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. ``44100`` (Hz), the value can't be zero.
        frame_time (float, optional): Duration of a frame, the value must be greater than zero. Default: ``0.01``.
        win_length (int, optional): The window length for median smoothing (in number of frames), the value must be
            greater than zero. Default: ``30``.
        freq_low (int, optional): Lowest frequency that can be detected (Hz), the value must be greater than zero.
            Default: ``85``.
        freq_high (int, optional): Highest frequency that can be detected (Hz), the value must be greater than zero.
            Default: ``3400``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `frame_time` is not of type float.
        ValueError: If `frame_time` is not positive.
        TypeError: If `win_length` is not of type int.
        ValueError: If `win_length` is not positive.
        TypeError: If `freq_low` is not of type int.
        ValueError: If `freq_low` is not positive.
        TypeError: If `freq_high` is not of type int.
        ValueError: If `freq_high` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.DetectPitchFrequency(30, 0.1, 3, 5, 25)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (5,) float32
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.DetectPitchFrequency(30, 0.1, 3, 5, 25)(waveform)
        >>> print(output.shape, output.dtype)
        (5,) float32

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_detect_pitch_frequency
    def __init__(self, sample_rate, frame_time=0.01, win_length=30, freq_low=85, freq_high=3400):
        super().__init__()
        # Framing settings.
        self.sample_rate, self.frame_time = sample_rate, frame_time
        # Median-smoothing window, in frames.
        self.win_length = win_length
        # Detectable frequency band, in Hz.
        self.freq_low, self.freq_high = freq_low, freq_high

    def parse(self):
        """Return the ``cde.DetectPitchFrequencyOperation`` built from the stored settings."""
        settings = (self.sample_rate, self.frame_time, self.win_length, self.freq_low, self.freq_high)
        return cde.DetectPitchFrequencyOperation(*settings)
1022
+
1023
+
1024
# Lookup table from the Python-side DensityFunction members to their
# cde.DensityFunction counterparts; Dither.parse() consults it.
DE_C_DENSITY_FUNCTION = {
    DensityFunction.TPDF: cde.DensityFunction.DE_DENSITY_FUNCTION_TPDF,
    DensityFunction.RPDF: cde.DensityFunction.DE_DENSITY_FUNCTION_RPDF,
    DensityFunction.GPDF: cde.DensityFunction.DE_DENSITY_FUNCTION_GPDF,
}
1027
+
1028
+
1029
class Dither(AudioTensorOperation):
    """
    Dither increases the perceived dynamic range of audio stored at a
    particular bit-depth by eliminating nonlinear truncation distortion.

    Args:
        density_function (DensityFunction, optional): The density function of a continuous
            random variable, can be ``DensityFunction.TPDF`` (Triangular Probability Density Function),
            ``DensityFunction.RPDF`` (Rectangular Probability Density Function) or
            ``DensityFunction.GPDF`` (Gaussian Probability Density Function).
            Default: ``DensityFunction.TPDF``.
        noise_shaping (bool, optional): A filtering process that shapes the spectral
            energy of quantisation error. Default: ``False``.

    Raises:
        TypeError: If `density_function` is not of type :class:`mindspore.dataset.audio.DensityFunction` .
        TypeError: If `noise_shaping` is not of type bool.
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Dither()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.Dither()(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_dither
    def __init__(self, density_function=DensityFunction.TPDF, noise_shaping=False):
        super().__init__()
        # The enum member is kept as given; it is translated in parse().
        self.density_function, self.noise_shaping = density_function, noise_shaping

    def parse(self):
        """Return the ``cde.DitherOperation`` built from the stored settings."""
        # .get() yields None for a density function that is missing from the lookup table.
        c_density_function = DE_C_DENSITY_FUNCTION.get(self.density_function)
        return cde.DitherOperation(c_density_function, self.noise_shaping)
1085
+
1086
+
1087
class EqualizerBiquad(AudioTensorOperation):
    """
    Design biquad equalizer filter and perform filtering.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. ``44100`` (Hz), the value can't be 0.
        center_freq (float): Central frequency (in Hz).
        gain (float): Desired gain at the boost (or attenuation) in dB.
        Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
            in range of (0, 1]. Default: ``0.707``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `center_freq` is not of type float.
        TypeError: If `gain` is not of type float.
        TypeError: If `Q` is not of type float.
        ValueError: If `Q` is not in range of (0, 1].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.EqualizerBiquad(44100, 1500, 5.5, 0.7)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.EqualizerBiquad(44100, 1500, 5.5, 0.7)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_equalizer_biquad
    def __init__(self, sample_rate, center_freq, gain, Q=0.707):
        super().__init__()
        # Record the equalizer settings; `Q` is stored under a descriptive attribute name.
        self.sample_rate, self.center_freq = sample_rate, center_freq
        self.gain, self.quality_factor = gain, Q

    def parse(self):
        """Return the ``cde.EqualizerBiquadOperation`` built from the stored settings."""
        settings = (self.sample_rate, self.center_freq, self.gain, self.quality_factor)
        return cde.EqualizerBiquadOperation(*settings)
1146
+
1147
+
1148
# Lookup table from the Python-side FadeShape members to their cde.FadeShape counterparts.
DE_C_FADE_SHAPE = {
    FadeShape.QUARTER_SINE: cde.FadeShape.DE_FADE_SHAPE_QUARTER_SINE,
    FadeShape.HALF_SINE: cde.FadeShape.DE_FADE_SHAPE_HALF_SINE,
    FadeShape.LINEAR: cde.FadeShape.DE_FADE_SHAPE_LINEAR,
    FadeShape.LOGARITHMIC: cde.FadeShape.DE_FADE_SHAPE_LOGARITHMIC,
    FadeShape.EXPONENTIAL: cde.FadeShape.DE_FADE_SHAPE_EXPONENTIAL,
}
1153
+
1154
+
1155
class Fade(AudioTensorOperation):
    """
    Apply a fade-in and/or a fade-out to a waveform.

    Args:
        fade_in_len (int, optional): Length of the fade-in (time frames), which must be non-negative.
            Default: ``0``.
        fade_out_len (int, optional): Length of the fade-out (time frames), which must be non-negative.
            Default: ``0``.
        fade_shape (FadeShape, optional): Shape of the fade, one of the five types defined in FadeShape.
            Default: ``FadeShape.LINEAR``.

            - ``FadeShape.QUARTER_SINE``, the fade tends to 0 following a quarter sine function.

            - ``FadeShape.HALF_SINE``, the fade tends to 0 following a half sine function.

            - ``FadeShape.LINEAR``, the fade tends to 0 linearly.

            - ``FadeShape.LOGARITHMIC``, the fade tends to 0 following a logarithmic function.

            - ``FadeShape.EXPONENTIAL``, the fade tends to 0 following an exponential function.

    Raises:
        RuntimeError: If fade_in_len exceeds waveform length.
        RuntimeError: If fade_out_len exceeds waveform length.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Fade(fade_in_len=3, fade_out_len=2, fade_shape=audio.FadeShape.LINEAR)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.Fade(fade_in_len=3, fade_out_len=2, fade_shape=audio.FadeShape.LINEAR)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_fade
    def __init__(self, fade_in_len=0, fade_out_len=0, fade_shape=FadeShape.LINEAR):
        # Keep the constructor arguments on the instance; parse() reads them back later.
        super().__init__()
        self.fade_in_len, self.fade_out_len = fade_in_len, fade_out_len
        self.fade_shape = fade_shape

    def parse(self):
        # Translate the Python enum member into the value stored in DE_C_FADE_SHAPE.
        shape = DE_C_FADE_SHAPE.get(self.fade_shape)
        return cde.FadeOperation(self.fade_in_len, self.fade_out_len, shape)
1217
+
1218
+
1219
class Filtfilt(AudioTensorOperation):
    """
    Run an IIR filter over a waveform in the forward and then the backward direction.

    Args:
        a_coeffs (Sequence[float]): Denominator coefficients of difference equation of dimension.
            Coefficients for lower delays come first, e.g. [a0, a1, a2, ...].
            Must be the same size as `b_coeffs` (pad with 0's as necessary).
        b_coeffs (Sequence[float]): Numerator coefficients of difference equation of dimension.
            Coefficients for lower delays come first, e.g. [b0, b1, b2, ...].
            Must be the same size as `a_coeffs` (pad with 0's as necessary).
        clamp (bool, optional): If ``True``, clamp the output signal to be in the range [-1, 1].
            Default: ``True``.

    Raises:
        TypeError: If `a_coeffs` is not of type Sequence[float].
        TypeError: If `b_coeffs` is not of type Sequence[float].
        ValueError: If `a_coeffs` and `b_coeffs` are of different sizes.
        TypeError: If `clamp` is not of type bool.
        RuntimeError: If shape of the input audio is not <..., time>.

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Filtfilt(a_coeffs=[0.1, 0.2, 0.3], b_coeffs=[0.1, 0.2, 0.3])]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.Filtfilt(a_coeffs=[0.1, 0.2, 0.3], b_coeffs=[0.1, 0.2, 0.3])(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_lfilter
    def __init__(self, a_coeffs, b_coeffs, clamp=True):
        # Keep the constructor arguments on the instance; parse() reads them back later.
        super().__init__()
        self.a_coeffs, self.b_coeffs = a_coeffs, b_coeffs
        self.clamp = clamp

    def parse(self):
        # Hand the stored settings to the cde operation in the order it expects.
        op_args = (self.a_coeffs, self.b_coeffs, self.clamp)
        return cde.FiltfiltOperation(*op_args)
1275
+
1276
+
1277
# Lookup table from the Python-side Modulation members to their cde.Modulation counterparts.
DE_C_MODULATION = {
    Modulation.SINUSOIDAL: cde.Modulation.DE_MODULATION_SINUSOIDAL,
    Modulation.TRIANGULAR: cde.Modulation.DE_MODULATION_TRIANGULAR,
}

# Lookup table from the Python-side Interpolation members to their cde.Interpolation counterparts.
DE_C_INTERPOLATION = {
    Interpolation.LINEAR: cde.Interpolation.DE_INTERPOLATION_LINEAR,
    Interpolation.QUADRATIC: cde.Interpolation.DE_INTERPOLATION_QUADRATIC,
}
1282
+
1283
+
1284
class Flanger(AudioTensorOperation):
    """
    Add a flanger effect to the audio.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
        delay (float, optional): Desired delay in milliseconds, in range of [0, 30]. Default: ``0.0``.
        depth (float, optional): Desired delay depth in milliseconds, in range of [0, 10]. Default: ``2.0``.
        regen (float, optional): Desired regen (feedback gain) in dB, in range of [-95, 95]. Default: ``0.0``.
        width (float, optional): Desired width (delay gain) in dB, in range of [0, 100]. Default: ``71.0``.
        speed (float, optional): Modulation speed in Hz, in range of [0.1, 10]. Default: ``0.5``.
        phase (float, optional): Percentage phase-shift for multi-channel, in range of [0, 100]. Default: ``25.0``.
        modulation (Modulation, optional): Modulation method, either ``Modulation.SINUSOIDAL`` or
            ``Modulation.TRIANGULAR``. Default: ``Modulation.SINUSOIDAL``.
        interpolation (Interpolation, optional): Interpolation method, either ``Interpolation.LINEAR`` or
            ``Interpolation.QUADRATIC``. Default: ``Interpolation.LINEAR``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is zero.
        TypeError: If `delay` is not of type float.
        ValueError: If `delay` is not in range of [0, 30].
        TypeError: If `depth` is not of type float.
        ValueError: If `depth` is not in range of [0, 10].
        TypeError: If `regen` is not of type float.
        ValueError: If `regen` is not in range of [-95, 95].
        TypeError: If `width` is not of type float.
        ValueError: If `width` is not in range of [0, 100].
        TypeError: If `speed` is not of type float.
        ValueError: If `speed` is not in range of [0.1, 10].
        TypeError: If `phase` is not of type float.
        ValueError: If `phase` is not in range of [0, 100].
        TypeError: If `modulation` is not of type :class:`mindspore.dataset.audio.Modulation` .
        TypeError: If `interpolation` is not of type :class:`mindspore.dataset.audio.Interpolation` .
        RuntimeError: If input tensor is not in shape of <..., channel, time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 4, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Flanger(44100)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (4, 16) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([4, 16])  # 1 sample
        >>> output = audio.Flanger(44100)(waveform)
        >>> print(output.shape, output.dtype)
        (4, 16) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_flanger
    def __init__(self, sample_rate, delay=0.0, depth=2.0, regen=0.0, width=71.0, speed=0.5, phase=25.0,
                 modulation=Modulation.SINUSOIDAL, interpolation=Interpolation.LINEAR):
        # Keep the constructor arguments on the instance; parse() reads them back later.
        super().__init__()
        self.sample_rate = sample_rate
        self.delay, self.depth = delay, depth
        self.regen, self.width = regen, width
        self.speed, self.phase = speed, phase
        self.modulation, self.interpolation = modulation, interpolation

    def parse(self):
        # Translate both Python enum members through their lookup tables before building the operation.
        modulation = DE_C_MODULATION.get(self.modulation)
        interpolation = DE_C_INTERPOLATION.get(self.interpolation)
        return cde.FlangerOperation(self.sample_rate, self.delay, self.depth, self.regen, self.width,
                                    self.speed, self.phase, modulation, interpolation)
1369
+
1370
+
1371
class FrequencyMasking(AudioTensorOperation):
    """
    Mask a spectrogram along the frequency axis.

    Note:
        The shape of the audio waveform to be processed needs to be <..., freq, time>.

    Args:
        iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: ``False``.
        freq_mask_param (int, optional): When `iid_masks` is ``True``, length of the mask will be uniformly sampled
            from [0, freq_mask_param]; When `iid_masks` is ``False``, directly use it as length of the mask.
            The value should be in range of [0, freq_length], where `freq_length` is the length of audio waveform
            in frequency domain. Default: ``0``.
        mask_start (int, optional): Starting point to apply mask, only works when `iid_masks` is ``True``.
            The value should be in range of [0, freq_length - freq_mask_param], where `freq_length` is
            the length of audio waveform in frequency domain. Default: ``0``.
        mask_value (float, optional): Value to assign to the masked columns. Default: ``0.0``.

    Raises:
        TypeError: If `iid_masks` is not of type bool.
        TypeError: If `freq_mask_param` is not of type int.
        ValueError: If `freq_mask_param` is greater than the length of audio waveform in frequency domain.
        TypeError: If `mask_start` is not of type int.
        ValueError: If `mask_start` is a negative number.
        TypeError: If `mask_value` is not of type float.
        ValueError: If `mask_value` is a negative number.
        RuntimeError: If input tensor is not in shape of <..., freq, time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16, 2])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.FrequencyMasking(iid_masks=True, freq_mask_param=1)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16, 2) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16, 2])  # 1 sample
        >>> output = audio.FrequencyMasking(iid_masks=True, freq_mask_param=1)(waveform)
        >>> print(output.shape, output.dtype)
        (16, 2) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_

    .. image:: frequency_masking_original.png

    .. image:: frequency_masking.png
    """

    @check_masking
    def __init__(self, iid_masks=False, freq_mask_param=0, mask_start=0, mask_value=0.0):
        super().__init__()
        self.iid_masks = iid_masks
        # The public argument is ``freq_mask_param``; the attribute uses the spelled-out name.
        self.frequency_mask_param = freq_mask_param
        self.mask_start, self.mask_value = mask_start, mask_value

    def parse(self):
        # Hand the stored settings to the cde operation in the order it expects.
        op_args = (self.iid_masks, self.frequency_mask_param, self.mask_start, self.mask_value)
        return cde.FrequencyMaskingOperation(*op_args)
1443
+
1444
+
1445
class Gain(AudioTensorOperation):
    """
    Amplify or attenuate the whole waveform.

    Args:
        gain_db (float, optional): Gain adjustment in decibels (dB). Default: ``1.0``.

    Raises:
        TypeError: If `gain_db` is not of type float.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 8])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Gain(1.2)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (8,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([8])  # 1 sample
        >>> output = audio.Gain(1.2)(waveform)
        >>> print(output.shape, output.dtype)
        (8,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_gain
    def __init__(self, gain_db=1.0):
        # Keep the single setting on the instance; parse() reads it back later.
        super().__init__()
        self.gain_db = gain_db

    def parse(self):
        # Build the cde operation from the stored gain value.
        gain_db = self.gain_db
        return cde.GainOperation(gain_db)
1491
+
1492
+
1493
class GriffinLim(AudioTensorOperation):
    r"""
    Recover a waveform from a linear scale magnitude spectrogram with the Griffin-Lim transformation.

    About Griffin-Lim please refer to `A fast Griffin-Lim algorithm <https://doi.org/10.1109/WASPAA.2013.6701851>`_
    and `Signal estimation from modified short-time Fourier transform <https://doi.org/10.1109/ICASSP.1983.1172092>`_ .

    Args:
        n_fft (int, optional): Size of FFT. Default: ``400``.
        n_iter (int, optional): Number of iterations for phase recovery. Default: ``32``.
        win_length (int, optional): Window size for GriffinLim. Default: ``None``, will be set to `n_fft` .
        hop_length (int, optional): Length of hop between STFT windows.
            Default: ``None``, will be set to `win_length // 2` .
        window_type (WindowType, optional): Window type for GriffinLim, which can be ``WindowType.BARTLETT``,
            ``WindowType.BLACKMAN``, ``WindowType.HAMMING``, ``WindowType.HANN`` or ``WindowType.KAISER``.
            Default: ``WindowType.HANN``. Currently kaiser window is not supported on macOS.
        power (float, optional): Exponent for the magnitude spectrogram. Default: ``2.0``.
        momentum (float, optional): The momentum for fast Griffin-Lim. Default: ``0.99``.
        length (int, optional): Length of the expected output waveform. Default: ``None``,
            will be set to the value of last dimension of the stft matrix.
        rand_init (bool, optional): Flag for random phase initialization or all-zero phase initialization.
            Default: ``True``.

    Raises:
        TypeError: If `n_fft` is not of type int.
        ValueError: If `n_fft` is not positive.
        TypeError: If `n_iter` is not of type int.
        ValueError: If `n_iter` is not positive.
        TypeError: If `win_length` is not of type int.
        ValueError: If `win_length` is a negative number.
        TypeError: If `hop_length` is not of type int.
        ValueError: If `hop_length` is a negative number.
        TypeError: If `window_type` is not of type :class:`mindspore.dataset.audio.WindowType` .
        TypeError: If `power` is not of type float.
        ValueError: If `power` is not positive.
        TypeError: If `momentum` is not of type float.
        ValueError: If `momentum` is a negative number.
        TypeError: If `length` is not of type int.
        ValueError: If `length` is a negative number.
        TypeError: If `rand_init` is not of type bool.
        RuntimeError: If `n_fft` is not less than `length` .
        RuntimeError: If `win_length` is not less than `n_fft` .

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 201, 6])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.GriffinLim(n_fft=400)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (1000,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([201, 6])  # 1 sample
        >>> output = audio.GriffinLim(n_fft=400)(waveform)
        >>> print(output.shape, output.dtype)
        (1000,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_griffin_lim
    def __init__(self, n_fft=400, n_iter=32, win_length=None, hop_length=None, window_type=WindowType.HANN, power=2.0,
                 momentum=0.99, length=None, rand_init=True):
        super().__init__()
        self.n_fft, self.n_iter = n_fft, n_iter
        # The fallbacks are truthiness-based: both ``None`` and ``0`` select the derived default.
        self.win_length = win_length or self.n_fft
        # hop_length depends on the already-resolved win_length, so it must be set after it.
        self.hop_length = hop_length or self.win_length // 2
        self.window_type = window_type
        self.power, self.momentum = power, momentum
        # A missing length is stored as 0.
        self.length = length or 0
        self.rand_init = rand_init

    def parse(self):
        # Translate the Python enum member through DE_C_WINDOW_TYPE before building the operation.
        window = DE_C_WINDOW_TYPE.get(self.window_type)
        return cde.GriffinLimOperation(self.n_fft, self.n_iter, self.win_length, self.hop_length, window,
                                       self.power, self.momentum, self.length, self.rand_init)
1583
+
1584
+
1585
class HighpassBiquad(AudioTensorOperation):
    """
    Design a biquad highpass filter and filter the waveform with it.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
        cutoff_freq (float): Filter cutoff frequency (in Hz).
        Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: ``0.707``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `cutoff_freq` is not of type float.
        TypeError: If `Q` is not of type float.
        ValueError: If `Q` is not in range of (0, 1].
        RuntimeError: If the shape of input audio waveform does not match <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.HighpassBiquad(44100, 1500, 0.7)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.HighpassBiquad(44100, 1500, 0.7)(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_highpass_biquad
    def __init__(self, sample_rate, cutoff_freq, Q=0.707):
        super().__init__()
        self.sample_rate, self.cutoff_freq = sample_rate, cutoff_freq
        # The public argument is called ``Q``; it is stored under a more descriptive name.
        self.quality_factor = Q

    def parse(self):
        # Hand the stored settings to the cde operation in the order it expects.
        op_args = (self.sample_rate, self.cutoff_freq, self.quality_factor)
        return cde.HighpassBiquadOperation(*op_args)
1642
+
1643
+
1644
class InverseMelScale(AudioTensorOperation):
    """
    Estimate a normal STFT from a mel frequency STFT by means of a conversion matrix.

    Args:
        n_stft (int): Number of bins in STFT.
        n_mels (int, optional): Number of mel filterbanks. Default: ``128``.
        sample_rate (int, optional): Sample rate of audio signal. Default: ``16000``.
        f_min (float, optional): Minimum frequency. Default: ``0.0``.
        f_max (float, optional): Maximum frequency. Default: ``None``, will be set to `sample_rate // 2` .
        max_iter (int, optional): Maximum number of optimization iterations. Default: ``100000``.
        tolerance_loss (float, optional): Value of loss to stop optimization at. Default: ``1e-5``.
        tolerance_change (float, optional): Difference in losses to stop optimization at. Default: ``1e-8``.
        sgdargs (dict, optional): Arguments for the SGD optimizer. Default: ``None``, will be set to
            {'sgd_lr': 0.1, 'sgd_momentum': 0.9}.
        norm (NormType, optional): Normalization method, can be ``NormType.SLANEY`` or ``NormType.NONE``.
            Default: ``NormType.NONE``, no normalization.
        mel_type (MelType, optional): Mel scale to use, can be ``MelType.SLANEY`` or ``MelType.HTK``.
            Default: ``MelType.HTK``.

    Raises:
        TypeError: If `n_stft` is not of type int.
        ValueError: If `n_stft` is not positive.
        TypeError: If `n_mels` is not of type int.
        ValueError: If `n_mels` is not positive.
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is not positive.
        TypeError: If `f_min` is not of type float.
        ValueError: If `f_min` is greater than or equal to `f_max` .
        TypeError: If `f_max` is not of type float.
        ValueError: If `f_max` is a negative number.
        TypeError: If `max_iter` is not of type int.
        ValueError: If `max_iter` is a negative number.
        TypeError: If `tolerance_loss` is not of type float.
        ValueError: If `tolerance_loss` is a negative number.
        TypeError: If `tolerance_change` is not of type float.
        ValueError: If `tolerance_change` is a negative number.
        TypeError: If `sgdargs` is not of type dict.
        TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
        TypeError: If `mel_type` is not of type :class:`mindspore.dataset.audio.MelType` .

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.randn(5, 8, 3, 2)  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.InverseMelScale(20, 3, 16000, 0, 8000, 10)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (8, 20, 2) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([8, 3, 2])  # 1 sample
        >>> output = audio.InverseMelScale(20, 3, 16000, 0, 8000, 10)(waveform)
        >>> print(output.shape, output.dtype)
        (8, 20, 2) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_inverse_mel_scale
    def __init__(self, n_stft, n_mels=128, sample_rate=16000, f_min=0.0, f_max=None, max_iter=100000,
                 tolerance_loss=1e-5, tolerance_change=1e-8, sgdargs=None, norm=NormType.NONE, mel_type=MelType.HTK):
        super().__init__()
        self.n_stft, self.n_mels = n_stft, n_mels
        self.sample_rate = sample_rate
        self.f_min = f_min
        # An omitted upper frequency falls back to half the sample rate.
        if f_max is None:
            f_max = sample_rate // 2
        self.f_max = f_max
        self.max_iter = max_iter
        self.tolerance_loss, self.tolerance_change = tolerance_loss, tolerance_change
        # A fresh dict is built on every call so the default is never shared between instances.
        self.sgdargs = {'sgd_lr': 0.1, 'sgd_momentum': 0.9} if sgdargs is None else sgdargs
        self.norm, self.mel_type = norm, mel_type

    def parse(self):
        # Translate both Python enum members through their lookup tables before building the operation.
        norm = DE_C_NORM_TYPE.get(self.norm)
        mel_type = DE_C_MEL_TYPE.get(self.mel_type)
        return cde.InverseMelScaleOperation(self.n_stft, self.n_mels, self.sample_rate, self.f_min, self.f_max,
                                            self.max_iter, self.tolerance_loss, self.tolerance_change,
                                            self.sgdargs, norm, mel_type)
1737
+
1738
+
1739
class InverseSpectrogram(AudioTensorOperation):
    """
    Build an inverse spectrogram that recovers an audio signal from a spectrogram.

    Args:
        length (int, optional): The output length of the waveform, must be non negative. Default: ``None``,
            means to output the whole waveform.
        n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0.
            Default: ``400``.
        win_length (int, optional): Window size, which should be greater than 0.
            Default: ``None``, will be set to `n_fft` .
        hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
            Default: ``None``, will be set to `win_length // 2` .
        pad (int, optional): Two sided padding of signal, cannot be less than 0. Default: ``0``.
        window (WindowType, optional): A function to create a window tensor that is applied/multiplied to each
            frame/window. Default: ``WindowType.HANN``.
        normalized (bool, optional): Whether the spectrogram was normalized by magnitude after stft. Default: ``False``.
        center (bool, optional): Whether the signal in spectrogram was padded on both sides. Default: ``True``.
        pad_mode (BorderType, optional): Controls the padding method used when `center` is ``True``,
            can be ``BorderType.REFLECT``, ``BorderType.CONSTANT``, ``BorderType.EDGE`` or ``BorderType.SYMMETRIC``.
            Default: ``BorderType.REFLECT``.
        onesided (bool, optional): Controls whether spectrogram was used to return half of results to avoid
            redundancy. Default: ``True``.

    Raises:
        TypeError: If `length` is not of type int.
        ValueError: If `length` is a negative number.
        TypeError: If `n_fft` is not of type int.
        ValueError: If `n_fft` is not positive.
        TypeError: If `win_length` is not of type int.
        ValueError: If `win_length` is not positive.
        TypeError: If `hop_length` is not of type int.
        ValueError: If `hop_length` is not positive.
        TypeError: If `pad` is not of type int.
        ValueError: If `pad` is a negative number.
        TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
        TypeError: If `normalized` is not of type bool.
        TypeError: If `center` is not of type bool.
        TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
        TypeError: If `onesided` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 400 // 2 + 1, 30, 2])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.InverseSpectrogram(1, 400, 400, 200)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (1,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([400 // 2 + 1, 30, 2])  # 1 sample
        >>> output = audio.InverseSpectrogram(1, 400, 400, 200)(waveform)
        >>> print(output.shape, output.dtype)
        (1,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_inverse_spectrogram
    def __init__(self, length=None, n_fft=400, win_length=None, hop_length=None, pad=0,
                 window=WindowType.HANN, normalized=False, center=True,
                 pad_mode=BorderType.REFLECT, onesided=True):
        super().__init__()
        # A missing length is stored as 0.
        self.length = 0 if length is None else length
        self.n_fft = n_fft
        # Only ``None`` triggers the derived defaults here; explicit values are kept as given.
        self.win_length = n_fft if win_length is None else win_length
        # hop_length depends on the already-resolved win_length, so it must be set after it.
        self.hop_length = self.win_length // 2 if hop_length is None else hop_length
        self.pad, self.window = pad, window
        self.normalized, self.center = normalized, center
        self.pad_mode, self.onesided = pad_mode, onesided

    def parse(self):
        # Translate both Python enum members through their lookup tables before building the operation.
        window = DE_C_WINDOW_TYPE.get(self.window)
        pad_mode = DE_C_BORDER_TYPE.get(self.pad_mode)
        return cde.InverseSpectrogramOperation(self.length, self.n_fft, self.win_length, self.hop_length,
                                               self.pad, window, self.normalized, self.center, pad_mode,
                                               self.onesided)
1829
+
1830
+
1831
# Lookup table from the Python-side NormMode members to their cde.NormMode counterparts.
DE_C_NORM_MODE = {
    NormMode.ORTHO: cde.NormMode.DE_NORM_MODE_ORTHO,
    NormMode.NONE: cde.NormMode.DE_NORM_MODE_NONE,
}
1833
+
1834
+
1835
class LFCC(AudioTensorOperation):
    """
    Create LFCC for a raw audio signal.

    Note:
        The shape of the audio waveform to be processed needs to be <..., time>.

    Args:
        sample_rate (int, optional): Sample rate of audio signal. Default: ``16000``.
        n_filter (int, optional): Number of linear filters to apply. Default: ``128``.
        n_lfcc (int, optional): Number of lfc coefficients to retain. Default: ``40``.
        f_min (float, optional): Minimum frequency. Default: ``0.0``.
        f_max (float, optional): Maximum frequency. Default: ``None``, will be set to `sample_rate // 2` .
        dct_type (int, optional): Type of DCT to use. The value can only be ``2``. Default: ``2``.
        norm (NormMode, optional): Norm to use. Default: ``NormMode.ORTHO``.
        log_lf (bool, optional): Whether to use log-lf spectrograms instead of db-scaled. Default: ``False``.
        speckwargs (dict, optional): Arguments for :class:`mindspore.dataset.audio.Spectrogram`.
            The dict passed in is copied and is never modified by this transform.
            Default: ``None``, the default setting is a dict including

            - 'n_fft': 400
            - 'win_length': n_fft
            - 'hop_length': win_length // 2
            - 'pad': 0
            - 'window': WindowType.HANN
            - 'power': 2.0
            - 'normalized': False
            - 'center': True
            - 'pad_mode': BorderType.REFLECT
            - 'onesided': True

    Raises:
        TypeError: If `sample_rate` is not of type int.
        TypeError: If `n_filter` is not of type int.
        TypeError: If `n_lfcc` is not of type int.
        TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormMode` .
        TypeError: If `log_lf` is not of type bool.
        TypeError: If `speckwargs` is not of type dict.
        ValueError: If `sample_rate` is 0.
        ValueError: If `n_lfcc` is less than 0.
        ValueError: If `f_min` is greater than `f_max` .
        ValueError: If `f_min` is greater than `sample_rate // 2` when `f_max` is set to None.
        ValueError: If `dct_type` is not ``2``.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 10, 300])
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.LFCC()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (10, 40, 2) float32
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([10, 300])  # 1 sample
        >>> output = audio.LFCC()(waveform)
        >>> print(output.shape, output.dtype)
        (10, 40, 2) float32

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_lfcc
    def __init__(self, sample_rate=16000, n_filter=128, n_lfcc=40, f_min=0.0, f_max=None, dct_type=2,
                 norm=NormMode.ORTHO, log_lf=False, speckwargs=None):
        super().__init__()
        self.sample_rate = sample_rate
        self.n_filter = n_filter
        self.n_lfcc = n_lfcc
        self.f_min = f_min
        self.f_max = f_max if f_max is not None else sample_rate // 2
        self.dct_type = dct_type
        self.norm = norm
        self.log_lf = log_lf
        # Fill defaults into a private shallow copy: calling setdefault() on the caller's own dict
        # would leak derived values (e.g. hop_length) back to the caller and into any later reuse.
        self.speckwargs = {} if speckwargs is None else dict(speckwargs)
        self.speckwargs.setdefault("n_fft", 400)
        # win_length and hop_length default from the values resolved just above them.
        self.speckwargs.setdefault("win_length", self.speckwargs.get("n_fft"))
        self.speckwargs.setdefault("hop_length", self.speckwargs.get("win_length") // 2)
        self.speckwargs.setdefault("pad", 0)
        self.speckwargs.setdefault("window", WindowType.HANN)
        self.speckwargs.setdefault("power", 2.0)
        self.speckwargs.setdefault("normalized", False)
        self.speckwargs.setdefault("center", True)
        self.speckwargs.setdefault("pad_mode", BorderType.REFLECT)
        self.speckwargs.setdefault("onesided", True)
        # The enum-valued entries are kept separately because parse() passes them as converted arguments.
        self.window = self.speckwargs.get("window")
        self.pad_mode = self.speckwargs.get("pad_mode")

    def parse(self):
        """Build the `cde.LFCCOperation` from the attributes stored on this transform."""
        return cde.LFCCOperation(self.sample_rate, self.n_filter, self.n_lfcc, self.f_min, self.f_max,
                                 self.dct_type, DE_C_NORM_MODE.get(self.norm), self.log_lf, self.speckwargs,
                                 DE_C_WINDOW_TYPE.get(self.window), DE_C_BORDER_TYPE.get(self.pad_mode))
1939
+
1940
+
1941
class LFilter(AudioTensorOperation):
    """
    Perform an IIR filter by evaluating a difference equation.

    Args:
        a_coeffs (Sequence[float]): Denominator coefficients of the difference equation.
            Lower delays coefficients are first, e.g. [a0, a1, a2, ...].
            Must be same size as `b_coeffs` (pad with 0's as necessary).
        b_coeffs (Sequence[float]): Numerator coefficients of the difference equation.
            Lower delays coefficients are first, e.g. [b0, b1, b2, ...].
            Must be same size as `a_coeffs` (pad with 0's as necessary).
        clamp (bool, optional): If ``True``, clamp the output signal to be in the range [-1, 1].
            Default: ``True``.

    Raises:
        TypeError: If `a_coeffs` is not of type Sequence[float].
        TypeError: If `b_coeffs` is not of type Sequence[float].
        ValueError: If `a_coeffs` and `b_coeffs` are of different sizes.
        TypeError: If `clamp` is not of type bool.
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.LFilter(a_coeffs=[0.1, 0.2, 0.3], b_coeffs=[0.3, 0.2, 0.1])]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16])  # 1 sample
        >>> output = audio.LFilter(a_coeffs=[0.1, 0.2, 0.3], b_coeffs=[0.3, 0.2, 0.1])(waveform)
        >>> print(output.shape, output.dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_lfilter
    def __init__(self, a_coeffs, b_coeffs, clamp=True):
        super().__init__()
        self.a_coeffs, self.b_coeffs = a_coeffs, b_coeffs
        self.clamp = clamp

    def parse(self):
        """Build the `cde.LFilterOperation` from the stored coefficients and clamp flag."""
        operation_args = (self.a_coeffs, self.b_coeffs, self.clamp)
        return cde.LFilterOperation(*operation_args)
1999
+
2000
+
2001
class LowpassBiquad(AudioTensorOperation):
    r"""
    Design two-pole low-pass filter for audio waveform.

    A low-pass filter lets through frequencies below a chosen cutoff frequency
    and attenuates the frequencies above it. The system function is:

    .. math::
        H(s) = \frac{1}{s^2 + \frac{s}{Q} + 1}

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Note:
        The shape of the audio waveform to be processed needs to be <..., time>.

    Args:
        sample_rate (int): Sampling rate (in Hz), which can't be zero.
        cutoff_freq (float): Filter cutoff frequency (in Hz).
        Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
            in range of (0, 1]. Default: ``0.707``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `cutoff_freq` is not of type float.
        TypeError: If `Q` is not of type float.
        ValueError: If `Q` is not in range of (0, 1].
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 10])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.LowpassBiquad(4000, 1500, 0.7)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (10,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([10])  # 1 sample
        >>> output = audio.LowpassBiquad(4000, 1500, 0.7)(waveform)
        >>> print(output.shape, output.dtype)
        (10,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_lowpass_biquad
    def __init__(self, sample_rate, cutoff_freq, Q=0.707):
        super().__init__()
        # The public parameter is named `Q`; it is stored under the more descriptive attribute name.
        self.quality_factor = Q
        self.sample_rate, self.cutoff_freq = sample_rate, cutoff_freq

    def parse(self):
        """Build the `cde.LowpassBiquadOperation` from the stored filter settings."""
        operation_args = (self.sample_rate, self.cutoff_freq, self.quality_factor)
        return cde.LowpassBiquadOperation(*operation_args)
2068
+
2069
+
2070
class Magphase(AudioTensorOperation):
    """
    Split a complex-valued spectrogram with shape :math:`(..., 2)` into its magnitude and phase.

    Args:
        power (float, optional): Power of the norm, which must be non-negative. Default: ``1.0``.

    Raises:
        RuntimeError: If the shape of input audio waveform does not match (..., 2).

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16, 2])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Magphase()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"],
        ...                                                 output_columns=["spect", "phase"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["spect"].shape, item["spect"].dtype)
        ...     break
        (16,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16, 2])  # 1 sample
        >>> output = audio.Magphase()(waveform)
        >>> print(output[0].shape, output[0].dtype)
        (16,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_magphase
    def __init__(self, power=1.0):
        super().__init__()
        self.power = power

    def parse(self):
        """Build the `cde.MagphaseOperation` for the stored `power`."""
        norm_power = self.power
        return cde.MagphaseOperation(norm_power)
2117
+
2118
+
2119
class MaskAlongAxis(AudioTensorOperation):
    """
    Apply a mask along `axis` . The mask covers the indices `[mask_start, mask_start + mask_width)` .

    Args:
        mask_start (int): Starting position of the mask, which must be non negative.
        mask_width (int): The width of the mask, which must be larger than 0.
        mask_value (float): Value to assign to the masked columns.
        axis (int): Axis to apply mask on (1 for frequency and 2 for time).

    Raises:
        ValueError: If `mask_start` is invalid (< 0).
        ValueError: If `mask_width` is invalid (< 1).
        ValueError: If `axis` is not type of int or not within [1, 2].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 20, 20])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.MaskAlongAxis(0, 10, 0.5, 1)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (20, 20) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([20, 20])  # 1 sample
        >>> output = audio.MaskAlongAxis(0, 10, 0.5, 1)(waveform)
        >>> print(output.shape, output.dtype)
        (20, 20) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_mask_along_axis
    def __init__(self, mask_start, mask_width, mask_value, axis):
        super().__init__()
        self.mask_start, self.mask_width = mask_start, mask_width
        self.mask_value = mask_value
        self.axis = axis

    def parse(self):
        """Build the `cde.MaskAlongAxisOperation` from the stored mask settings."""
        operation_args = (self.mask_start, self.mask_width, self.mask_value, self.axis)
        return cde.MaskAlongAxisOperation(*operation_args)
2173
+
2174
+
2175
class MaskAlongAxisIID(AudioTensorOperation):
    """
    Apply a mask along `axis` . The mask covers the indices `[mask_start, mask_start + mask_width)` , where
    `mask_width` is sampled from `uniform[0, mask_param]` , and `mask_start` from
    `uniform[0, max_length - mask_width]` , `max_length` is the number of columns of the specified axis
    of the spectrogram.

    Args:
        mask_param (int): Number of columns to be masked, will be uniformly sampled from
            [0, mask_param], must be non negative.
        mask_value (float): Value to assign to the masked columns.
        axis (int): Axis to apply mask on (1 for frequency and 2 for time).

    Raises:
        TypeError: If `mask_param` is not of type int.
        ValueError: If `mask_param` is a negative value.
        TypeError: If `mask_value` is not of type float.
        TypeError: If `axis` is not of type int.
        ValueError: If `axis` is not in range of [1, 2].
        RuntimeError: If input tensor is not in shape of <..., freq, time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 20, 20])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.MaskAlongAxisIID(5, 0.5, 2)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (20, 20) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([20, 20])  # 1 sample
        >>> output = audio.MaskAlongAxisIID(5, 0.5, 2)(waveform)
        >>> print(output.shape, output.dtype)
        (20, 20) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_mask_along_axis_iid
    def __init__(self, mask_param, mask_value, axis):
        super().__init__()
        self.mask_param, self.mask_value = mask_param, mask_value
        self.axis = axis

    def parse(self):
        """Build the `cde.MaskAlongAxisIIDOperation` from the stored mask settings."""
        operation_args = (self.mask_param, self.mask_value, self.axis)
        return cde.MaskAlongAxisIIDOperation(*operation_args)
2234
+
2235
+
2236
# Lookup from the Python-side MelType members to the matching cde.MelType values.
DE_C_MEL_TYPE = {
    MelType.SLANEY: cde.MelType.DE_MEL_TYPE_SLANEY,
    MelType.HTK: cde.MelType.DE_MEL_TYPE_HTK,
}

# Lookup from the Python-side NormType members to the matching cde.NormType values.
DE_C_NORM_TYPE = {
    NormType.NONE: cde.NormType.DE_NORM_TYPE_NONE,
    NormType.SLANEY: cde.NormType.DE_NORM_TYPE_SLANEY,
}
2241
+
2242
+
2243
class MelScale(AudioTensorOperation):
    """
    Convert normal STFT to STFT at the Mel scale.

    Args:
        n_mels (int, optional): Number of mel filterbanks. Default: ``128``.
        sample_rate (int, optional): Sample rate of audio signal. Default: ``16000``.
        f_min (float, optional): Minimum frequency. Default: ``0.0``.
        f_max (float, optional): Maximum frequency. Default: ``None``, will be set to `sample_rate // 2` .
        n_stft (int, optional): Number of bins in STFT. Default: ``201``.
        norm (NormType, optional): Type of norm, value should be ``NormType.SLANEY`` or ``NormType.NONE``.
            If `norm` is ``NormType.SLANEY``, divide the triangular mel weight by the width of the mel band.
            Default: ``NormType.NONE``, no normalization.
        mel_type (MelType, optional): Type to use, value should be ``MelType.SLANEY`` or ``MelType.HTK``.
            Default: ``MelType.HTK``.

    Raises:
        TypeError: If `n_mels` is not of type int.
        ValueError: If `n_mels` is not positive.
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is not positive.
        TypeError: If `f_min` is not of type float.
        ValueError: If `f_min` is greater than or equal to `f_max` .
        TypeError: If `f_max` is not of type float.
        ValueError: If `f_max` is a negative number.
        TypeError: If `n_stft` is not of type int.
        ValueError: If `n_stft` is not positive.
        TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
        TypeError: If `mel_type` is not of type :class:`mindspore.dataset.audio.MelType` .

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 201, 3])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.MelScale(200, 1500, 0.7)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (200, 3) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([201, 3])  # 1 sample
        >>> output = audio.MelScale(200, 1500, 0.7)(waveform)
        >>> print(output.shape, output.dtype)
        (200, 3) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_mel_scale
    def __init__(self, n_mels=128, sample_rate=16000, f_min=0.0, f_max=None, n_stft=201, norm=NormType.NONE,
                 mel_type=MelType.HTK):
        super().__init__()
        # An omitted upper frequency bound falls back to half of the sample rate.
        if f_max is None:
            f_max = sample_rate // 2
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        self.f_min, self.f_max = f_min, f_max
        self.n_stft = n_stft
        self.norm = norm
        self.mel_type = mel_type

    def parse(self):
        """Build the `cde.MelScaleOperation` from the attributes stored on this transform."""
        # The norm and mel-type enums are looked up in the DE_C_* mappings before being handed to cde.
        norm_type = DE_C_NORM_TYPE.get(self.norm)
        mel_type = DE_C_MEL_TYPE.get(self.mel_type)
        return cde.MelScaleOperation(self.n_mels, self.sample_rate, self.f_min, self.f_max, self.n_stft,
                                     norm_type, mel_type)
2317
+
2318
+
2319
class MelSpectrogram(AudioTensorOperation):
    r"""
    Create MelSpectrogram for a raw audio signal.

    Args:
        sample_rate (int, optional): Sampling rate of audio signal (in Hz), which can't be less than 0.
            Default: ``16000``.
        n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0 and
            less than twice of the last dimension size of the input. Default: ``400``.
        win_length (int, optional): Window size, which should be greater than 0 and no more than `n_fft` .
            Default: ``None``, will be set to `n_fft` .
        hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
            Default: ``None``, will be set to `win_length // 2` .
        f_min (float, optional): Minimum frequency, which can't be greater than `f_max` . Default: ``0.0``.
        f_max (float, optional): Maximum frequency, which can't be less than 0. Default: ``None``, will be set
            to `sample_rate // 2` .
        pad (int, optional): Two sided padding of signal, which can't be less than 0. Default: ``0``.
        n_mels (int, optional): Number of mel filterbanks, which can't be less than 0. Default: ``128``.
        window (WindowType, optional): A function to create a window tensor that is applied/multiplied to each
            frame/window. Default: ``WindowType.HANN``.
        power (float, optional): Exponent for the magnitude spectrogram, which must be
            greater than 0, e.g., ``1`` for energy, ``2`` for power, etc. Default: ``2.0``.
        normalized (bool, optional): Whether to normalize by magnitude after stft. Default: ``False``.
        center (bool, optional): Whether to pad waveform on both sides. Default: ``True``.
        pad_mode (BorderType, optional): Controls the padding method used when `center` is ``True``,
            can be ``BorderType.REFLECT``, ``BorderType.CONSTANT``, ``BorderType.EDGE`` or
            ``BorderType.SYMMETRIC``. Default: ``BorderType.REFLECT``.
        onesided (bool, optional): Controls whether to return half of results to avoid redundancy.
            Default: ``True``.
        norm (NormType, optional): If ``NormType.SLANEY``, divide the triangular mel weights by the width of
            the mel band (area normalization). Default: ``NormType.NONE``, no normalization.
        mel_scale (MelType, optional): Mel scale to use, can be ``MelType.SLANEY`` or ``MelType.HTK``.
            Default: ``MelType.HTK``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        TypeError: If `n_fft` is not of type int.
        TypeError: If `n_mels` is not of type int.
        TypeError: If `f_min` is not of type float.
        TypeError: If `f_max` is not of type float.
        TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
        TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
        TypeError: If `mel_scale` is not of type :class:`mindspore.dataset.audio.MelType` .
        TypeError: If `power` is not of type float.
        TypeError: If `normalized` is not of type bool.
        TypeError: If `center` is not of type bool.
        TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
        TypeError: If `onesided` is not of type bool.
        TypeError: If `pad` is not of type int.
        TypeError: If `win_length` is not of type int.
        TypeError: If `hop_length` is not of type int.
        ValueError: If `sample_rate` is a negative number.
        ValueError: If `n_fft` is not positive.
        ValueError: If `n_mels` is a negative number.
        ValueError: If `f_min` is greater than `f_max` .
        ValueError: If `f_max` is a negative number.
        ValueError: If `f_min` is not less than `sample_rate // 2` when `f_max` is set to None.
        ValueError: If `power` is not positive.
        ValueError: If `pad` is a negative number.
        ValueError: If `win_length` is not positive.
        ValueError: If `hop_length` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 32])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.MelSpectrogram(sample_rate=16000, n_fft=16, win_length=16, hop_length=8, f_min=0.0,
        ...                                    f_max=5000.0, pad=0, n_mels=2, window=audio.WindowType.HANN, power=2.0,
        ...                                    normalized=False, center=True, pad_mode=audio.BorderType.REFLECT,
        ...                                    onesided=True, norm=audio.NormType.SLANEY, mel_scale=audio.MelType.HTK)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (2, 5) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([32])  # 1 sample
        >>> output = audio.MelSpectrogram(sample_rate=16000, n_fft=16, win_length=16, hop_length=8, f_min=0.0,
        ...                               f_max=5000.0, pad=0, n_mels=2, window=audio.WindowType.HANN, power=2.0,
        ...                               normalized=False, center=True, pad_mode=audio.BorderType.REFLECT,
        ...                               onesided=True, norm=audio.NormType.SLANEY,
        ...                               mel_scale=audio.MelType.HTK)(waveform)
        >>> print(output.shape, output.dtype)
        (2, 5) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_mel_spectrogram
    def __init__(self, sample_rate=16000, n_fft=400, win_length=None, hop_length=None, f_min=0.0, f_max=None, pad=0,
                 n_mels=128, window=WindowType.HANN, power=2.0, normalized=False, center=True,
                 pad_mode=BorderType.REFLECT, onesided=True, norm=NormType.NONE, mel_scale=MelType.HTK):
        super().__init__()
        # Omitted values are resolved in a chain: win_length -> n_fft, hop_length -> half of the
        # resolved win_length, f_max -> half of the sample rate.
        if win_length is None:
            win_length = n_fft
        if hop_length is None:
            hop_length = win_length // 2
        if f_max is None:
            f_max = sample_rate // 2
        self.sample_rate = sample_rate
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.f_min, self.f_max = f_min, f_max
        self.pad = pad
        self.n_mels = n_mels
        self.window = window
        self.power = power
        self.normalized = normalized
        self.center = center
        self.pad_mode = pad_mode
        self.onesided = onesided
        self.norm = norm
        self.mel_scale = mel_scale

    def parse(self):
        """Build the `cde.MelSpectrogramOperation` from the attributes stored on this transform."""
        # The enum-valued settings are looked up in the DE_C_* mappings before being handed to cde.
        window_type = DE_C_WINDOW_TYPE.get(self.window)
        border_type = DE_C_BORDER_TYPE.get(self.pad_mode)
        norm_type = DE_C_NORM_TYPE.get(self.norm)
        mel_type = DE_C_MEL_TYPE.get(self.mel_scale)
        return cde.MelSpectrogramOperation(self.sample_rate, self.n_fft, self.win_length, self.hop_length, self.f_min,
                                           self.f_max, self.pad, self.n_mels, window_type, self.power,
                                           self.normalized, self.center, border_type, self.onesided,
                                           norm_type, mel_type)
2445
+
2446
+
2447
+ class MFCC(AudioTensorOperation):
2448
+ """
2449
+ Create MFCC for a raw audio signal.
2450
+
2451
+ Args:
2452
+ sample_rate (int, optional): Sampling rate of audio signal (in Hz), can't be less than 0. Default: ``16000``.
2453
+ n_mfcc (int, optional): Number of mfc coefficients to retain, can't be less than 0. Default: ``40``.
2454
+ dct_type (int, optional): Type of DCT (discrete cosine transform) to use, can only be ``2``. Default: ``2``.
2455
+ norm (NormMode, optional): Norm to use. Default: ``NormMode.ORTHO``.
2456
+ log_mels (bool, optional): Whether to use log-mel spectrograms instead of db-scaled. Default: ``False``.
2457
+ melkwargs (dict, optional): Arguments for :class:`mindspore.dataset.audio.MelSpectrogram`.
2458
+ Default: ``None``, the default setting is a dict including
2459
+
2460
+ - 'n_fft': 400
2461
+ - 'win_length': n_fft
2462
+ - 'hop_length': win_length // 2
2463
+ - 'f_min': 0.0
2464
+ - 'f_max': sample_rate // 2
2465
+ - 'pad': 0
2466
+ - 'window': WindowType.HANN
2467
+ - 'power': 2.0
2468
+ - 'normalized': False
2469
+ - 'center': True
2470
+ - 'pad_mode': BorderType.REFLECT
2471
+ - 'onesided': True
2472
+ - 'norm': NormType.NONE
2473
+ - 'mel_scale': MelType.HTK
2474
+
2475
+ Raises:
2476
+ TypeError: If `sample_rate` is not of type int.
2477
+ TypeError: If `log_mels` is not of type bool.
2478
+ TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormMode` .
2479
+ TypeError: If `n_mfcc` is not of type int.
2480
+ TypeError: If `melkwargs` is not of type dict.
2481
+ ValueError: If `sample_rate` is a negative number.
2482
+ ValueError: If `n_mfcc` is a negative number.
2483
+ ValueError: If `dct_type` is not ``2``.
2484
+
2485
+ Supported Platforms:
2486
+ ``CPU``
2487
+
2488
+ Examples:
2489
+ >>> import numpy as np
2490
+ >>> import mindspore.dataset as ds
2491
+ >>> import mindspore.dataset.audio as audio
2492
+ >>>
2493
+ >>> # Use the transform in dataset pipeline mode
2494
+ >>> waveform = np.random.random([5, 500]) # 5 samples
2495
+ >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
2496
+ >>> transforms = [audio.MFCC(4000, 128, 2)]
2497
+ >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
2498
+ >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
2499
+ ... print(item["audio"].shape, item["audio"].dtype)
2500
+ ... break
2501
+ (128, 3) float32
2502
+ >>>
2503
+ >>> # Use the transform in eager mode
2504
+ >>> waveform = np.random.random([500]) # 1 sample
2505
+ >>> output = audio.MFCC(4000, 128, 2)(waveform)
2506
+ >>> print(output.shape, output.dtype)
2507
+ (128, 3) float32
2508
+
2509
+ Tutorial Examples:
2510
+ - `Illustration of audio transforms
2511
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
2512
+ """
2513
+
2514
@check_mfcc
def __init__(self, sample_rate=16000, n_mfcc=40, dct_type=2, norm=NormMode.ORTHO, log_mels=False, melkwargs=None):
    """
    Store the MFCC settings and fill in a default for every mel-spectrogram option that was not supplied.

    Args:
        sample_rate: Stored unchanged. ``sample_rate // 2`` is used as the default ``"f_max"``.
            Default: ``16000``.
        n_mfcc: Stored unchanged. Default: ``40``.
        dct_type: Stored unchanged. Default: ``2``.
        norm: Stored unchanged. Default: ``NormMode.ORTHO``.
        log_mels: Stored unchanged. Default: ``False``.
        melkwargs: Mapping of mel-spectrogram options. Keys that are missing receive the defaults
            set below. The mapping passed in is copied, so the caller's object is left untouched.
            Default: ``None``, every option takes its default.
    """
    super().__init__()
    self.sample_rate = sample_rate
    self.n_mfcc = n_mfcc
    self.dct_type = dct_type
    self.norm = norm
    self.log_mels = log_mels
    # Work on a private copy: the setdefault calls below would otherwise write the
    # default entries into the dict object owned by the caller.
    self.melkwargs = {} if melkwargs is None else dict(melkwargs)
    self.melkwargs.setdefault("n_fft", 400)
    # win_length falls back to n_fft, and hop_length to half of win_length.
    self.melkwargs.setdefault("win_length", self.melkwargs.get("n_fft"))
    self.melkwargs.setdefault("hop_length", self.melkwargs.get("win_length") // 2)
    self.melkwargs.setdefault("f_min", 0.0)
    self.melkwargs.setdefault("f_max", sample_rate // 2)
    self.melkwargs.setdefault("pad", 0)
    self.melkwargs.setdefault("n_mels", 128)
    self.melkwargs.setdefault("window", WindowType.HANN)
    self.melkwargs.setdefault("power", 2.0)
    self.melkwargs.setdefault("normalized", False)
    self.melkwargs.setdefault("center", True)
    self.melkwargs.setdefault("pad_mode", BorderType.REFLECT)
    self.melkwargs.setdefault("onesided", True)
    self.melkwargs.setdefault("norm", NormType.NONE)
    self.melkwargs.setdefault("mel_scale", MelType.HTK)
    # Enum-valued options are also kept as attributes; parse() maps them through the DE_C_* tables.
    self.window = self.melkwargs.get("window")
    self.pad_mode = self.melkwargs.get("pad_mode")
    self.norm_mel = self.melkwargs.get("norm")
    self.mel_scale = self.melkwargs.get("mel_scale")
2544
+
2545
def parse(self):
    """Return the `cde.MFCCOperation` built from the stored settings, with enums mapped via the DE_C_* tables."""
    norm_mode = DE_C_NORM_MODE.get(self.norm)
    window_type = DE_C_WINDOW_TYPE.get(self.window)
    border_type = DE_C_BORDER_TYPE.get(self.pad_mode)
    mel_norm = DE_C_NORM_TYPE.get(self.norm_mel)
    mel_type = DE_C_MEL_TYPE.get(self.mel_scale)
    return cde.MFCCOperation(self.sample_rate, self.n_mfcc, self.dct_type, norm_mode, self.log_mels,
                             self.melkwargs, window_type, border_type, mel_norm, mel_type)
2550
+
2551
+
2552
class MuLawDecoding(AudioTensorOperation):
    """
    Decode a signal that was encoded with mu-law, refer to
    `mu-law algorithm <https://en.wikipedia.org/wiki/M-law_algorithm>`_ .

    Args:
        quantization_channels (int, optional): Number of channels; a positive value is required.
            Default: ``256``.

    Raises:
        TypeError: If `quantization_channels` is not of type int.
        ValueError: If `quantization_channels` is not a positive number.
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 3, 4])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.MuLawDecoding()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (3, 4) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([3, 4])  # 1 sample
        >>> output = audio.MuLawDecoding()(waveform)
        >>> print(output.shape, output.dtype)
        (3, 4) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_mu_law_coding
    def __init__(self, quantization_channels=256):
        super().__init__()
        self.quantization_channels = quantization_channels

    def parse(self):
        """Return the `cde.MuLawDecodingOperation` configured with the stored channel count."""
        operation = cde.MuLawDecodingOperation(self.quantization_channels)
        return operation
2600
+
2601
+
2602
class MuLawEncoding(AudioTensorOperation):
    """
    Encode a signal using mu-law companding.

    Args:
        quantization_channels (int, optional): Number of channels; a positive value is required.
            Default: ``256``.

    Raises:
        TypeError: If `quantization_channels` is not of type int.
        ValueError: If `quantization_channels` is not a positive number.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 3, 4])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.MuLawEncoding()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (3, 4) int32
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([3, 4])  # 1 sample
        >>> output = audio.MuLawEncoding()(waveform)
        >>> print(output.shape, output.dtype)
        (3, 4) int32

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_mu_law_coding
    def __init__(self, quantization_channels=256):
        super().__init__()
        self.quantization_channels = quantization_channels

    def parse(self):
        """Return the `cde.MuLawEncodingOperation` configured with the stored channel count."""
        operation = cde.MuLawEncodingOperation(self.quantization_channels)
        return operation
2649
+
2650
+
2651
class Overdrive(AudioTensorOperation):
    """
    Add an overdrive effect to the audio waveform.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        gain (float, optional): Desired gain at the boost (or attenuation) in dB, within [0, 100].
            Default: ``20.0``.
        color (float, optional): Amount of even harmonic content in the over-driven output,
            within [0, 100]. Default: ``20.0``.

    Raises:
        TypeError: If `gain` is not of type float.
        ValueError: If `gain` is not in range of [0, 100].
        TypeError: If `color` is not of type float.
        ValueError: If `color` is not in range of [0, 100].
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 10])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Overdrive()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (10,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([10])  # 1 sample
        >>> output = audio.Overdrive()(waveform)
        >>> print(output.shape, output.dtype)
        (10,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_overdrive
    def __init__(self, gain=20.0, color=20.0):
        super().__init__()
        self.color = color
        self.gain = gain

    def parse(self):
        """Return the `cde.OverdriveOperation` configured with the stored gain and color."""
        gain, color = self.gain, self.color
        return cde.OverdriveOperation(gain, color)
2707
+
2708
+
2709
class Phaser(AudioTensorOperation):
    """
    Add a phasing effect to the audio.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
        gain_in (float, optional): Desired input gain at the boost (or attenuation) in dB,
            within [0.0, 1.0]. Default: ``0.4``.
        gain_out (float, optional): Desired output gain at the boost (or attenuation) in dB,
            within [0.0, 1e9]. Default: ``0.74``.
        delay_ms (float, optional): Desired delay in milliseconds, within [0.0, 5.0]. Default: ``3.0``.
        decay (float, optional): Desired decay relative to gain-in, within [0.0, 0.99]. Default: ``0.4``.
        mod_speed (float, optional): Modulation speed in Hz, within [0.1, 2.0]. Default: ``0.5``.
        sinusoidal (bool, optional): ``True`` selects sinusoidal modulation (preferable for multiple
            instruments); ``False`` selects triangular modulation (gives single instruments a sharper
            phasing effect). Default: ``True``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        TypeError: If `gain_in` is not of type float.
        ValueError: If `gain_in` is not in range of [0.0, 1.0].
        TypeError: If `gain_out` is not of type float.
        ValueError: If `gain_out` is not in range of [0.0, 1e9].
        TypeError: If `delay_ms` is not of type float.
        ValueError: If `delay_ms` is not in range of [0.0, 5.0].
        TypeError: If `decay` is not of type float.
        ValueError: If `decay` is not in range of [0.0, 0.99].
        TypeError: If `mod_speed` is not of type float.
        ValueError: If `mod_speed` is not in range of [0.1, 2.0].
        TypeError: If `sinusoidal` is not of type bool.
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 12])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Phaser(44100)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (12,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([12])  # 1 sample
        >>> output = audio.Phaser(44100)(waveform)
        >>> print(output.shape, output.dtype)
        (12,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_phaser
    def __init__(self, sample_rate, gain_in=0.4, gain_out=0.74, delay_ms=3.0, decay=0.4, mod_speed=0.5,
                 sinusoidal=True):
        super().__init__()
        # Attributes are stored in signature order.
        self.sample_rate = sample_rate
        self.gain_in = gain_in
        self.gain_out = gain_out
        self.delay_ms = delay_ms
        self.decay = decay
        self.mod_speed = mod_speed
        self.sinusoidal = sinusoidal

    def parse(self):
        """Return the `cde.PhaserOperation` configured with the stored settings."""
        settings = (self.sample_rate, self.gain_in, self.gain_out, self.delay_ms, self.decay, self.mod_speed,
                    self.sinusoidal)
        return cde.PhaserOperation(*settings)
2787
+
2788
+
2789
class PhaseVocoder(AudioTensorOperation):
    """
    Speed up a STFT spectrogram in time by a factor of `rate` without modifying the pitch.

    Args:
        rate (float): Speed-up factor.
        phase_advance (numpy.ndarray): Expected phase advance in each bin, in shape of (freq, 1).

    Raises:
        TypeError: If `rate` is not of type float.
        ValueError: If `rate` is not a positive number.
        TypeError: If `phase_advance` is not of type :class:`numpy.ndarray` .
        RuntimeError: If input tensor is not in shape of <..., freq, num_frame, complex=2>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 44, 10, 2])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.PhaseVocoder(rate=2, phase_advance=np.random.random([44, 1]))]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (44, 5, 2) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([44, 10, 2])  # 1 sample
        >>> output = audio.PhaseVocoder(rate=2, phase_advance=np.random.random([44, 1]))(waveform)
        >>> print(output.shape, output.dtype)
        (44, 5, 2) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_phase_vocoder
    def __init__(self, rate, phase_advance):
        super().__init__()
        self.rate = rate
        # The phase advance is wrapped in a cde.Tensor once here and reused by parse().
        phase_tensor = cde.Tensor(phase_advance)
        self.phase_advance = phase_tensor

    def parse(self):
        """Return the `cde.PhaseVocoderOperation` configured with the stored rate and phase-advance tensor."""
        rate, phase_tensor = self.rate, self.phase_advance
        return cde.PhaseVocoderOperation(rate, phase_tensor)
2840
+
2841
+
2842
class PitchShift(AudioTensorOperation):
    """
    Change the pitch of a waveform by shifting it `n_steps` steps.

    Args:
        sample_rate (int): Sampling rate of waveform (in Hz).
        n_steps (int): The steps to shift waveform.
        bins_per_octave (int, optional): The number of steps per octave. Default: ``12``.
        n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: ``512``.
        win_length (int, optional): Window size. Default: ``None``, will be set to `n_fft` .
        hop_length (int, optional): Length of hop between STFT windows. Default: ``None``,
            will be set to `win_length // 4` .
        window (WindowType, optional): Window tensor that is applied/multiplied to each frame/window.
            Default: ``WindowType.HANN``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        TypeError: If `n_steps` is not of type int.
        TypeError: If `bins_per_octave` is not of type int.
        TypeError: If `n_fft` is not of type int.
        TypeError: If `win_length` is not of type int.
        TypeError: If `hop_length` is not of type int.
        TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
        ValueError: If `sample_rate` is a negative number.
        ValueError: If `bins_per_octave` is 0.
        ValueError: If `n_fft` is a negative number.
        ValueError: If `win_length` is not positive.
        ValueError: If `hop_length` is not positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 8, 30])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.PitchShift(sample_rate=16000, n_steps=4)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (8, 30) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([8, 30])  # 1 sample
        >>> output = audio.PitchShift(sample_rate=16000, n_steps=4)(waveform)
        >>> print(output.shape, output.dtype)
        (8, 30) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_pitch_shift
    def __init__(self, sample_rate, n_steps, bins_per_octave=12, n_fft=512, win_length=None,
                 hop_length=None, window=WindowType.HANN):
        super().__init__()
        self.sample_rate = sample_rate
        self.n_steps = n_steps
        self.bins_per_octave = bins_per_octave
        self.n_fft = n_fft
        self.window = window

        # An omitted window size falls back to the FFT size.
        if win_length is None:
            self.win_length = n_fft
        else:
            self.win_length = win_length

        # An omitted hop length is a quarter of the resolved window size.
        if hop_length is None:
            self.hop_length = self.win_length // 4
        else:
            self.hop_length = hop_length

    def parse(self):
        """Return the `cde.PitchShiftOperation` configured with the stored settings."""
        window_type = DE_C_WINDOW_TYPE.get(self.window)
        return cde.PitchShiftOperation(self.sample_rate, self.n_steps, self.bins_per_octave, self.n_fft,
                                       self.win_length, self.hop_length, window_type)
2915
+
2916
+
2917
# Lookup table from each ResampleMethod member to its cde.ResampleMethod counterpart.
DE_C_RESAMPLE_METHOD = {
    ResampleMethod.SINC_INTERPOLATION: cde.ResampleMethod.DE_RESAMPLE_SINC_INTERPOLATION,
    ResampleMethod.KAISER_WINDOW: cde.ResampleMethod.DE_RESAMPLE_KAISER_WINDOW,
}
2919
+
2920
+
2921
class Resample(AudioTensorOperation):
    """
    Convert a signal from one frequency to another, with a selectable resample method.

    Args:
        orig_freq (float, optional): The original frequency of the signal, must be positive. Default: ``16000``.
        new_freq (float, optional): The desired frequency, must be positive. Default: ``16000``.
        resample_method (ResampleMethod, optional): The resample method to use, can be
            ``ResampleMethod.SINC_INTERPOLATION`` or ``ResampleMethod.KAISER_WINDOW``.
            Default: ``ResampleMethod.SINC_INTERPOLATION``.
        lowpass_filter_width (int, optional): Controls the sharpness of the filter, more means sharper but less
            efficient, must be positive. Default: ``6``.
        rolloff (float, optional): The roll-off frequency of the filter, as a fraction of the Nyquist. Lower values
            reduce anti-aliasing, but also reduce some of the highest frequencies, in range of (0, 1].
            Default: ``0.99``.
        beta (float, optional): The shape parameter used for kaiser window. Default: ``None``,
            will use ``14.769656459379492``.

    Raises:
        TypeError: If `orig_freq` is not of type float.
        ValueError: If `orig_freq` is not a positive number.
        TypeError: If `new_freq` is not of type float.
        ValueError: If `new_freq` is not a positive number.
        TypeError: If `resample_method` is not of type :class:`mindspore.dataset.audio.ResampleMethod` .
        TypeError: If `lowpass_filter_width` is not of type int.
        ValueError: If `lowpass_filter_width` is not a positive number.
        TypeError: If `rolloff` is not of type float.
        ValueError: If `rolloff` is not in range of (0, 1].
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16, 30])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Resample(orig_freq=48000, new_freq=16000,
        ...                              resample_method=audio.ResampleMethod.SINC_INTERPOLATION,
        ...                              lowpass_filter_width=6, rolloff=0.99, beta=None)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16, 10) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16, 30])  # 1 sample
        >>> output = audio.Resample(orig_freq=48000, new_freq=16000,
        ...                         resample_method=audio.ResampleMethod.SINC_INTERPOLATION,
        ...                         lowpass_filter_width=6, rolloff=0.99, beta=None)(waveform)
        >>> print(output.shape, output.dtype)
        (16, 10) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_resample
    def __init__(self, orig_freq=16000, new_freq=16000, resample_method=ResampleMethod.SINC_INTERPOLATION,
                 lowpass_filter_width=6, rolloff=0.99, beta=None):
        super().__init__()
        self.orig_freq = orig_freq
        self.new_freq = new_freq
        self.resample_method = resample_method
        self.lowpass_filter_width = lowpass_filter_width
        self.rolloff = rolloff
        # Fallback shape parameter used when the caller leaves `beta` unset.
        if beta is None:
            self.beta = 14.769656459379492
        else:
            self.beta = beta

    def parse(self):
        """Return the `cde.ResampleOperation` configured with the stored settings."""
        method = DE_C_RESAMPLE_METHOD.get(self.resample_method)
        return cde.ResampleOperation(self.orig_freq, self.new_freq, method,
                                     self.lowpass_filter_width, self.rolloff, self.beta)
2999
+
3000
+
3001
class RiaaBiquad(AudioTensorOperation):
    """
    Perform RIAA vinyl playback equalization.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz),
            can only be one of 44100, 48000, 88200, 96000.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is not any of [44100, 48000, 88200, 96000].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 24])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.RiaaBiquad(44100)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (24,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([24])  # 1 sample
        >>> output = audio.RiaaBiquad(44100)(waveform)
        >>> print(output.shape, output.dtype)
        (24,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_riaa_biquad
    def __init__(self, sample_rate):
        super().__init__()
        self.sample_rate = sample_rate

    def parse(self):
        """Return the `cde.RiaaBiquadOperation` configured with the stored sample rate."""
        operation = cde.RiaaBiquadOperation(self.sample_rate)
        return operation
3051
+
3052
+
3053
class SlidingWindowCmn(AudioTensorOperation):
    """
    Perform per-utterance sliding-window cepstral mean normalization, optionally with variance normalization.

    Args:
        cmn_window (int, optional): Window in frames for running average CMN computation. Default: ``600``.
        min_cmn_window (int, optional): Minimum CMN window used at start of decoding (adds latency only at start).
            Only applicable if center is ``False``, ignored if center is ``True``. Default: ``100``.
        center (bool, optional): ``True`` uses a window centered on the current frame; ``False`` uses a window
            to the left. Default: ``False``.
        norm_vars (bool, optional): If ``True``, normalize variance to one. Default: ``False``.

    Raises:
        TypeError: If `cmn_window` is not of type int.
        ValueError: If `cmn_window` is a negative number.
        TypeError: If `min_cmn_window` is not of type int.
        ValueError: If `min_cmn_window` is a negative number.
        TypeError: If `center` is not of type bool.
        TypeError: If `norm_vars` is not of type bool.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16, 3])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.SlidingWindowCmn()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16, 3) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16, 3])  # 1 sample
        >>> output = audio.SlidingWindowCmn()(waveform)
        >>> print(output.shape, output.dtype)
        (16, 3) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_sliding_window_cmn
    def __init__(self, cmn_window=600, min_cmn_window=100, center=False, norm_vars=False):
        super().__init__()
        # Window sizes first, then the two boolean switches.
        self.cmn_window, self.min_cmn_window = cmn_window, min_cmn_window
        self.center, self.norm_vars = center, norm_vars

    def parse(self):
        """Return the `cde.SlidingWindowCmnOperation` configured with the stored settings."""
        settings = (self.cmn_window, self.min_cmn_window, self.center, self.norm_vars)
        return cde.SlidingWindowCmnOperation(*settings)
3112
+
3113
+
3114
# Lookup table from each WindowType member to its cde.WindowType counterpart.
DE_C_WINDOW_TYPE = {
    WindowType.BARTLETT: cde.WindowType.DE_WINDOW_TYPE_BARTLETT,
    WindowType.BLACKMAN: cde.WindowType.DE_WINDOW_TYPE_BLACKMAN,
    WindowType.HAMMING: cde.WindowType.DE_WINDOW_TYPE_HAMMING,
    WindowType.HANN: cde.WindowType.DE_WINDOW_TYPE_HANN,
    WindowType.KAISER: cde.WindowType.DE_WINDOW_TYPE_KAISER,
}
3119
+
3120
+
3121
class SpectralCentroid(TensorOperation):
    """
    Compute the spectral centroid for each channel along the time axis.

    Args:
        sample_rate (int): Sampling rate of audio signal, e.g. ``44100`` (Hz).
        n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: ``400``.
        win_length (int, optional): Window size. Default: ``None``, will use `n_fft` .
        hop_length (int, optional): Length of hop between STFT windows. Default: ``None``, will use `win_length // 2` .
        pad (int, optional): Two sided padding of signal. Default: ``0``.
        window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
            can be ``WindowType.BARTLETT``, ``WindowType.BLACKMAN``, ``WindowType.HAMMING``, ``WindowType.HANN``
            or ``WindowType.KAISER``. Default: ``WindowType.HANN``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is a negative number.
        TypeError: If `n_fft` is not of type int.
        ValueError: If `n_fft` is not a positive number.
        TypeError: If `win_length` is not of type int.
        ValueError: If `win_length` is not a positive number.
        ValueError: If `win_length` is greater than `n_fft` .
        TypeError: If `hop_length` is not of type int.
        ValueError: If `hop_length` is not a positive number.
        TypeError: If `pad` is not of type int.
        ValueError: If `pad` is a negative number.
        TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 10, 20])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.SpectralCentroid(44100)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (10, 1, 1) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([10, 20])  # 1 sample
        >>> output = audio.SpectralCentroid(44100)(waveform)
        >>> print(output.shape, output.dtype)
        (10, 1, 1) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_spectral_centroid
    def __init__(self, sample_rate, n_fft=400, win_length=None, hop_length=None, pad=0, window=WindowType.HANN):
        super().__init__()
        self.sample_rate = sample_rate
        self.pad = pad
        self.window = window
        self.n_fft = n_fft
        # Only ``None`` means "not supplied". A truthiness test would also replace an explicit 0
        # with the default, hiding an invalid argument. This matches PitchShift's handling.
        self.win_length = win_length if win_length is not None else n_fft
        self.hop_length = hop_length if hop_length is not None else self.win_length // 2

    def parse(self):
        """Return the `cde.SpectralCentroidOperation` configured with the stored settings."""
        return cde.SpectralCentroidOperation(self.sample_rate, self.n_fft, self.win_length, self.hop_length,
                                             self.pad, DE_C_WINDOW_TYPE.get(self.window))
3192
+
3193
+
3194
+ class Spectrogram(TensorOperation):
3195
+ """
3196
+ Create a spectrogram from an audio signal.
3197
+
3198
+ Args:
3199
+ n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: ``400``.
3200
+ win_length (int, optional): Window size. Default: ``None``, will use `n_fft` .
3201
+ hop_length (int, optional): Length of hop between STFT windows. Default: ``None``, will use `win_length // 2` .
3202
+ pad (int, optional): Two sided padding of signal. Default: ``0``.
3203
+ window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
3204
+ can be ``WindowType.BARTLETT``, ``WindowType.BLACKMAN``, ``WindowType.HAMMING``, ``WindowType.HANN``
3205
+ or ``WindowType.KAISER``. Currently, Kaiser window is not supported on macOS. Default: ``WindowType.HANN``.
3206
+ power (float, optional): Exponent for the magnitude spectrogram, must be non negative,
3207
+ e.g., ``1`` for energy, ``2`` for power, etc. Default: ``2.0``.
3208
+ normalized (bool, optional): Whether to normalize by magnitude after stft. Default: ``False``.
3209
+ center (bool, optional): Whether to pad waveform on both sides. Default: ``True``.
3210
+ pad_mode (BorderType, optional): Controls the padding method used when `center` is ``True``,
3211
+ can be ``BorderType.REFLECT``, ``BorderType.CONSTANT``, ``BorderType.EDGE`` or ``BorderType.SYMMETRIC``.
3212
+ Default: ``BorderType.REFLECT``.
3213
+ onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: ``True``.
3214
+
3215
+ Raises:
3216
+ TypeError: If `n_fft` is not of type int.
3217
+ ValueError: If `n_fft` is not a positive number.
3218
+ TypeError: If `win_length` is not of type int.
3219
+ ValueError: If `win_length` is not a positive number.
3220
+ ValueError: If `win_length` is greater than `n_fft` .
3221
+ TypeError: If `hop_length` is not of type int.
3222
+ ValueError: If `hop_length` is not a positive number.
3223
+ TypeError: If `pad` is not of type int.
3224
+ ValueError: If `pad` is a negative number.
3225
+ TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
3226
+ TypeError: If `power` is not of type float.
3227
+ ValueError: If `power` is a negative number.
3228
+ TypeError: If `normalized` is not of type bool.
3229
+ TypeError: If `center` is not of type bool.
3230
+ TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
3231
+ TypeError: If `onesided` is not of type bool.
3232
+ RuntimeError: If input tensor is not in shape of <..., time>.
3233
+
3234
+ Supported Platforms:
3235
+ ``CPU``
3236
+
3237
+ Examples:
3238
+ >>> import numpy as np
3239
+ >>> import mindspore.dataset as ds
3240
+ >>> import mindspore.dataset.audio as audio
3241
+ >>>
3242
+ >>> # Use the transform in dataset pipeline mode
3243
+ >>> waveform = np.random.random([5, 10, 20]) # 5 samples
3244
+ >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
3245
+ >>> transforms = [audio.Spectrogram()]
3246
+ >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
3247
+ >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
3248
+ ... print(item["audio"].shape, item["audio"].dtype)
3249
+ ... break
3250
+ (10, 201, 1) float64
3251
+ >>>
3252
+ >>> # Use the transform in eager mode
3253
+ >>> waveform = np.random.random([10, 20]) # 1 sample
3254
+ >>> output = audio.Spectrogram()(waveform)
3255
+ >>> print(output.shape, output.dtype)
3256
+ (10, 201, 1) float64
3257
+
3258
+ Tutorial Examples:
3259
+ - `Illustration of audio transforms
3260
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
3261
+ """
3262
+
3263
@check_spectrogram
def __init__(self, n_fft=400, win_length=None, hop_length=None, pad=0, window=WindowType.HANN, power=2.0,
             normalized=False, center=True, pad_mode=BorderType.REFLECT, onesided=True):
    """
    Store the spectrogram settings on the instance.

    A falsy `win_length` (such as the default ``None``) is replaced by `n_fft`, and a falsy
    `hop_length` is replaced by half of the resolved window length (integer division).
    """
    super().__init__()

    # Resolve the two optional lengths first; hop_length depends on the resolved win_length.
    resolved_win_length = win_length or n_fft
    resolved_hop_length = hop_length or resolved_win_length // 2

    self.n_fft = n_fft
    self.win_length = resolved_win_length
    self.hop_length = resolved_hop_length

    # The remaining arguments are kept verbatim and forwarded by parse().
    self.pad = pad
    self.window = window
    self.power = power
    self.normalized = normalized
    self.center = center
    self.pad_mode = pad_mode
    self.onesided = onesided
3277
+
3278
def parse(self):
    """Build the ``cde.SpectrogramOperation`` described by the stored settings."""
    # Translate the Python-side enums through the DE_C_* lookup tables before handing them over.
    window_type = DE_C_WINDOW_TYPE.get(self.window)
    border_type = DE_C_BORDER_TYPE.get(self.pad_mode)
    return cde.SpectrogramOperation(self.n_fft, self.win_length, self.hop_length, self.pad, window_type,
                                    self.power, self.normalized, self.center, border_type, self.onesided)
3282
+
3283
+
3284
class TimeMasking(AudioTensorOperation):
    """
    Mask a spectrogram along the time domain.

    Note:
        The shape of the audio waveform to be processed needs to be <..., freq, time>.

    Args:
        iid_masks (bool, optional): Whether to apply different masks to each example/channel.
            Default: ``False``.
        time_mask_param (int, optional): When `iid_masks` is ``True``, the mask length is uniformly
            sampled from [0, time_mask_param]; when `iid_masks` is ``False``, it is used directly as
            the mask length. The value should be in range of [0, time_length], where `time_length`
            is the length of audio waveform in time domain. Default: ``0``.
        mask_start (int, optional): Starting point to apply mask, only works when `iid_masks` is ``True``.
            The value should be in range of [0, time_length - time_mask_param], where `time_length` is
            the length of audio waveform in time domain. Default: ``0``.
        mask_value (float, optional): Value to assign to the masked columns. Default: ``0.0``.

    Raises:
        TypeError: If `iid_masks` is not of type bool.
        TypeError: If `time_mask_param` is not of type int.
        ValueError: If `time_mask_param` is greater than the length of audio waveform in time domain.
        TypeError: If `mask_start` is not of type int.
        ValueError: If `mask_start` is a negative number.
        TypeError: If `mask_value` is not of type float.
        ValueError: If `mask_value` is a negative number.
        RuntimeError: If input tensor is not in shape of <..., freq, time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16, 2])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.TimeMasking(time_mask_param=1)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (16, 2) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16, 2])  # 1 sample
        >>> output = audio.TimeMasking(time_mask_param=1)(waveform)
        >>> print(output.shape, output.dtype)
        (16, 2) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_

    .. image:: time_masking_original.png

    .. image:: time_masking.png
    """

    @check_masking
    def __init__(self, iid_masks=False, time_mask_param=0, mask_start=0, mask_value=0.0):
        super().__init__()
        # Arguments are stored unchanged; parse() forwards them in this same order.
        self.iid_masks, self.time_mask_param = iid_masks, time_mask_param
        self.mask_start, self.mask_value = mask_start, mask_value

    def parse(self):
        """Build the ``cde.TimeMaskingOperation`` described by the stored settings."""
        mask_args = (self.iid_masks, self.time_mask_param, self.mask_start, self.mask_value)
        return cde.TimeMaskingOperation(*mask_args)
3355
+
3356
+
3357
class TimeStretch(AudioTensorOperation):
    """
    Stretch Short Time Fourier Transform (STFT) in time without modifying pitch for a given rate.

    Note:
        The shape of the audio waveform to be processed needs to be <..., freq, time, complex=2>.
        The first dimension represents the real part while the second represents the imaginary.

    Args:
        hop_length (int, optional): Length of hop between STFT windows, i.e. the number of samples
            between consecutive frames. Default: ``None``, will use `n_freq - 1` .
        n_freq (int, optional): Number of filter banks from STFT. Default: ``201``.
        fixed_rate (float, optional): Rate to speed up or slow down by. Default: ``None``, will keep
            the original rate.

    Raises:
        TypeError: If `hop_length` is not of type int.
        ValueError: If `hop_length` is not a positive number.
        TypeError: If `n_freq` is not of type int.
        ValueError: If `n_freq` is not a positive number.
        TypeError: If `fixed_rate` is not of type float.
        ValueError: If `fixed_rate` is not a positive number.
        RuntimeError: If input tensor is not in shape of <..., freq, num_frame, complex=2>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 16, 8, 2])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.TimeStretch()]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (1, 16, 8, 2) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([16, 8, 2])  # 1 sample
        >>> output = audio.TimeStretch()(waveform)
        >>> print(output.shape, output.dtype)
        (1, 16, 8, 2) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_

    .. image:: time_stretch_rate1.5.png

    .. image:: time_stretch_original.png

    .. image:: time_stretch_rate0.8.png
    """

    @check_time_stretch
    def __init__(self, hop_length=None, n_freq=201, fixed_rate=None):
        super().__init__()
        self.n_freq = n_freq

        # With n_fft = (n_freq - 1) * 2, the default hop of n_fft // 2 equals n_freq - 1.
        n_fft = (n_freq - 1) * 2
        self.hop_length = hop_length if hop_length is not None else n_fft // 2
        # Assigned exactly once: an omitted rate is stored as 1 rather than None.
        self.fixed_rate = fixed_rate if fixed_rate is not None else 1

    def parse(self):
        """Build the ``cde.TimeStretchOperation`` from the resolved hop length, `n_freq` and rate."""
        return cde.TimeStretchOperation(self.hop_length, self.n_freq, self.fixed_rate)
3428
+
3429
+
3430
class TrebleBiquad(AudioTensorOperation):
    """
    Design a treble tone-control effect.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        sample_rate (int): Sampling rate (in Hz), which can't be zero.
        gain (float): Desired gain at the boost (or attenuation) in dB.
        central_freq (float, optional): Central frequency (in Hz). Default: ``3000``.
        Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
            in range of (0, 1]. Default: ``0.707``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is 0.
        TypeError: If `gain` is not of type float.
        TypeError: If `central_freq` is not of type float.
        TypeError: If `Q` is not of type float.
        ValueError: If `Q` is not in range of (0, 1].
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 20])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.TrebleBiquad(44100, 200.0)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (20,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([20])  # 1 sample
        >>> output = audio.TrebleBiquad(44100, 200.0)(waveform)
        >>> print(output.shape, output.dtype)
        (20,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_treble_biquad
    def __init__(self, sample_rate, gain, central_freq=3000, Q=0.707):
        super().__init__()
        self.sample_rate, self.gain = sample_rate, gain
        self.central_freq = central_freq
        # The public argument is named `Q`; it is kept on the instance as `quality_factor`.
        self.quality_factor = Q

    def parse(self):
        """Build the ``cde.TrebleBiquadOperation`` described by the stored settings."""
        biquad_args = (self.sample_rate, self.gain, self.central_freq, self.quality_factor)
        return cde.TrebleBiquadOperation(*biquad_args)
3491
+
3492
+
3493
class Vad(AudioTensorOperation):
    """
    Voice activity detector.

    Attempt to trim silence and quiet background sounds from the ends of recordings of speech.

    Similar to `SoX <https://sourceforge.net/projects/sox/>`_ implementation.

    Args:
        sample_rate (int): Sampling rate of audio signal.
        trigger_level (float, optional): The measurement level used to trigger activity detection.
            Default: ``7.0``.
        trigger_time (float, optional): The time constant (in seconds) used to help ignore short bursts of
            sounds. Default: ``0.25``.
        search_time (float, optional): The amount of audio (in seconds) to search for quieter/shorter bursts
            of audio to include prior to the detected trigger point. Default: ``1.0``.
        allowed_gap (float, optional): The allowed gap (in seconds) between quieter/shorter bursts of audio
            to include prior to the detected trigger point. Default: ``0.25``.
        pre_trigger_time (float, optional): The amount of audio (in seconds) to preserve before the trigger
            point and any found quieter/shorter bursts. Default: ``0.0``.
        boot_time (float, optional): The time for the initial noise estimate. Default: ``0.35``.
        noise_up_time (float, optional): Time constant used by the adaptive noise estimator for when the
            noise level is increasing. Default: ``0.1``.
        noise_down_time (float, optional): Time constant used by the adaptive noise estimator for when the
            noise level is decreasing. Default: ``0.01``.
        noise_reduction_amount (float, optional): Amount of noise reduction to use in the detection
            algorithm. Default: ``1.35``.
        measure_freq (float, optional): Frequency of the algorithm's processing/measurements.
            Default: ``20.0``.
        measure_duration (float, optional): The duration of measurement. Default: ``None``,
            will use twice the measurement period.
        measure_smooth_time (float, optional): Time constant used to smooth spectral measurements.
            Default: ``0.4``.
        hp_filter_freq (float, optional): The 'Brick-wall' frequency of high-pass filter applied at the
            input to the detector algorithm. Default: ``50.0``.
        lp_filter_freq (float, optional): The 'Brick-wall' frequency of low-pass filter applied at the
            input to the detector algorithm. Default: ``6000.0``.
        hp_lifter_freq (float, optional): The 'Brick-wall' frequency of high-pass lifter used in the
            detector algorithm. Default: ``150.0``.
        lp_lifter_freq (float, optional): The 'Brick-wall' frequency of low-pass lifter used in the
            detector algorithm. Default: ``2000.0``.

    Raises:
        TypeError: If `sample_rate` is not of type int.
        ValueError: If `sample_rate` is not a positive number.
        TypeError: If `trigger_level` is not of type float.
        TypeError: If `trigger_time` is not of type float.
        ValueError: If `trigger_time` is a negative number.
        TypeError: If `search_time` is not of type float.
        ValueError: If `search_time` is a negative number.
        TypeError: If `allowed_gap` is not of type float.
        ValueError: If `allowed_gap` is a negative number.
        TypeError: If `pre_trigger_time` is not of type float.
        ValueError: If `pre_trigger_time` is a negative number.
        TypeError: If `boot_time` is not of type float.
        ValueError: If `boot_time` is a negative number.
        TypeError: If `noise_up_time` is not of type float.
        ValueError: If `noise_up_time` is a negative number.
        TypeError: If `noise_down_time` is not of type float.
        ValueError: If `noise_down_time` is a negative number.
        ValueError: If `noise_up_time` is less than `noise_down_time` .
        TypeError: If `noise_reduction_amount` is not of type float.
        ValueError: If `noise_reduction_amount` is a negative number.
        TypeError: If `measure_freq` is not of type float.
        ValueError: If `measure_freq` is not a positive number.
        TypeError: If `measure_duration` is not of type float.
        ValueError: If `measure_duration` is a negative number.
        TypeError: If `measure_smooth_time` is not of type float.
        ValueError: If `measure_smooth_time` is a negative number.
        TypeError: If `hp_filter_freq` is not of type float.
        ValueError: If `hp_filter_freq` is not a positive number.
        TypeError: If `lp_filter_freq` is not of type float.
        ValueError: If `lp_filter_freq` is not a positive number.
        TypeError: If `hp_lifter_freq` is not of type float.
        ValueError: If `hp_lifter_freq` is not a positive number.
        TypeError: If `lp_lifter_freq` is not of type float.
        ValueError: If `lp_lifter_freq` is not a positive number.
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 1000])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Vad(sample_rate=600)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (660,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([1000])  # 1 sample
        >>> output = audio.Vad(sample_rate=600)(waveform)
        >>> print(output.shape, output.dtype)
        (660,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_vad
    def __init__(self, sample_rate, trigger_level=7.0, trigger_time=0.25, search_time=1.0, allowed_gap=0.25,
                 pre_trigger_time=0.0, boot_time=0.35, noise_up_time=0.1, noise_down_time=0.01,
                 noise_reduction_amount=1.35, measure_freq=20.0, measure_duration=None, measure_smooth_time=0.4,
                 hp_filter_freq=50.0, lp_filter_freq=6000.0, hp_lifter_freq=150.0, lp_lifter_freq=2000.0):
        super().__init__()
        self.sample_rate = sample_rate

        # Trigger / search timing.
        self.trigger_level = trigger_level
        self.trigger_time = trigger_time
        self.search_time = search_time
        self.allowed_gap = allowed_gap
        self.pre_trigger_time = pre_trigger_time

        # Noise estimation.
        self.boot_time = boot_time
        self.noise_up_time = noise_up_time
        self.noise_down_time = noise_down_time
        self.noise_reduction_amount = noise_reduction_amount

        # Measurement settings. Any falsy measure_duration (the default None, but also 0.0)
        # falls back to twice the measurement period, i.e. 2.0 / measure_freq.
        self.measure_freq = measure_freq
        self.measure_duration = measure_duration or 2.0 / measure_freq
        self.measure_smooth_time = measure_smooth_time

        # Filter and lifter cut-off frequencies.
        self.hp_filter_freq = hp_filter_freq
        self.lp_filter_freq = lp_filter_freq
        self.hp_lifter_freq = hp_lifter_freq
        self.lp_lifter_freq = lp_lifter_freq

    def parse(self):
        """Build the ``cde.VadOperation`` described by the stored settings."""
        # Positional order matches the constructor signature.
        vad_args = (
            self.sample_rate,
            self.trigger_level,
            self.trigger_time,
            self.search_time,
            self.allowed_gap,
            self.pre_trigger_time,
            self.boot_time,
            self.noise_up_time,
            self.noise_down_time,
            self.noise_reduction_amount,
            self.measure_freq,
            self.measure_duration,
            self.measure_smooth_time,
            self.hp_filter_freq,
            self.lp_filter_freq,
            self.hp_lifter_freq,
            self.lp_lifter_freq,
        )
        return cde.VadOperation(*vad_args)
3628
+
3629
+
3630
# Lookup table from the Python-side GainType members to the matching cde.GainType values;
# Vol.parse() uses it to translate `gain_type` before building the operation.
DE_C_GAIN_TYPE = {
    GainType.AMPLITUDE: cde.GainType.DE_GAIN_TYPE_AMPLITUDE,
    GainType.POWER: cde.GainType.DE_GAIN_TYPE_POWER,
    GainType.DB: cde.GainType.DE_GAIN_TYPE_DB,
}
3633
+
3634
+
3635
class Vol(AudioTensorOperation):
    """
    Adjust volume of waveform.

    Args:
        gain (float): Gain at the boost (or attenuation).
            If `gain_type` is ``GainType.AMPLITUDE``, it is a non negative amplitude ratio.
            If `gain_type` is ``GainType.POWER``, it is a power (voltage squared).
            If `gain_type` is ``GainType.DB``, it is in decibels.
        gain_type (GainType, optional): Type of gain, can be ``GainType.AMPLITUDE``, ``GainType.POWER``
            or ``GainType.DB``. Default: ``GainType.AMPLITUDE``.

    Raises:
        TypeError: If `gain` is not of type float.
        TypeError: If `gain_type` is not of type :class:`mindspore.dataset.audio.GainType` .
        ValueError: If `gain` is a negative number when `gain_type` is ``GainType.AMPLITUDE``.
        ValueError: If `gain` is not a positive number when `gain_type` is ``GainType.POWER``.
        RuntimeError: If input tensor is not in shape of <..., time>.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore.dataset as ds
        >>> import mindspore.dataset.audio as audio
        >>>
        >>> # Use the transform in dataset pipeline mode
        >>> waveform = np.random.random([5, 30])  # 5 samples
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Vol(gain=10, gain_type=audio.GainType.DB)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
        >>> for item in numpy_slices_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        ...     print(item["audio"].shape, item["audio"].dtype)
        ...     break
        (30,) float64
        >>>
        >>> # Use the transform in eager mode
        >>> waveform = np.random.random([30])  # 1 sample
        >>> output = audio.Vol(gain=10, gain_type=audio.GainType.DB)(waveform)
        >>> print(output.shape, output.dtype)
        (30,) float64

    Tutorial Examples:
        - `Illustration of audio transforms
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/audio_gallery.html>`_
    """

    @check_vol
    def __init__(self, gain, gain_type=GainType.AMPLITUDE):
        super().__init__()
        # Both arguments are stored unchanged; the enum is translated later in parse().
        self.gain, self.gain_type = gain, gain_type

    def parse(self):
        """Build the ``cde.VolOperation`` from the stored gain and the translated gain type."""
        gain_type = DE_C_GAIN_TYPE.get(self.gain_type)
        return cde.VolOperation(self.gain, gain_type)