mindspore 1.10.0__cp39-cp39-win_amd64.whl → 2.0.0rc1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/ConcurrencyCheck.dll +0 -0
- mindspore/CppBuildInsights.dll +0 -0
- mindspore/CppCoreCheck.dll +0 -0
- mindspore/EnumIndex.dll +0 -0
- mindspore/EspXEngine.dll +0 -0
- mindspore/HResultCheck.dll +0 -0
- mindspore/KernelTraceControl.dll +0 -0
- mindspore/LocalESPC.dll +0 -0
- mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
- mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
- mindspore/VariantClear.dll +0 -0
- mindspore/__init__.py +9 -4
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/builtin_operations.py +32 -4
- mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +17 -2
- mindspore/_extends/parse/parser.py +193 -34
- mindspore/_extends/parse/resources.py +7 -8
- mindspore/_extends/parse/standard_method.py +1780 -435
- mindspore/_extends/parse/trope.py +3 -1
- mindspore/amp.py +53 -58
- mindspore/atlprov.dll +0 -0
- mindspore/boost/adasum.py +3 -2
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +46 -26
- mindspore/boost/dim_reduce.py +6 -5
- mindspore/boost/grad_accumulation.py +2 -1
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/cfgpersist.dll +0 -0
- mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
- mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
- mindspore/common/__init__.py +11 -10
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +57 -0
- mindspore/common/api.py +582 -297
- mindspore/common/dtype.py +66 -18
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +38 -1
- mindspore/common/jit_config.py +25 -13
- mindspore/common/mutable.py +53 -24
- mindspore/common/parameter.py +60 -37
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +927 -0
- mindspore/common/tensor.py +1627 -3900
- mindspore/communication/__init__.py +10 -5
- mindspore/communication/_comm_helper.py +78 -214
- mindspore/communication/_hccl_management.py +2 -1
- mindspore/communication/management.py +136 -47
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +291 -56
- mindspore/d3dcompiler_47.dll +0 -0
- mindspore/dataset/__init__.py +12 -8
- mindspore/dataset/audio/__init__.py +9 -9
- mindspore/dataset/audio/transforms.py +1090 -228
- mindspore/dataset/audio/utils.py +87 -39
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +17 -15
- mindspore/dataset/core/config.py +246 -17
- mindspore/dataset/core/py_util_helpers.py +4 -3
- mindspore/dataset/core/validator_helpers.py +10 -10
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +9 -9
- mindspore/dataset/engine/datasets.py +648 -477
- mindspore/dataset/engine/datasets_audio.py +165 -167
- mindspore/dataset/engine/datasets_standard_format.py +93 -67
- mindspore/dataset/engine/datasets_text.py +492 -342
- mindspore/dataset/engine/datasets_user_defined.py +85 -50
- mindspore/dataset/engine/datasets_vision.py +1224 -699
- mindspore/dataset/engine/graphdata.py +134 -69
- mindspore/dataset/engine/iterators.py +50 -9
- mindspore/dataset/engine/offload.py +52 -31
- mindspore/dataset/engine/samplers.py +27 -24
- mindspore/dataset/engine/serializer_deserializer.py +14 -15
- mindspore/dataset/engine/validators.py +213 -52
- mindspore/dataset/text/__init__.py +10 -8
- mindspore/dataset/text/transforms.py +152 -57
- mindspore/dataset/text/utils.py +98 -49
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +4 -2
- mindspore/dataset/transforms/c_transforms.py +11 -13
- mindspore/dataset/transforms/py_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms_util.py +10 -0
- mindspore/dataset/transforms/transforms.py +13 -15
- mindspore/dataset/transforms/validators.py +7 -7
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/browse_dataset.py +13 -13
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +8 -7
- mindspore/dataset/vision/c_transforms.py +125 -126
- mindspore/dataset/vision/py_transforms.py +37 -37
- mindspore/dataset/vision/py_transforms_util.py +23 -20
- mindspore/dataset/vision/transforms.py +316 -315
- mindspore/dataset/vision/utils.py +313 -17
- mindspore/dataset/vision/validators.py +6 -6
- mindspore/default_config.py +0 -1
- mindspore/dpcmi.dll +0 -0
- mindspore/{compression → experimental}/__init__.py +6 -5
- mindspore/experimental/map_parameter.py +275 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +70 -9
- mindspore/include/api/delegate.h +8 -1
- mindspore/include/api/dual_abi_helper.h +8 -24
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_group.h +68 -0
- mindspore/include/api/model_parallel_runner.h +17 -17
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +20 -4
- mindspore/include/api/status.h +7 -1
- mindspore/include/api/types.h +25 -21
- mindspore/include/api/visible.h +4 -0
- mindspore/include/c_api/model_c.h +5 -0
- mindspore/include/c_api/status_c.h +1 -1
- mindspore/include/dataset/config.h +1 -1
- mindspore/include/dataset/constants.h +14 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/include/dataset/vision.h +56 -117
- mindspore/include/dataset/vision_lite.h +102 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +28 -28
- mindspore/mindrecord/common/exceptions.py +2 -4
- mindspore/mindrecord/filereader.py +19 -1
- mindspore/mindrecord/filewriter.py +250 -88
- mindspore/mindrecord/mindpage.py +13 -13
- mindspore/mindrecord/shardheader.py +15 -15
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +29 -29
- mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
- mindspore/mindrecord/tools/csv_to_mr.py +4 -4
- mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
- mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +1 -5
- mindspore/nn/cell.py +297 -234
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +17 -42
- mindspore/nn/layer/__init__.py +7 -4
- mindspore/nn/layer/activation.py +131 -88
- mindspore/nn/layer/basic.py +313 -613
- mindspore/nn/layer/channel_shuffle.py +103 -0
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +52 -6
- mindspore/nn/layer/conv.py +112 -43
- mindspore/nn/layer/dense.py +10 -9
- mindspore/nn/layer/embedding.py +36 -34
- mindspore/nn/layer/image.py +123 -27
- mindspore/nn/layer/math.py +108 -107
- mindspore/nn/layer/normalization.py +212 -366
- mindspore/nn/layer/padding.py +370 -42
- mindspore/nn/layer/pooling.py +1443 -219
- mindspore/nn/layer/rnn_cells.py +11 -16
- mindspore/nn/layer/rnns.py +38 -39
- mindspore/nn/layer/thor_layer.py +24 -25
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +9 -6
- mindspore/nn/loss/loss.py +678 -142
- mindspore/nn/metrics.py +53 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
- mindspore/nn/optim/ada_grad.py +8 -8
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +18 -14
- mindspore/nn/optim/adam.py +429 -87
- mindspore/nn/optim/adamax.py +5 -6
- mindspore/nn/optim/adasum.py +10 -8
- mindspore/nn/optim/asgd.py +7 -7
- mindspore/nn/optim/ftrl.py +81 -11
- mindspore/nn/optim/lamb.py +7 -8
- mindspore/nn/optim/lars.py +4 -4
- mindspore/nn/optim/lazyadam.py +82 -7
- mindspore/nn/optim/momentum.py +8 -7
- mindspore/nn/optim/optimizer.py +19 -10
- mindspore/nn/optim/proximal_ada_grad.py +6 -5
- mindspore/nn/optim/rmsprop.py +3 -3
- mindspore/nn/optim/rprop.py +20 -16
- mindspore/nn/optim/sgd.py +21 -15
- mindspore/nn/optim/thor.py +23 -21
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -6
- mindspore/nn/probability/bijector/invert.py +4 -2
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/__init__.py +6 -0
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +11 -17
- mindspore/nn/probability/distribution/bernoulli.py +6 -6
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +9 -9
- mindspore/nn/probability/distribution/cauchy.py +8 -8
- mindspore/nn/probability/distribution/distribution.py +12 -6
- mindspore/nn/probability/distribution/exponential.py +5 -5
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +6 -5
- mindspore/nn/probability/distribution/gumbel.py +5 -5
- mindspore/nn/probability/distribution/half_normal.py +133 -0
- mindspore/nn/probability/distribution/laplace.py +128 -0
- mindspore/nn/probability/distribution/log_normal.py +0 -1
- mindspore/nn/probability/distribution/logistic.py +4 -5
- mindspore/nn/probability/distribution/normal.py +11 -15
- mindspore/nn/probability/distribution/poisson.py +6 -2
- mindspore/nn/probability/distribution/student_t.py +150 -0
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +5 -5
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +8 -1
- mindspore/nn/wrap/cell_wrapper.py +55 -27
- mindspore/nn/wrap/grad_reducer.py +20 -11
- mindspore/nn/wrap/loss_scale.py +47 -30
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +46 -42
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +26 -19
- mindspore/numpy/utils.py +1 -8
- mindspore/numpy/utils_const.py +112 -62
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -3
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +209 -152
- mindspore/ops/_grad/grad_base.py +55 -17
- mindspore/ops/_grad/grad_clip_ops.py +11 -3
- mindspore/ops/_grad/grad_comm_ops.py +58 -47
- mindspore/ops/_grad/grad_implementations.py +21 -61
- mindspore/ops/_grad/grad_inner_ops.py +48 -6
- mindspore/ops/_grad/grad_math_ops.py +306 -161
- mindspore/ops/_grad/grad_nn_ops.py +192 -181
- mindspore/ops/_grad/grad_other_ops.py +1 -1
- mindspore/ops/_grad/grad_quant_ops.py +5 -5
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +15 -9
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
- mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
- mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
- mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
- mindspore/ops/_op_impl/__init__.py +3 -3
- mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
- mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
- mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
- mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
- mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
- mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
- mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
- mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
- mindspore/ops/_op_impl/aicpu/diag.py +36 -0
- mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
- mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
- mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
- mindspore/ops/_op_impl/aicpu/eig.py +35 -0
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/glu.py +33 -0
- mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
- mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
- mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
- mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
- mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
- mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
- mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
- mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
- mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/qr.py +36 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/range.py +36 -0
- mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
- mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
- mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sort.py +39 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
- mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
- mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
- mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
- mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
- mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/__init__.py +1 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
- mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
- mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -608
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/greater.py +2 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
- mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
- mindspore/ops/_op_impl/tbe/slice.py +26 -15
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +3 -2
- mindspore/ops/_register_for_op.py +11 -0
- mindspore/ops/_utils/__init__.py +1 -1
- mindspore/ops/_utils/utils.py +20 -41
- mindspore/ops/_vmap/__init__.py +2 -2
- mindspore/ops/_vmap/vmap_array_ops.py +170 -78
- mindspore/ops/_vmap/vmap_base.py +24 -10
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
- mindspore/ops/_vmap/vmap_image_ops.py +52 -0
- mindspore/ops/_vmap/vmap_math_ops.py +77 -6
- mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
- mindspore/ops/_vmap/vmap_other_ops.py +3 -1
- mindspore/ops/_vmap/vmap_random_ops.py +55 -3
- mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
- mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/__init__.py +1 -4
- mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
- mindspore/ops/composite/__init__.py +12 -13
- mindspore/ops/composite/base.py +261 -254
- mindspore/ops/composite/env_ops.py +41 -0
- mindspore/ops/composite/math_ops.py +197 -156
- mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
- mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
- mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
- mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
- mindspore/ops/function/__init__.py +323 -8
- mindspore/ops/function/array_func.py +3511 -780
- mindspore/ops/function/clip_func.py +329 -0
- mindspore/ops/function/debug_func.py +6 -6
- mindspore/ops/function/grad/__init__.py +5 -1
- mindspore/ops/function/grad/grad_func.py +736 -65
- mindspore/ops/function/image_func.py +270 -0
- mindspore/ops/function/linalg_func.py +268 -8
- mindspore/ops/function/math_func.py +8032 -3164
- mindspore/ops/function/nn_func.py +5619 -1855
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +11 -10
- mindspore/ops/function/random_func.py +939 -77
- mindspore/ops/function/sparse_func.py +249 -84
- mindspore/ops/function/sparse_unary_func.py +2303 -0
- mindspore/ops/function/spectral_func.py +146 -0
- mindspore/ops/function/vmap_func.py +114 -0
- mindspore/ops/functional.py +182 -254
- mindspore/ops/op_info_register.py +79 -34
- mindspore/ops/operations/__init__.py +210 -118
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +25 -15
- mindspore/ops/operations/_grad_ops.py +447 -322
- mindspore/ops/operations/_inner_ops.py +547 -176
- mindspore/ops/operations/_map_tensor_ops.py +112 -0
- mindspore/ops/operations/_ms_kernel.py +29 -27
- mindspore/ops/operations/_ocr_ops.py +11 -11
- mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
- mindspore/ops/operations/_quant_ops.py +186 -101
- mindspore/ops/operations/_rl_inner_ops.py +122 -61
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1047 -0
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +4 -4
- mindspore/ops/operations/array_ops.py +1428 -1226
- mindspore/ops/operations/comm_ops.py +180 -117
- mindspore/ops/operations/control_ops.py +4 -2
- mindspore/ops/operations/custom_ops.py +185 -98
- mindspore/ops/operations/debug_ops.py +92 -54
- mindspore/ops/operations/image_ops.py +406 -211
- mindspore/ops/operations/inner_ops.py +42 -53
- mindspore/ops/operations/linalg_ops.py +32 -29
- mindspore/ops/operations/math_ops.py +2076 -897
- mindspore/ops/operations/nn_ops.py +1282 -1252
- mindspore/ops/operations/other_ops.py +124 -278
- mindspore/ops/operations/random_ops.py +345 -178
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +502 -157
- mindspore/ops/operations/spectral_ops.py +107 -0
- mindspore/ops/primitive.py +192 -15
- mindspore/ops/vm_impl_registry.py +23 -2
- mindspore/parallel/__init__.py +6 -1
- mindspore/parallel/_auto_parallel_context.py +199 -92
- mindspore/parallel/_cell_wrapper.py +4 -2
- mindspore/parallel/_cost_model_context.py +3 -0
- mindspore/parallel/_dp_allreduce_fusion.py +2 -1
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +167 -28
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +9 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
- mindspore/parallel/_utils.py +47 -7
- mindspore/parallel/algo_parameter_config.py +5 -1
- mindspore/parallel/checkpoint_transform.py +329 -0
- mindspore/parallel/shard.py +229 -0
- mindspore/perf_msvcbuildinsights.dll +0 -0
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/util.py +4 -3
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +249 -0
- mindspore/profiler/parser/aicpu_data_parser.py +38 -39
- mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
- mindspore/profiler/parser/base_timeline_generator.py +471 -0
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
- mindspore/profiler/parser/framework_parser.py +42 -16
- mindspore/profiler/parser/hccl_parser.py +158 -158
- mindspore/profiler/parser/hwts_log_parser.py +7 -6
- mindspore/profiler/parser/integrator.py +18 -1579
- mindspore/profiler/parser/minddata_analyzer.py +8 -8
- mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +108 -0
- mindspore/profiler/parser/step_trace_parser.py +1 -1
- mindspore/profiler/profiling.py +396 -194
- mindspore/rewrite/__init__.py +6 -2
- mindspore/rewrite/api/node.py +51 -110
- mindspore/rewrite/api/node_type.py +10 -6
- mindspore/rewrite/api/pattern_engine.py +51 -7
- mindspore/rewrite/api/scoped_value.py +64 -53
- mindspore/rewrite/api/symbol_tree.py +108 -61
- mindspore/rewrite/api/tree_node_helper.py +2 -3
- mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
- mindspore/rewrite/ast_helpers/__init__.py +6 -3
- mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
- mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
- mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
- mindspore/rewrite/ast_transformers/__init__.py +0 -1
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
- mindspore/rewrite/common/__init__.py +2 -0
- mindspore/rewrite/common/event.py +1 -1
- mindspore/rewrite/common/observable.py +1 -1
- mindspore/rewrite/common/observer.py +1 -1
- mindspore/rewrite/common/rewrite_elog.py +35 -0
- mindspore/rewrite/namer.py +2 -2
- mindspore/rewrite/namespace.py +14 -4
- mindspore/rewrite/node.py +161 -13
- mindspore/rewrite/parser.py +0 -1
- mindspore/rewrite/parser_register.py +0 -1
- mindspore/rewrite/parsers/arguments_parser.py +3 -2
- mindspore/rewrite/parsers/assign_parser.py +267 -67
- mindspore/rewrite/parsers/attribute_parser.py +56 -0
- mindspore/rewrite/parsers/class_def_parser.py +191 -108
- mindspore/rewrite/parsers/constant_parser.py +101 -0
- mindspore/rewrite/parsers/container_parser.py +88 -0
- mindspore/rewrite/parsers/for_parser.py +28 -15
- mindspore/rewrite/parsers/function_def_parser.py +21 -5
- mindspore/rewrite/parsers/if_parser.py +11 -28
- mindspore/rewrite/parsers/module_parser.py +9 -6
- mindspore/rewrite/parsers/return_parser.py +3 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +322 -109
- mindspore/rewrite/symbol_tree_builder.py +45 -8
- mindspore/rewrite/symbol_tree_dumper.py +0 -1
- mindspore/rewrite/topological_manager.py +1 -2
- mindspore/run_check/_check_version.py +209 -112
- mindspore/run_check/run_check.py +2 -1
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +6 -4
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +321 -50
- mindspore/train/callback/__init__.py +3 -1
- mindspore/train/callback/_backup_and_restore.py +120 -0
- mindspore/train/callback/_callback.py +8 -8
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_early_stop.py +13 -7
- mindspore/train/callback/_history.py +8 -8
- mindspore/train/callback/_lambda_callback.py +6 -6
- mindspore/train/callback/_landscape.py +36 -38
- mindspore/train/callback/_loss_monitor.py +12 -6
- mindspore/train/callback/_lr_scheduler_callback.py +2 -4
- mindspore/train/callback/_on_request_exit.py +212 -0
- mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
- mindspore/train/callback/_summary_collector.py +27 -19
- mindspore/train/callback/_time_monitor.py +13 -7
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +122 -33
- mindspore/train/dataset_helper.py +28 -87
- mindspore/train/loss_scale_manager.py +4 -7
- mindspore/{nn → train}/metrics/__init__.py +20 -20
- mindspore/{nn → train}/metrics/accuracy.py +12 -10
- mindspore/{nn → train}/metrics/auc.py +4 -4
- mindspore/{nn → train}/metrics/bleu_score.py +4 -4
- mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
- mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
- mindspore/{nn → train}/metrics/dice.py +6 -5
- mindspore/{nn → train}/metrics/error.py +7 -5
- mindspore/{nn → train}/metrics/fbeta.py +9 -7
- mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
- mindspore/{nn → train}/metrics/loss.py +4 -3
- mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/metric.py +6 -5
- mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
- mindspore/{nn → train}/metrics/perplexity.py +5 -4
- mindspore/{nn → train}/metrics/precision.py +5 -4
- mindspore/{nn → train}/metrics/recall.py +5 -4
- mindspore/{nn → train}/metrics/roc.py +7 -6
- mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/topk.py +7 -5
- mindspore/train/mind_ir_pb2.py +339 -32
- mindspore/train/model.py +113 -84
- mindspore/train/serialization.py +547 -167
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -12
- mindspore/train/train_thor/convert_utils.py +7 -1
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/train/train_thor/model_thor.py +0 -4
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -514
- mindspore/compression/quant/qat.py +0 -636
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/libatomic-1.dll +0 -0
- mindspore/libgcc_s_seh-1.dll +0 -0
- mindspore/libgfortran-4.dll +0 -0
- mindspore/libgomp-1.dll +0 -0
- mindspore/libjpeg-62.dll +0 -0
- mindspore/libmindspore.dll +0 -0
- mindspore/libmindspore_common.dll +0 -0
- mindspore/libmindspore_core.dll +0 -0
- mindspore/libmindspore_glog.dll +0 -0
- mindspore/libnnacl.dll +0 -0
- mindspore/libopencv_core452.dll +0 -0
- mindspore/libopencv_imgcodecs452.dll +0 -0
- mindspore/libopencv_imgproc452.dll +0 -0
- mindspore/libquadmath-0.dll +0 -0
- mindspore/libsqlite3.dll +0 -0
- mindspore/libssp-0.dll +0 -0
- mindspore/libstdc++-6.dll +0 -0
- mindspore/libtinyxml2.dll +0 -0
- mindspore/libturbojpeg.dll +0 -0
- mindspore/libwinpthread-1.dll +0 -0
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -138
- mindspore/nn/probability/dpn/vae/vae.py +0 -122
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
- mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
- mindspore/ops/composite/array_ops.py +0 -210
- mindspore/ops/composite/clip_ops.py +0 -238
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/ops/operations/sponge_ops.py +0 -3531
- mindspore/ops/operations/sponge_update_ops.py +0 -2546
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- mindspore/run_check/_check_deps_version.py +0 -84
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -22,15 +22,16 @@ their training models.
|
|
|
22
22
|
import numpy as np
|
|
23
23
|
|
|
24
24
|
import mindspore._c_dataengine as cde
|
|
25
|
-
from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation,
|
|
26
|
-
ResampleMethod, ScaleType, WindowType
|
|
25
|
+
from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation, NormMode, \
|
|
26
|
+
NormType, ResampleMethod, ScaleType, WindowType
|
|
27
27
|
from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \
|
|
28
28
|
check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \
|
|
29
29
|
check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \
|
|
30
30
|
check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_griffin_lim, \
|
|
31
|
-
check_highpass_biquad, check_inverse_mel_scale,
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
check_highpass_biquad, check_inverse_mel_scale, check_inverse_spectrogram, check_lfcc, check_lfilter, \
|
|
32
|
+
check_lowpass_biquad, check_magphase, check_mask_along_axis, check_mask_along_axis_iid, check_masking, \
|
|
33
|
+
check_mel_scale, check_mel_spectrogram, check_mfcc, check_mu_law_coding, check_overdrive, check_phase_vocoder, \
|
|
34
|
+
check_phaser, check_pitch_shift, check_resample, check_riaa_biquad, check_sliding_window_cmn, \
|
|
34
35
|
check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, check_vad, check_vol
|
|
35
36
|
from ..transforms.py_transforms_util import Implementation
|
|
36
37
|
from ..transforms.transforms import TensorOperation
|
|
@@ -68,7 +69,7 @@ class AllpassBiquad(AudioTensorOperation):
|
|
|
68
69
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
69
70
|
|
|
70
71
|
Note:
|
|
71
|
-
The
|
|
72
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
72
73
|
|
|
73
74
|
Args:
|
|
74
75
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -77,7 +78,7 @@ class AllpassBiquad(AudioTensorOperation):
|
|
|
77
78
|
in range of (0, 1]. Default: 0.707.
|
|
78
79
|
|
|
79
80
|
Raises:
|
|
80
|
-
TypeError: If `sample_rate` is not of type
|
|
81
|
+
TypeError: If `sample_rate` is not of type int.
|
|
81
82
|
ValueError: If `sample_rate` is 0.
|
|
82
83
|
TypeError: If `central_freq` is not of type float.
|
|
83
84
|
TypeError: If `Q` is not of type float.
|
|
@@ -116,22 +117,22 @@ class AmplitudeToDB(AudioTensorOperation):
|
|
|
116
117
|
Turn the input audio waveform from the amplitude/power scale to decibel scale.
|
|
117
118
|
|
|
118
119
|
Note:
|
|
119
|
-
The
|
|
120
|
+
The shape of the audio waveform to be processed needs to be <..., freq, time>.
|
|
120
121
|
|
|
121
122
|
Args:
|
|
122
123
|
stype (ScaleType, optional): Scale of the input waveform, which can be
|
|
123
124
|
ScaleType.POWER or ScaleType.MAGNITUDE. Default: ScaleType.POWER.
|
|
124
125
|
ref_value (float, optional): Multiplier reference value for generating
|
|
125
|
-
`db_multiplier
|
|
126
|
+
`db_multiplier` . Default: 1.0. The formula is
|
|
126
127
|
|
|
127
|
-
:math:`\text{db_multiplier} = Log10(max(\text{ref_value}, amin))
|
|
128
|
+
:math:`\text{db_multiplier} = Log10(max(\text{ref_value}, amin))` .
|
|
128
129
|
|
|
129
130
|
amin (float, optional): Lower bound to clamp the input waveform, which must
|
|
130
131
|
be greater than zero. Default: 1e-10.
|
|
131
132
|
top_db (float, optional): Minimum cut-off decibels, which must be non-negative. Default: 80.0.
|
|
132
133
|
|
|
133
134
|
Raises:
|
|
134
|
-
TypeError: If `stype` is not of type :class:`mindspore.dataset.audio.
|
|
135
|
+
TypeError: If `stype` is not of type :class:`mindspore.dataset.audio.ScaleType` .
|
|
135
136
|
TypeError: If `ref_value` is not of type float.
|
|
136
137
|
ValueError: If `ref_value` is not a positive number.
|
|
137
138
|
TypeError: If `amin` is not of type float.
|
|
@@ -170,7 +171,7 @@ class Angle(AudioTensorOperation):
|
|
|
170
171
|
Calculate the angle of complex number sequence.
|
|
171
172
|
|
|
172
173
|
Note:
|
|
173
|
-
The
|
|
174
|
+
The shape of the audio waveform to be processed needs to be <..., complex=2>.
|
|
174
175
|
The first dimension represents the real part while the second represents the imaginary.
|
|
175
176
|
|
|
176
177
|
Raises:
|
|
@@ -203,7 +204,7 @@ class BandBiquad(AudioTensorOperation):
|
|
|
203
204
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
204
205
|
|
|
205
206
|
Note:
|
|
206
|
-
The
|
|
207
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
207
208
|
|
|
208
209
|
Args:
|
|
209
210
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -214,7 +215,7 @@ class BandBiquad(AudioTensorOperation):
|
|
|
214
215
|
If False, uses mode oriented to pitched audio, i.e. voice, singing, or instrumental music. Default: False.
|
|
215
216
|
|
|
216
217
|
Raises:
|
|
217
|
-
TypeError: If `sample_rate` is not of type
|
|
218
|
+
TypeError: If `sample_rate` is not of type int.
|
|
218
219
|
ValueError: If `sample_rate` is 0.
|
|
219
220
|
TypeError: If `central_freq` is not of type float.
|
|
220
221
|
TypeError: If `Q` is not of type float.
|
|
@@ -264,7 +265,7 @@ class BandpassBiquad(AudioTensorOperation):
|
|
|
264
265
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
265
266
|
|
|
266
267
|
Note:
|
|
267
|
-
The
|
|
268
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
268
269
|
|
|
269
270
|
Args:
|
|
270
271
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -275,7 +276,7 @@ class BandpassBiquad(AudioTensorOperation):
|
|
|
275
276
|
If False, uses a constant 0dB peak gain. Default: False.
|
|
276
277
|
|
|
277
278
|
Raises:
|
|
278
|
-
TypeError: If `sample_rate` is not of type
|
|
279
|
+
TypeError: If `sample_rate` is not of type int.
|
|
279
280
|
ValueError: If `sample_rate` is 0.
|
|
280
281
|
TypeError: If `central_freq` is not of type float.
|
|
281
282
|
TypeError: If `Q` is not of type float.
|
|
@@ -323,7 +324,7 @@ class BandrejectBiquad(AudioTensorOperation):
|
|
|
323
324
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
324
325
|
|
|
325
326
|
Note:
|
|
326
|
-
The
|
|
327
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
327
328
|
|
|
328
329
|
Args:
|
|
329
330
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -332,7 +333,7 @@ class BandrejectBiquad(AudioTensorOperation):
|
|
|
332
333
|
in range of (0, 1]. Default: 0.707.
|
|
333
334
|
|
|
334
335
|
Raises:
|
|
335
|
-
TypeError: If `sample_rate` is not of type
|
|
336
|
+
TypeError: If `sample_rate` is not of type int.
|
|
336
337
|
ValueError: If `sample_rate` is 0.
|
|
337
338
|
TypeError: If `central_freq` is not of type float.
|
|
338
339
|
TypeError: If `Q` is not of type float.
|
|
@@ -375,7 +376,7 @@ class BassBiquad(AudioTensorOperation):
|
|
|
375
376
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
376
377
|
|
|
377
378
|
Note:
|
|
378
|
-
The
|
|
379
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
379
380
|
|
|
380
381
|
Args:
|
|
381
382
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -385,7 +386,7 @@ class BassBiquad(AudioTensorOperation):
|
|
|
385
386
|
in range of (0, 1]. Default: 0.707.
|
|
386
387
|
|
|
387
388
|
Raises:
|
|
388
|
-
TypeError: If `sample_rate` is not of type
|
|
389
|
+
TypeError: If `sample_rate` is not of type int.
|
|
389
390
|
ValueError: If `sample_rate` is 0.
|
|
390
391
|
TypeError: If `gain` is not of type float.
|
|
391
392
|
TypeError: If `central_freq` is not of type float.
|
|
@@ -420,15 +421,28 @@ class BassBiquad(AudioTensorOperation):
|
|
|
420
421
|
class Biquad(TensorOperation):
|
|
421
422
|
"""
|
|
422
423
|
Perform a biquad filter of input audio.
|
|
424
|
+
Mathematical formulas refer to: `Digital_biquad_filter <https://en.wikipedia.org/wiki/Digital_biquad_filter>`_ .
|
|
423
425
|
|
|
424
426
|
Args:
|
|
425
427
|
b0 (float): Numerator coefficient of current input, x[n].
|
|
426
428
|
b1 (float): Numerator coefficient of input one time step ago x[n-1].
|
|
427
429
|
b2 (float): Numerator coefficient of input two time steps ago x[n-2].
|
|
428
|
-
a0 (float): Denominator coefficient of current output y[n], the value can't be
|
|
430
|
+
a0 (float): Denominator coefficient of current output y[n], the value can't be 0, typically 1.
|
|
429
431
|
a1 (float): Denominator coefficient of output one time step ago y[n-1].
|
|
430
432
|
a2 (float): Denominator coefficient of output two time steps ago y[n-2].
|
|
431
433
|
|
|
434
|
+
Raises:
|
|
435
|
+
TypeError: If `b0` is not of type float.
|
|
436
|
+
TypeError: If `b1` is not of type float.
|
|
437
|
+
TypeError: If `b2` is not of type float.
|
|
438
|
+
TypeError: If `a0` is not of type float.
|
|
439
|
+
TypeError: If `a1` is not of type float.
|
|
440
|
+
TypeError: If `a2` is not of type float.
|
|
441
|
+
ValueError: If `a0` is 0.
|
|
442
|
+
|
|
443
|
+
Supported Platforms:
|
|
444
|
+
``CPU``
|
|
445
|
+
|
|
432
446
|
Examples:
|
|
433
447
|
>>> import numpy as np
|
|
434
448
|
>>>
|
|
@@ -456,7 +470,7 @@ class ComplexNorm(AudioTensorOperation):
|
|
|
456
470
|
Compute the norm of complex number sequence.
|
|
457
471
|
|
|
458
472
|
Note:
|
|
459
|
-
The
|
|
473
|
+
The shape of the audio waveform to be processed needs to be <..., complex=2>.
|
|
460
474
|
The first dimension represents the real part while the second represents the imaginary.
|
|
461
475
|
|
|
462
476
|
Args:
|
|
@@ -498,25 +512,37 @@ DE_C_BORDER_TYPE = {
|
|
|
498
512
|
|
|
499
513
|
class ComputeDeltas(AudioTensorOperation):
|
|
500
514
|
r"""
|
|
501
|
-
Compute delta coefficients of a spectrogram.
|
|
515
|
+
Compute delta coefficients, also known as differential coefficients, of a spectrogram.
|
|
516
|
+
|
|
517
|
+
Delta coefficients help to understand the dynamics of the power spectrum. It can be
|
|
518
|
+
computed using the following formula.
|
|
502
519
|
|
|
503
520
|
.. math::
|
|
504
521
|
d_{t}=\frac{{\textstyle\sum_{n=1}^{N}}n(c_{t+n}-c_{t-n})}{2{\textstyle\sum_{n=1}^{N}}n^{2}}
|
|
505
522
|
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
pad_mode (BorderType, optional): Mode parameter passed to padding (default=BorderType.EDGE). It can be any of
|
|
509
|
-
[BorderType.CONSTANT, BorderType.EDGE, BorderType.REFLECT, BorderType.SYMMETRIC].
|
|
523
|
+
where :math:`d_{t}` is the deltas at time :math:`t` , :math:`c_{t}` is the spectrogram coefficients
|
|
524
|
+
at time :math:`t` , :math:`N` is :math:`(\text{win_length} - 1) // 2` .
|
|
510
525
|
|
|
511
|
-
|
|
526
|
+
Args:
|
|
527
|
+
win_length (int, optional): The window length used for computing delta, must be no less than 3. Default: 5.
|
|
528
|
+
pad_mode (BorderType, optional): Mode parameter passed to padding, can be BorderType.CONSTANT, BorderType.EDGE,
|
|
529
|
+
BorderType.REFLECT or BorderType.SYMMETRIC. Default: BorderType.EDGE.
|
|
512
530
|
|
|
513
|
-
- BorderType.
|
|
531
|
+
- BorderType.CONSTANT, pad with a constant value.
|
|
532
|
+
- BorderType.EDGE, pad with the last value on the edge.
|
|
533
|
+
- BorderType.REFLECT, reflect the value on the edge while omitting the last one.
|
|
534
|
+
For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [3, 2, 1, 2, 3, 4, 3, 2].
|
|
535
|
+
- BorderType.SYMMETRIC, reflect the value on the edge while repeating the last one.
|
|
536
|
+
For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [2, 1, 1, 2, 3, 4, 4, 3].
|
|
514
537
|
|
|
515
|
-
|
|
516
|
-
|
|
538
|
+
Raises:
|
|
539
|
+
TypeError: If `win_length` is not of type int.
|
|
540
|
+
ValueError: If `win_length` is less than 3.
|
|
541
|
+
TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
|
|
542
|
+
RuntimeError: If input tensor is not in shape of <..., freq, time>.
|
|
517
543
|
|
|
518
|
-
|
|
519
|
-
|
|
544
|
+
Supported Platforms:
|
|
545
|
+
``CPU``
|
|
520
546
|
|
|
521
547
|
Examples:
|
|
522
548
|
>>> import numpy as np
|
|
@@ -546,9 +572,8 @@ class Contrast(AudioTensorOperation):
|
|
|
546
572
|
|
|
547
573
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
548
574
|
|
|
549
|
-
|
|
550
575
|
Note:
|
|
551
|
-
The
|
|
576
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
552
577
|
|
|
553
578
|
Args:
|
|
554
579
|
enhancement_amount (float, optional): Controls the amount of the enhancement,
|
|
@@ -589,6 +614,13 @@ class DBToAmplitude(AudioTensorOperation):
|
|
|
589
614
|
ref (float): Reference which the output will be scaled by.
|
|
590
615
|
power (float): If power equals 1, will compute DB to power. If 0.5, will compute DB to amplitude.
|
|
591
616
|
|
|
617
|
+
Raises:
|
|
618
|
+
TypeError: If `ref` is not of type float.
|
|
619
|
+
TypeError: If `power` is not of type float.
|
|
620
|
+
|
|
621
|
+
Supported Platforms:
|
|
622
|
+
``CPU``
|
|
623
|
+
|
|
592
624
|
Examples:
|
|
593
625
|
>>> import numpy as np
|
|
594
626
|
>>>
|
|
@@ -610,12 +642,20 @@ class DBToAmplitude(AudioTensorOperation):
|
|
|
610
642
|
|
|
611
643
|
class DCShift(AudioTensorOperation):
|
|
612
644
|
"""
|
|
613
|
-
Apply a DC shift to the audio.
|
|
645
|
+
Apply a DC shift to the audio. This can be useful to remove DC offset from audio.
|
|
614
646
|
|
|
615
647
|
Args:
|
|
616
648
|
shift (float): The amount to shift the audio, the value must be in the range [-2.0, 2.0].
|
|
617
649
|
limiter_gain (float, optional): Used only on peaks to prevent clipping,
|
|
618
|
-
the value should be much less than 1, such as 0.05 or 0.02.
|
|
650
|
+
the value should be much less than 1, such as 0.05 or 0.02. Default: None, will be set to `shift` .
|
|
651
|
+
|
|
652
|
+
Raises:
|
|
653
|
+
TypeError: If `shift` is not of type float.
|
|
654
|
+
ValueError: If `shift` is not in range [-2.0, 2.0].
|
|
655
|
+
TypeError: If `limiter_gain` is not of type float.
|
|
656
|
+
|
|
657
|
+
Supported Platforms:
|
|
658
|
+
``CPU``
|
|
619
659
|
|
|
620
660
|
Examples:
|
|
621
661
|
>>> import numpy as np
|
|
@@ -638,11 +678,20 @@ class DCShift(AudioTensorOperation):
|
|
|
638
678
|
|
|
639
679
|
class DeemphBiquad(AudioTensorOperation):
|
|
640
680
|
"""
|
|
641
|
-
|
|
681
|
+
Apply Compact Disc (IEC 60908) de-emphasis (a treble attenuation shelving filter) to the audio waveform.
|
|
682
|
+
|
|
683
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
642
684
|
|
|
643
685
|
Args:
|
|
644
|
-
sample_rate (int):
|
|
645
|
-
|
|
686
|
+
sample_rate (int): Sampling rate of the waveform, must be 44100 or 48000 (Hz).
|
|
687
|
+
|
|
688
|
+
Raises:
|
|
689
|
+
TypeError: If `sample_rate` is not of type int.
|
|
690
|
+
ValueError: If `sample_rate` is not 44100 or 48000.
|
|
691
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
692
|
+
|
|
693
|
+
Supported Platforms:
|
|
694
|
+
``CPU``
|
|
646
695
|
|
|
647
696
|
Examples:
|
|
648
697
|
>>> import numpy as np
|
|
@@ -670,13 +719,28 @@ class DetectPitchFrequency(AudioTensorOperation):
|
|
|
670
719
|
|
|
671
720
|
Args:
|
|
672
721
|
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero.
|
|
673
|
-
frame_time (float, optional): Duration of a frame, the value must be greater than zero
|
|
722
|
+
frame_time (float, optional): Duration of a frame, the value must be greater than zero. Default: 0.01.
|
|
674
723
|
win_length (int, optional): The window length for median smoothing (in number of frames), the value must be
|
|
675
|
-
greater than zero
|
|
676
|
-
freq_low (int, optional): Lowest frequency that can be detected (Hz), the value must be greater than zero
|
|
677
|
-
|
|
678
|
-
freq_high (int, optional): Highest frequency that can be detected (Hz), the value must be greater than zero
|
|
679
|
-
|
|
724
|
+
greater than zero. Default: 30.
|
|
725
|
+
freq_low (int, optional): Lowest frequency that can be detected (Hz), the value must be greater than zero.
|
|
726
|
+
Default: 85.
|
|
727
|
+
freq_high (int, optional): Highest frequency that can be detected (Hz), the value must be greater than zero.
|
|
728
|
+
Default: 3400.
|
|
729
|
+
|
|
730
|
+
Raises:
|
|
731
|
+
TypeError: If `sample_rate` is not of type int.
|
|
732
|
+
ValueError: If `sample_rate` is 0.
|
|
733
|
+
TypeError: If `frame_time` is not of type float.
|
|
734
|
+
ValueError: If `frame_time` is not positive.
|
|
735
|
+
TypeError: If `win_length` is not of type int.
|
|
736
|
+
ValueError: If `win_length` is not positive.
|
|
737
|
+
TypeError: If `freq_low` is not of type int.
|
|
738
|
+
ValueError: If `freq_low` is not positive.
|
|
739
|
+
TypeError: If `freq_high` is not of type int.
|
|
740
|
+
ValueError: If `freq_high` is not positive.
|
|
741
|
+
|
|
742
|
+
Supported Platforms:
|
|
743
|
+
``CPU``
|
|
680
744
|
|
|
681
745
|
Examples:
|
|
682
746
|
>>> import numpy as np
|
|
@@ -714,12 +778,20 @@ class Dither(AudioTensorOperation):
|
|
|
714
778
|
|
|
715
779
|
Args:
|
|
716
780
|
density_function (DensityFunction, optional): The density function of a continuous
|
|
717
|
-
random variable
|
|
781
|
+
random variable, can be DensityFunction.TPDF (Triangular Probability Density Function),
|
|
718
782
|
DensityFunction.RPDF (Rectangular Probability Density Function) or
|
|
719
|
-
DensityFunction.GPDF (Gaussian Probability Density Function)
|
|
720
|
-
|
|
783
|
+
DensityFunction.GPDF (Gaussian Probability Density Function).
|
|
784
|
+
Default: DensityFunction.TPDF.
|
|
721
785
|
noise_shaping (bool, optional): A filtering process that shapes the spectral
|
|
722
|
-
energy of quantisation error
|
|
786
|
+
energy of quantisation error. Default: False.
|
|
787
|
+
|
|
788
|
+
Raises:
|
|
789
|
+
TypeError: If `density_function` is not of type :class:`mindspore.dataset.audio.DensityFunction` .
|
|
790
|
+
TypeError: If `noise_shaping` is not of type bool.
|
|
791
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
792
|
+
|
|
793
|
+
Supported Platforms:
|
|
794
|
+
``CPU``
|
|
723
795
|
|
|
724
796
|
Examples:
|
|
725
797
|
>>> import numpy as np
|
|
@@ -742,13 +814,26 @@ class Dither(AudioTensorOperation):
|
|
|
742
814
|
|
|
743
815
|
class EqualizerBiquad(AudioTensorOperation):
|
|
744
816
|
"""
|
|
745
|
-
Design biquad equalizer filter and perform filtering.
|
|
817
|
+
Design biquad equalizer filter and perform filtering.
|
|
818
|
+
|
|
819
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
746
820
|
|
|
747
821
|
Args:
|
|
748
|
-
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be
|
|
822
|
+
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
|
|
749
823
|
center_freq (float): Central frequency (in Hz).
|
|
750
824
|
gain (float): Desired gain at the boost (or attenuation) in dB.
|
|
751
|
-
Q (float, optional): https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]
|
|
825
|
+
Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: 0.707.
|
|
826
|
+
|
|
827
|
+
Raises:
|
|
828
|
+
TypeError: If `sample_rate` is not of type int.
|
|
829
|
+
ValueError: If `sample_rate` is 0.
|
|
830
|
+
TypeError: If `center_freq` is not of type float.
|
|
831
|
+
TypeError: If `gain` is not of type float.
|
|
832
|
+
TypeError: If `Q` is not of type float.
|
|
833
|
+
ValueError: If `Q` is not in range of (0, 1].
|
|
834
|
+
|
|
835
|
+
Supported Platforms:
|
|
836
|
+
``CPU``
|
|
752
837
|
|
|
753
838
|
Examples:
|
|
754
839
|
>>> import numpy as np
|
|
@@ -783,11 +868,10 @@ class Fade(AudioTensorOperation):
|
|
|
783
868
|
Add a fade in and/or fade out to a waveform.
|
|
784
869
|
|
|
785
870
|
Args:
|
|
786
|
-
fade_in_len (int, optional): Length of fade-in (time frames), which must be non-negative
|
|
787
|
-
fade_out_len (int, optional): Length of fade-out (time frames), which must be non-negative
|
|
788
|
-
fade_shape (FadeShape, optional): Shape of fade
|
|
789
|
-
|
|
790
|
-
FadeShape.EXPONENTIAL.
|
|
871
|
+
fade_in_len (int, optional): Length of fade-in (time frames), which must be non-negative. Default: 0.
|
|
872
|
+
fade_out_len (int, optional): Length of fade-out (time frames), which must be non-negative. Default: 0.
|
|
873
|
+
fade_shape (FadeShape, optional): Shape of fade, five different types can be chosen as defined in FadeShape.
|
|
874
|
+
Default: FadeShape.LINEAR.
|
|
791
875
|
|
|
792
876
|
- FadeShape.QUARTER_SINE, means it tends to 0 in a quarter sine function.
|
|
793
877
|
|
|
@@ -803,6 +887,9 @@ class Fade(AudioTensorOperation):
|
|
|
803
887
|
RuntimeError: If fade_in_len exceeds waveform length.
|
|
804
888
|
RuntimeError: If fade_out_len exceeds waveform length.
|
|
805
889
|
|
|
890
|
+
Supported Platforms:
|
|
891
|
+
``CPU``
|
|
892
|
+
|
|
806
893
|
Examples:
|
|
807
894
|
>>> import numpy as np
|
|
808
895
|
>>> from mindspore.dataset.audio import FadeShape
|
|
@@ -829,13 +916,20 @@ class Filtfilt(AudioTensorOperation):
|
|
|
829
916
|
Apply an IIR filter forward and backward to a waveform.
|
|
830
917
|
|
|
831
918
|
Args:
|
|
832
|
-
a_coeffs (Sequence):
|
|
919
|
+
a_coeffs (Sequence[float]): Denominator coefficients of the difference equation.
|
|
833
920
|
Lower delays coefficients are first, e.g. [a0, a1, a2, ...].
|
|
834
921
|
Must be same size as b_coeffs (pad with 0's as necessary).
|
|
835
|
-
b_coeffs (Sequence):
|
|
922
|
+
b_coeffs (Sequence[float]): Numerator coefficients of the difference equation.
|
|
836
923
|
Lower delays coefficients are first, e.g. [b0, b1, b2, ...].
|
|
837
924
|
Must be same size as a_coeffs (pad with 0's as necessary).
|
|
838
|
-
clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default
|
|
925
|
+
clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default: True.
|
|
926
|
+
|
|
927
|
+
Raises:
|
|
928
|
+
TypeError: If `a_coeffs` is not of type Sequence[float].
|
|
929
|
+
TypeError: If `b_coeffs` is not of type Sequence[float].
|
|
930
|
+
ValueError: If `a_coeffs` and `b_coeffs` are of different sizes.
|
|
931
|
+
TypeError: If `clamp` is not of type bool.
|
|
932
|
+
RuntimeError: If shape of the input audio is not <..., time>.
|
|
839
933
|
|
|
840
934
|
Examples:
|
|
841
935
|
>>> import numpy as np
|
|
@@ -870,18 +964,42 @@ class Flanger(AudioTensorOperation):
|
|
|
870
964
|
"""
|
|
871
965
|
Apply a flanger effect to the audio.
|
|
872
966
|
|
|
967
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
968
|
+
|
|
873
969
|
Args:
|
|
874
970
|
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
|
|
875
|
-
delay (float, optional): Desired delay in milliseconds
|
|
876
|
-
depth (float, optional): Desired delay depth in milliseconds
|
|
877
|
-
regen (float, optional): Desired regen (feedback gain) in dB, range
|
|
878
|
-
width (float, optional): Desired width (delay gain) in dB, range
|
|
879
|
-
speed (float, optional): Modulation speed in Hz, range
|
|
880
|
-
phase (float, optional): Percentage phase-shift for multi-channel, range
|
|
881
|
-
modulation (Modulation, optional): Modulation
|
|
882
|
-
|
|
883
|
-
interpolation (Interpolation, optional): Interpolation
|
|
884
|
-
|
|
971
|
+
delay (float, optional): Desired delay in milliseconds, in range of [0, 30]. Default: 0.0.
|
|
972
|
+
depth (float, optional): Desired delay depth in milliseconds, in range of [0, 10]. Default: 2.0.
|
|
973
|
+
regen (float, optional): Desired regen (feedback gain) in dB, in range of [-95, 95]. Default: 0.0.
|
|
974
|
+
width (float, optional): Desired width (delay gain) in dB, in range of [0, 100]. Default: 71.0.
|
|
975
|
+
speed (float, optional): Modulation speed in Hz, in range of [0.1, 10]. Default: 0.5.
|
|
976
|
+
phase (float, optional): Percentage phase-shift for multi-channel, in range of [0, 100]. Default: 25.0.
|
|
977
|
+
modulation (Modulation, optional): Modulation method, can be Modulation.SINUSOIDAL or Modulation.TRIANGULAR.
|
|
978
|
+
Default: Modulation.SINUSOIDAL.
|
|
979
|
+
interpolation (Interpolation, optional): Interpolation method, can be Interpolation.LINEAR or
|
|
980
|
+
Interpolation.QUADRATIC. Default: Interpolation.LINEAR.
|
|
981
|
+
|
|
982
|
+
Raises:
|
|
983
|
+
TypeError: If `sample_rate` is not of type int.
|
|
984
|
+
ValueError: If `sample_rate` is zero.
|
|
985
|
+
TypeError: If `delay` is not of type float.
|
|
986
|
+
ValueError: If `delay` is not in range of [0, 30].
|
|
987
|
+
TypeError: If `depth` is not of type float.
|
|
988
|
+
ValueError: If `depth` is not in range of [0, 10].
|
|
989
|
+
TypeError: If `regen` is not of type float.
|
|
990
|
+
ValueError: If `regen` is not in range of [-95, 95].
|
|
991
|
+
TypeError: If `width` is not of type float.
|
|
992
|
+
ValueError: If `width` is not in range of [0, 100].
|
|
993
|
+
TypeError: If `speed` is not of type float.
|
|
994
|
+
ValueError: If `speed` is not in range of [0.1, 10].
|
|
995
|
+
TypeError: If `phase` is not of type float.
|
|
996
|
+
ValueError: If `phase` is not in range of [0, 100].
|
|
997
|
+
TypeError: If `modulation` is not of type :class:`mindspore.dataset.audio.Modulation` .
|
|
998
|
+
TypeError: If `interpolation` is not of type :class:`mindspore.dataset.audio.Interpolation` .
|
|
999
|
+
RuntimeError: If input tensor is not in shape of <..., channel, time>.
|
|
1000
|
+
|
|
1001
|
+
Supported Platforms:
|
|
1002
|
+
``CPU``
|
|
885
1003
|
|
|
886
1004
|
Examples:
|
|
887
1005
|
>>> import numpy as np
|
|
@@ -917,7 +1035,7 @@ class FrequencyMasking(AudioTensorOperation):
|
|
|
917
1035
|
Apply masking to a spectrogram in the frequency domain.
|
|
918
1036
|
|
|
919
1037
|
Note:
|
|
920
|
-
The
|
|
1038
|
+
The shape of the audio waveform to be processed needs to be <..., freq, time>.
|
|
921
1039
|
|
|
922
1040
|
Args:
|
|
923
1041
|
iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: False.
|
|
@@ -932,9 +1050,9 @@ class FrequencyMasking(AudioTensorOperation):
|
|
|
932
1050
|
|
|
933
1051
|
Raises:
|
|
934
1052
|
TypeError: If `iid_masks` is not of type bool.
|
|
935
|
-
TypeError: If `freq_mask_param` is not of type
|
|
1053
|
+
TypeError: If `freq_mask_param` is not of type int.
|
|
936
1054
|
ValueError: If `freq_mask_param` is greater than the length of audio waveform in frequency domain.
|
|
937
|
-
TypeError: If `mask_start` is not of type
|
|
1055
|
+
TypeError: If `mask_start` is not of type int.
|
|
938
1056
|
ValueError: If `mask_start` is a negative number.
|
|
939
1057
|
TypeError: If `mask_value` is not of type float.
|
|
940
1058
|
ValueError: If `mask_value` is a negative number.
|
|
@@ -974,7 +1092,13 @@ class Gain(AudioTensorOperation):
|
|
|
974
1092
|
Apply amplification or attenuation to the whole waveform.
|
|
975
1093
|
|
|
976
1094
|
Args:
|
|
977
|
-
gain_db (float): Gain adjustment in decibels (dB)
|
|
1095
|
+
gain_db (float, optional): Gain adjustment in decibels (dB). Default: 1.0.
|
|
1096
|
+
|
|
1097
|
+
Raises:
|
|
1098
|
+
TypeError: If `gain_db` is not of type float.
|
|
1099
|
+
|
|
1100
|
+
Supported Platforms:
|
|
1101
|
+
``CPU``
|
|
978
1102
|
|
|
979
1103
|
Examples:
|
|
980
1104
|
>>> import numpy as np
|
|
@@ -996,28 +1120,49 @@ class Gain(AudioTensorOperation):
|
|
|
996
1120
|
|
|
997
1121
|
class GriffinLim(AudioTensorOperation):
|
|
998
1122
|
r"""
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
.. math::
|
|
1002
|
-
x(n)=\frac{\sum_{m=-\infty}^{\infty} w(m S-n) y_{w}(m S, n)}{\sum_{m=-\infty}^{\infty} w^{2}(m S-n)}
|
|
1123
|
+
Compute waveform from a linear scale magnitude spectrogram using the Griffin-Lim transformation.
|
|
1003
1124
|
|
|
1004
|
-
|
|
1005
|
-
|
|
1125
|
+
For details about Griffin-Lim, please refer to `A fast Griffin-Lim algorithm <https://doi.org/10.1109/WASPAA.2013.6701851>`_
|
|
1126
|
+
and `Signal estimation from modified short-time Fourier transform <https://doi.org/10.1109/ICASSP.1983.1172092>`_ .
|
|
1006
1127
|
|
|
1007
1128
|
Args:
|
|
1008
|
-
n_fft (int, optional): Size of FFT
|
|
1009
|
-
n_iter (int, optional): Number of iteration for phase recovery
|
|
1010
|
-
win_length (int, optional): Window size for GriffinLim
|
|
1011
|
-
hop_length (int, optional): Length of hop between STFT windows
|
|
1129
|
+
n_fft (int, optional): Size of FFT. Default: 400.
|
|
1130
|
+
n_iter (int, optional): Number of iterations for phase recovery. Default: 32.
|
|
1131
|
+
win_length (int, optional): Window size for GriffinLim. Default: None, will be set to `n_fft` .
|
|
1132
|
+
hop_length (int, optional): Length of hop between STFT windows.
|
|
1133
|
+
Default: None, will be set to `win_length // 2` .
|
|
1012
1134
|
window_type (WindowType, optional): Window type for GriffinLim, which can be WindowType.BARTLETT,
|
|
1013
|
-
WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN or WindowType.KAISER
|
|
1135
|
+
WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN or WindowType.KAISER. Default: WindowType.HANN.
|
|
1014
1136
|
Currently kaiser window is not supported on macOS.
|
|
1015
|
-
power (float, optional): Exponent for the magnitude spectrogram
|
|
1016
|
-
momentum (float, optional): The momentum for fast Griffin-Lim
|
|
1017
|
-
length (int, optional): Length of the expected output waveform
|
|
1018
|
-
dimension of the stft matrix
|
|
1019
|
-
rand_init (bool, optional): Flag for random phase initialization or all-zero phase initialization
|
|
1020
|
-
|
|
1137
|
+
power (float, optional): Exponent for the magnitude spectrogram. Default: 2.0.
|
|
1138
|
+
momentum (float, optional): The momentum for fast Griffin-Lim. Default: 0.99.
|
|
1139
|
+
length (int, optional): Length of the expected output waveform. Default: None, will be set to the value of last
|
|
1140
|
+
dimension of the stft matrix.
|
|
1141
|
+
rand_init (bool, optional): Flag for random phase initialization or all-zero phase initialization.
|
|
1142
|
+
Default: True.
|
|
1143
|
+
|
|
1144
|
+
Raises:
|
|
1145
|
+
TypeError: If `n_fft` is not of type int.
|
|
1146
|
+
ValueError: If `n_fft` is not positive.
|
|
1147
|
+
TypeError: If `n_iter` is not of type int.
|
|
1148
|
+
ValueError: If `n_iter` is not positive.
|
|
1149
|
+
TypeError: If `win_length` is not of type int.
|
|
1150
|
+
ValueError: If `win_length` is a negative number.
|
|
1151
|
+
TypeError: If `hop_length` is not of type int.
|
|
1152
|
+
ValueError: If `hop_length` is a negative number.
|
|
1153
|
+
TypeError: If `window_type` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
1154
|
+
TypeError: If `power` is not of type float.
|
|
1155
|
+
ValueError: If `power` is not positive.
|
|
1156
|
+
TypeError: If `momentum` is not of type float.
|
|
1157
|
+
ValueError: If `momentum` is a negative number.
|
|
1158
|
+
TypeError: If `length` is not of type int.
|
|
1159
|
+
ValueError: If `length` is a negative number.
|
|
1160
|
+
TypeError: If `rand_init` is not of type bool.
|
|
1161
|
+
RuntimeError: If `n_fft` is not less than `length` .
|
|
1162
|
+
RuntimeError: If `win_length` is not less than `n_fft` .
|
|
1163
|
+
|
|
1164
|
+
Supported Platforms:
|
|
1165
|
+
``CPU``
|
|
1021
1166
|
|
|
1022
1167
|
Examples:
|
|
1023
1168
|
>>> import numpy as np
|
|
@@ -1029,7 +1174,7 @@ class GriffinLim(AudioTensorOperation):
|
|
|
1029
1174
|
"""
|
|
1030
1175
|
|
|
1031
1176
|
@check_griffin_lim
|
|
1032
|
-
def __init__(self, n_fft=400, n_iter=32, win_length=None, hop_length=None, window_type=WindowType.HANN, power=2,
|
|
1177
|
+
def __init__(self, n_fft=400, n_iter=32, win_length=None, hop_length=None, window_type=WindowType.HANN, power=2.0,
|
|
1033
1178
|
momentum=0.99, length=None, rand_init=True):
|
|
1034
1179
|
super().__init__()
|
|
1035
1180
|
self.n_fft = n_fft
|
|
@@ -1050,12 +1195,25 @@ class GriffinLim(AudioTensorOperation):
|
|
|
1050
1195
|
|
|
1051
1196
|
class HighpassBiquad(AudioTensorOperation):
|
|
1052
1197
|
"""
|
|
1053
|
-
Design biquad highpass filter and perform filtering.
|
|
1198
|
+
Design biquad highpass filter and perform filtering.
|
|
1199
|
+
|
|
1200
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
1054
1201
|
|
|
1055
1202
|
Args:
|
|
1056
|
-
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be
|
|
1203
|
+
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
|
|
1057
1204
|
cutoff_freq (float): Filter cutoff frequency (in Hz).
|
|
1058
|
-
Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]
|
|
1205
|
+
Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: 0.707.
|
|
1206
|
+
|
|
1207
|
+
Raises:
|
|
1208
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1209
|
+
ValueError: If `sample_rate` is 0.
|
|
1210
|
+
TypeError: If `cutoff_freq` is not of type float.
|
|
1211
|
+
TypeError: If `Q` is not of type float.
|
|
1212
|
+
ValueError: If `Q` is not in range of (0, 1].
|
|
1213
|
+
RuntimeError: If the shape of input audio waveform does not match <..., time>.
|
|
1214
|
+
|
|
1215
|
+
Supported Platforms:
|
|
1216
|
+
``CPU``
|
|
1059
1217
|
|
|
1060
1218
|
Examples:
|
|
1061
1219
|
>>> import numpy as np
|
|
@@ -1079,22 +1237,46 @@ class HighpassBiquad(AudioTensorOperation):
|
|
|
1079
1237
|
|
|
1080
1238
|
class InverseMelScale(AudioTensorOperation):
|
|
1081
1239
|
"""
|
|
1082
|
-
Solve for a normal STFT
|
|
1240
|
+
Solve for a normal STFT from a mel frequency STFT, using a conversion matrix.
|
|
1083
1241
|
|
|
1084
1242
|
Args:
|
|
1085
1243
|
n_stft (int): Number of bins in STFT.
|
|
1086
|
-
n_mels (int, optional): Number of mel filterbanks
|
|
1087
|
-
sample_rate (int, optional): Sample rate of audio signal
|
|
1088
|
-
f_min (float, optional): Minimum frequency
|
|
1089
|
-
f_max (float, optional): Maximum frequency
|
|
1090
|
-
max_iter (int, optional): Maximum number of optimization iterations
|
|
1091
|
-
tolerance_loss (float, optional): Value of loss to stop optimization at
|
|
1092
|
-
tolerance_change (float, optional): Difference in losses to stop optimization at
|
|
1093
|
-
sgdargs (dict, optional): Arguments for the SGD optimizer
|
|
1094
|
-
{'sgd_lr': 0.1, 'sgd_momentum': 0.9}
|
|
1095
|
-
norm (NormType, optional): Normalization method, can be NormType.SLANEY or NormType.NONE
|
|
1096
|
-
|
|
1097
|
-
mel_type (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK
|
|
1244
|
+
n_mels (int, optional): Number of mel filterbanks. Default: 128.
|
|
1245
|
+
sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
|
|
1246
|
+
f_min (float, optional): Minimum frequency. Default: 0.0.
|
|
1247
|
+
f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
|
|
1248
|
+
max_iter (int, optional): Maximum number of optimization iterations. Default: 100000.
|
|
1249
|
+
tolerance_loss (float, optional): Value of loss to stop optimization at. Default: 1e-5.
|
|
1250
|
+
tolerance_change (float, optional): Difference in losses to stop optimization at. Default: 1e-8.
|
|
1251
|
+
sgdargs (dict, optional): Arguments for the SGD optimizer. Default: None, will be set to
|
|
1252
|
+
{'sgd_lr': 0.1, 'sgd_momentum': 0.9}.
|
|
1253
|
+
norm (NormType, optional): Normalization method, can be NormType.SLANEY or NormType.NONE.
|
|
1254
|
+
Default: NormType.NONE, no normalization.
|
|
1255
|
+
mel_type (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK. Default: MelType.HTK.
|
|
1256
|
+
|
|
1257
|
+
Raises:
|
|
1258
|
+
TypeError: If `n_stft` is not of type int.
|
|
1259
|
+
ValueError: If `n_stft` is not positive.
|
|
1260
|
+
TypeError: If `n_mels` is not of type int.
|
|
1261
|
+
ValueError: If `n_mels` is not positive.
|
|
1262
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1263
|
+
ValueError: If `sample_rate` is not positive.
|
|
1264
|
+
TypeError: If `f_min` is not of type float.
|
|
1265
|
+
ValueError: If `f_min` is greater than or equal to `f_max` .
|
|
1266
|
+
TypeError: If `f_max` is not of type float.
|
|
1267
|
+
ValueError: If `f_max` is a negative number.
|
|
1268
|
+
TypeError: If `max_iter` is not of type int.
|
|
1269
|
+
ValueError: If `max_iter` is a negative number.
|
|
1270
|
+
TypeError: If `tolerance_loss` is not of type float.
|
|
1271
|
+
ValueError: If `tolerance_loss` is a negative number.
|
|
1272
|
+
TypeError: If `tolerance_change` is not of type float.
|
|
1273
|
+
ValueError: If `tolerance_change` is a negative number.
|
|
1274
|
+
TypeError: If `sgdargs` is not of type dict.
|
|
1275
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
|
|
1276
|
+
TypeError: If `mel_type` is not of type :class:`mindspore.dataset.audio.MelType` .
|
|
1277
|
+
|
|
1278
|
+
Supported Platforms:
|
|
1279
|
+
``CPU``
|
|
1098
1280
|
|
|
1099
1281
|
Examples:
|
|
1100
1282
|
>>> import numpy as np
|
|
@@ -1130,18 +1312,199 @@ class InverseMelScale(AudioTensorOperation):
|
|
|
1130
1312
|
DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_type))
|
|
1131
1313
|
|
|
1132
1314
|
|
|
1315
|
+
class InverseSpectrogram(AudioTensorOperation):
|
|
1316
|
+
"""
|
|
1317
|
+
Create an inverse spectrogram to recover an audio signal from a spectrogram.
|
|
1318
|
+
|
|
1319
|
+
Args:
|
|
1320
|
+
length (int, optional): The output length of the waveform, must be non-negative. Default: None,
|
|
1321
|
+
means to output the whole waveform.
|
|
1322
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0.
|
|
1323
|
+
Default: 400.
|
|
1324
|
+
win_length (int, optional): Window size, which should be greater than 0.
|
|
1325
|
+
Default: None, will be set to `n_fft` .
|
|
1326
|
+
hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
|
|
1327
|
+
Default: None, will be set to `win_length // 2` .
|
|
1328
|
+
pad (int, optional): Two sided padding of signal, cannot be less than 0. Default: 0.
|
|
1329
|
+
window (WindowType, optional): A function to create a window tensor that is applied/multiplied to each
|
|
1330
|
+
frame/window. Default: WindowType.HANN.
|
|
1331
|
+
normalized (bool, optional): Whether the spectrogram was normalized by magnitude after stft. Default: False.
|
|
1332
|
+
center (bool, optional): Whether the signal in spectrogram was padded on both sides. Default: True.
|
|
1333
|
+
pad_mode (BorderType, optional): Controls the padding method used when `center` is True,
|
|
1334
|
+
can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE or BorderType.SYMMETRIC.
|
|
1335
|
+
Default: BorderType.REFLECT.
|
|
1336
|
+
onesided (bool, optional): Controls whether spectrogram was used to return half of results to avoid
|
|
1337
|
+
redundancy. Default: True.
|
|
1338
|
+
|
|
1339
|
+
Raises:
|
|
1340
|
+
TypeError: If `length` is not of type int.
|
|
1341
|
+
ValueError: If `length` is a negative number.
|
|
1342
|
+
TypeError: If `n_fft` is not of type int.
|
|
1343
|
+
ValueError: If `n_fft` is not positive.
|
|
1344
|
+
TypeError: If `win_length` is not of type int.
|
|
1345
|
+
ValueError: If `win_length` is not positive.
|
|
1346
|
+
TypeError: If `hop_length` is not of type int.
|
|
1347
|
+
ValueError: If `hop_length` is not positive.
|
|
1348
|
+
TypeError: If `pad` is not of type int.
|
|
1349
|
+
ValueError: If `pad` is a negative number.
|
|
1350
|
+
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
1351
|
+
TypeError: If `normalized` is not of type bool.
|
|
1352
|
+
TypeError: If `center` is not of type bool.
|
|
1353
|
+
TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
|
|
1354
|
+
TypeError: If `onesided` is not of type bool.
|
|
1355
|
+
|
|
1356
|
+
Supported Platforms:
|
|
1357
|
+
``CPU``
|
|
1358
|
+
|
|
1359
|
+
Examples:
|
|
1360
|
+
>>> import numpy as np
|
|
1361
|
+
>>>
|
|
1362
|
+
>>> waveform = np.array([[[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
|
|
1363
|
+
... [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]]])
|
|
1364
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1365
|
+
>>> transforms = [audio.InverseSpectrogram(1, 400, 400, 200)]
|
|
1366
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1367
|
+
"""
|
|
1368
|
+
|
|
1369
|
+
@check_inverse_spectrogram
|
|
1370
|
+
def __init__(self, length=None, n_fft=400, win_length=None, hop_length=None, pad=0,
|
|
1371
|
+
window=WindowType.HANN, normalized=False, center=True,
|
|
1372
|
+
pad_mode=BorderType.REFLECT, onesided=True):
|
|
1373
|
+
super().__init__()
|
|
1374
|
+
self.length = length if length is not None else 0
|
|
1375
|
+
self.n_fft = n_fft
|
|
1376
|
+
self.win_length = win_length if win_length is not None else n_fft
|
|
1377
|
+
self.hop_length = hop_length if hop_length is not None else self.win_length // 2
|
|
1378
|
+
self.pad = pad
|
|
1379
|
+
self.window = window
|
|
1380
|
+
self.normalized = normalized
|
|
1381
|
+
self.center = center
|
|
1382
|
+
self.pad_mode = pad_mode
|
|
1383
|
+
self.onesided = onesided
|
|
1384
|
+
|
|
1385
|
+
def parse(self):
|
|
1386
|
+
return cde.InverseSpectrogramOperation(self.length, self.n_fft, self.win_length, self.hop_length, self.pad,
|
|
1387
|
+
DE_C_WINDOW_TYPE.get(self.window), self.normalized, self.center,
|
|
1388
|
+
DE_C_BORDER_TYPE.get(self.pad_mode), self.onesided)
|
|
1389
|
+
|
|
1390
|
+
|
|
1391
|
+
DE_C_NORM_MODE = {NormMode.ORTHO: cde.NormMode.DE_NORM_MODE_ORTHO,
|
|
1392
|
+
NormMode.NONE: cde.NormMode.DE_NORM_MODE_NONE}
|
|
1393
|
+
|
|
1394
|
+
|
|
1395
|
+
class LFCC(AudioTensorOperation):
|
|
1396
|
+
"""
|
|
1397
|
+
Create LFCC for a raw audio signal.
|
|
1398
|
+
|
|
1399
|
+
Note:
|
|
1400
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
1401
|
+
|
|
1402
|
+
Args:
|
|
1403
|
+
sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
|
|
1404
|
+
n_filter (int, optional): Number of linear filters to apply. Default: 128.
|
|
1405
|
+
n_lfcc (int, optional): Number of lfc coefficients to retain. Default: 40.
|
|
1406
|
+
f_min (float, optional): Minimum frequency. Default: 0.0.
|
|
1407
|
+
f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
|
|
1408
|
+
dct_type (int, optional): Type of DCT to use. The value can only be 2. Default: 2.
|
|
1409
|
+
norm (NormMode, optional): Norm to use. Default: NormMode.ORTHO.
|
|
1410
|
+
log_lf (bool, optional): Whether to use log-lf spectrograms instead of db-scaled. Default: False.
|
|
1411
|
+
speckwargs (dict, optional): Arguments for :class:`mindspore.dataset.audio.Spectrogram` .
|
|
1412
|
+
Default: None, the default setting is a dict including
|
|
1413
|
+
|
|
1414
|
+
- 'n_fft': 400
|
|
1415
|
+
- 'win_length': n_fft
|
|
1416
|
+
- 'hop_length': win_length // 2
|
|
1417
|
+
- 'pad': 0
|
|
1418
|
+
- 'window': WindowType.HANN
|
|
1419
|
+
- 'power': 2.0
|
|
1420
|
+
- 'normalized': False
|
|
1421
|
+
- 'center': True
|
|
1422
|
+
- 'pad_mode': BorderType.REFLECT
|
|
1423
|
+
- 'onesided': True
|
|
1424
|
+
|
|
1425
|
+
Raises:
|
|
1426
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1427
|
+
TypeError: If `n_filter` is not of type int.
|
|
1428
|
+
TypeError: If `n_lfcc` is not of type int.
|
|
1429
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormMode` .
|
|
1430
|
+
TypeError: If `log_lf` is not of type bool.
|
|
1431
|
+
TypeError: If `speckwargs` is not of type dict.
|
|
1432
|
+
ValueError: If `sample_rate` is 0.
|
|
1433
|
+
ValueError: If `n_lfcc` is less than 0.
|
|
1434
|
+
ValueError: If `f_min` is greater than `f_max` .
|
|
1435
|
+
ValueError: If `f_min` is greater than `sample_rate // 2` when `f_max` is set to None.
|
|
1436
|
+
ValueError: If `dct_type` is not 2.
|
|
1437
|
+
|
|
1438
|
+
Supported Platforms:
|
|
1439
|
+
``CPU``
|
|
1440
|
+
|
|
1441
|
+
Examples:
|
|
1442
|
+
>>> import numpy as np
|
|
1443
|
+
>>> import mindspore.dataset as ds
|
|
1444
|
+
>>> import mindspore.dataset.audio as audio
|
|
1445
|
+
>>>
|
|
1446
|
+
>>> waveform = np.random.random([1, 1, 300])
|
|
1447
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1448
|
+
>>> transforms = [audio.LFCC()]
|
|
1449
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1450
|
+
"""
|
|
1451
|
+
|
|
1452
|
+
@check_lfcc
|
|
1453
|
+
def __init__(self, sample_rate=16000, n_filter=128, n_lfcc=40, f_min=0.0, f_max=None, dct_type=2,
|
|
1454
|
+
norm=NormMode.ORTHO, log_lf=False, speckwargs=None):
|
|
1455
|
+
super().__init__()
|
|
1456
|
+
self.sample_rate = sample_rate
|
|
1457
|
+
self.n_filter = n_filter
|
|
1458
|
+
self.n_lfcc = n_lfcc
|
|
1459
|
+
self.f_min = f_min
|
|
1460
|
+
self.f_max = f_max if f_max is not None else sample_rate // 2
|
|
1461
|
+
self.dct_type = dct_type
|
|
1462
|
+
self.norm = norm
|
|
1463
|
+
self.log_lf = log_lf
|
|
1464
|
+
self.speckwargs = speckwargs
|
|
1465
|
+
if speckwargs is None:
|
|
1466
|
+
self.speckwargs = {}
|
|
1467
|
+
self.speckwargs.setdefault("n_fft", 400)
|
|
1468
|
+
self.speckwargs.setdefault("win_length", self.speckwargs.get("n_fft"))
|
|
1469
|
+
self.speckwargs.setdefault("hop_length", self.speckwargs.get("win_length") // 2)
|
|
1470
|
+
self.speckwargs.setdefault("pad", 0)
|
|
1471
|
+
self.speckwargs.setdefault("window", WindowType.HANN)
|
|
1472
|
+
self.speckwargs.setdefault("power", 2.0)
|
|
1473
|
+
self.speckwargs.setdefault("normalized", False)
|
|
1474
|
+
self.speckwargs.setdefault("center", True)
|
|
1475
|
+
self.speckwargs.setdefault("pad_mode", BorderType.REFLECT)
|
|
1476
|
+
self.speckwargs.setdefault("onesided", True)
|
|
1477
|
+
self.window = self.speckwargs.get("window")
|
|
1478
|
+
self.pad_mode = self.speckwargs.get("pad_mode")
|
|
1479
|
+
|
|
1480
|
+
def parse(self):
|
|
1481
|
+
return cde.LFCCOperation(self.sample_rate, self.n_filter, self.n_lfcc, self.f_min, self.f_max,
|
|
1482
|
+
self.dct_type, DE_C_NORM_MODE.get(self.norm), self.log_lf, self.speckwargs,
|
|
1483
|
+
DE_C_WINDOW_TYPE.get(self.window), DE_C_BORDER_TYPE.get(self.pad_mode))
|
|
1484
|
+
|
|
1485
|
+
|
|
1133
1486
|
class LFilter(AudioTensorOperation):
|
|
1134
1487
|
"""
|
|
1135
|
-
|
|
1488
|
+
Perform an IIR filter by evaluating a difference equation.
|
|
1136
1489
|
|
|
1137
1490
|
Args:
|
|
1138
|
-
a_coeffs (
|
|
1491
|
+
a_coeffs (Sequence[float]): Denominator coefficients of the difference equation.
|
|
1139
1492
|
Lower delays coefficients are first, e.g. [a0, a1, a2, ...].
|
|
1140
1493
|
Must be same size as b_coeffs (pad with 0's as necessary).
|
|
1141
|
-
b_coeffs (
|
|
1494
|
+
b_coeffs (Sequence[float]): Numerator coefficients of the difference equation.
|
|
1142
1495
|
Lower delays coefficients are first, e.g. [b0, b1, b2, ...].
|
|
1143
1496
|
Must be same size as a_coeffs (pad with 0's as necessary).
|
|
1144
|
-
clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]
|
|
1497
|
+
clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default: True.
|
|
1498
|
+
|
|
1499
|
+
Raises:
|
|
1500
|
+
TypeError: If `a_coeffs` is not of type Sequence[float].
|
|
1501
|
+
TypeError: If `b_coeffs` is not of type Sequence[float].
|
|
1502
|
+
ValueError: If `a_coeffs` and `b_coeffs` are of different sizes.
|
|
1503
|
+
TypeError: If `clamp` is not of type bool.
|
|
1504
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
1505
|
+
|
|
1506
|
+
Supported Platforms:
|
|
1507
|
+
``CPU``
|
|
1145
1508
|
|
|
1146
1509
|
Examples:
|
|
1147
1510
|
>>> import numpy as np
|
|
@@ -1178,7 +1541,7 @@ class LowpassBiquad(AudioTensorOperation):
|
|
|
1178
1541
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
1179
1542
|
|
|
1180
1543
|
Note:
|
|
1181
|
-
The
|
|
1544
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
1182
1545
|
|
|
1183
1546
|
Args:
|
|
1184
1547
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -1187,7 +1550,7 @@ class LowpassBiquad(AudioTensorOperation):
|
|
|
1187
1550
|
in range of (0, 1]. Default: 0.707.
|
|
1188
1551
|
|
|
1189
1552
|
Raises:
|
|
1190
|
-
TypeError: If `sample_rate` is not of type
|
|
1553
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1191
1554
|
ValueError: If `sample_rate` is 0.
|
|
1192
1555
|
TypeError: If `cutoff_freq` is not of type float.
|
|
1193
1556
|
TypeError: If `Q` is not of type float.
|
|
@@ -1223,7 +1586,13 @@ class Magphase(AudioTensorOperation):
|
|
|
1223
1586
|
Separate a complex-valued spectrogram with shape (..., 2) into its magnitude and phase.
|
|
1224
1587
|
|
|
1225
1588
|
Args:
|
|
1226
|
-
power (float): Power of the norm, which must be non-negative
|
|
1589
|
+
power (float): Power of the norm, which must be non-negative. Default: 1.0.
|
|
1590
|
+
|
|
1591
|
+
Raises:
|
|
1592
|
+
RuntimeError: If the shape of input audio waveform does not match (..., 2).
|
|
1593
|
+
|
|
1594
|
+
Supported Platforms:
|
|
1595
|
+
``CPU``
|
|
1227
1596
|
|
|
1228
1597
|
Examples:
|
|
1229
1598
|
>>> import numpy as np
|
|
@@ -1245,13 +1614,21 @@ class Magphase(AudioTensorOperation):
|
|
|
1245
1614
|
|
|
1246
1615
|
class MaskAlongAxis(AudioTensorOperation):
|
|
1247
1616
|
"""
|
|
1248
|
-
Apply a mask along `axis
|
|
1617
|
+
Apply a mask along `axis` . Mask will be applied from indices `[mask_start, mask_start + mask_width)` .
|
|
1249
1618
|
|
|
1250
1619
|
Args:
|
|
1251
1620
|
mask_start (int): Starting position of the mask, which must be non-negative.
|
|
1252
|
-
mask_width (int): The width of the mask, which must be
|
|
1621
|
+
mask_width (int): The width of the mask, which must be larger than 0.
|
|
1253
1622
|
mask_value (float): Value to assign to the masked columns.
|
|
1254
|
-
axis (int): Axis to apply
|
|
1623
|
+
axis (int): Axis to apply mask on (1 for frequency and 2 for time).
|
|
1624
|
+
|
|
1625
|
+
Raises:
|
|
1626
|
+
ValueError: If `mask_start` is invalid (< 0).
|
|
1627
|
+
ValueError: If `mask_width` is invalid (< 1).
|
|
1628
|
+
ValueError: If `axis` is not of type int or not within [1, 2].
|
|
1629
|
+
|
|
1630
|
+
Supported Platforms:
|
|
1631
|
+
``CPU``
|
|
1255
1632
|
|
|
1256
1633
|
Examples:
|
|
1257
1634
|
>>> import numpy as np
|
|
@@ -1276,15 +1653,27 @@ class MaskAlongAxis(AudioTensorOperation):
|
|
|
1276
1653
|
|
|
1277
1654
|
class MaskAlongAxisIID(AudioTensorOperation):
|
|
1278
1655
|
"""
|
|
1279
|
-
Apply a mask along `axis
|
|
1280
|
-
`mask_width` is sampled from `uniform[0, mask_param]
|
|
1281
|
-
`max_length` is the number of columns of the specified axis
|
|
1656
|
+
Apply a mask along `axis` . Mask will be applied from indices `[mask_start, mask_start + mask_width)` , where
|
|
1657
|
+
`mask_width` is sampled from `uniform[0, mask_param]` , and `mask_start` from
|
|
1658
|
+
`uniform[0, max_length - mask_width]` , `max_length` is the number of columns of the specified axis
|
|
1659
|
+
of the spectrogram.
|
|
1282
1660
|
|
|
1283
1661
|
Args:
|
|
1284
1662
|
mask_param (int): Number of columns to be masked, will be uniformly sampled from
|
|
1285
1663
|
[0, mask_param], must be non-negative.
|
|
1286
1664
|
mask_value (float): Value to assign to the masked columns.
|
|
1287
|
-
axis (int): Axis to apply
|
|
1665
|
+
axis (int): Axis to apply mask on (1 for frequency and 2 for time).
|
|
1666
|
+
|
|
1667
|
+
Raises:
|
|
1668
|
+
TypeError: If `mask_param` is not of type int.
|
|
1669
|
+
ValueError: If `mask_param` is a negative value.
|
|
1670
|
+
TypeError: If `mask_value` is not of type float.
|
|
1671
|
+
TypeError: If `axis` is not of type int.
|
|
1672
|
+
ValueError: If `axis` is not in range of [1, 2].
|
|
1673
|
+
RuntimeError: If input tensor is not in shape of <..., freq, time>.
|
|
1674
|
+
|
|
1675
|
+
Supported Platforms:
|
|
1676
|
+
``CPU``
|
|
1288
1677
|
|
|
1289
1678
|
Examples:
|
|
1290
1679
|
>>> import numpy as np
|
|
@@ -1318,15 +1707,32 @@ class MelScale(AudioTensorOperation):
|
|
|
1318
1707
|
Convert normal STFT to STFT at the Mel scale.
|
|
1319
1708
|
|
|
1320
1709
|
Args:
|
|
1321
|
-
n_mels (int, optional): Number of mel filterbanks
|
|
1322
|
-
sample_rate (int, optional): Sample rate of audio signal
|
|
1323
|
-
f_min (float, optional): Minimum frequency
|
|
1324
|
-
f_max (float, optional): Maximum frequency
|
|
1325
|
-
n_stft (int, optional): Number of bins in STFT
|
|
1710
|
+
n_mels (int, optional): Number of mel filterbanks. Default: 128.
|
|
1711
|
+
sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
|
|
1712
|
+
f_min (float, optional): Minimum frequency. Default: 0.0.
|
|
1713
|
+
f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
|
|
1714
|
+
n_stft (int, optional): Number of bins in STFT. Default: 201.
|
|
1326
1715
|
norm (NormType, optional): Type of norm, value should be NormType.SLANEY or NormType.NONE.
|
|
1327
1716
|
If norm is NormType.SLANEY, divide the triangular mel weight by the width of the mel band.
|
|
1328
|
-
|
|
1329
|
-
mel_type (MelType, optional): Type to use, value should be MelType.SLANEY or MelType.HTK
|
|
1717
|
+
Default: NormType.NONE, no normalization.
|
|
1718
|
+
mel_type (MelType, optional): Type to use, value should be MelType.SLANEY or MelType.HTK. Default: MelType.HTK.
|
|
1719
|
+
|
|
1720
|
+
Raises:
|
|
1721
|
+
TypeError: If `n_mels` is not of type int.
|
|
1722
|
+
ValueError: If `n_mels` is not positive.
|
|
1723
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1724
|
+
ValueError: If `sample_rate` is not positive.
|
|
1725
|
+
TypeError: If `f_min` is not of type float.
|
|
1726
|
+
ValueError: If `f_min` is greater than or equal to `f_max` .
|
|
1727
|
+
TypeError: If `f_max` is not of type float.
|
|
1728
|
+
ValueError: If `f_max` is a negative number.
|
|
1729
|
+
TypeError: If `n_stft` is not of type int.
|
|
1730
|
+
ValueError: If `n_stft` is not positive.
|
|
1731
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
|
|
1732
|
+
TypeError: If `mel_type` is not of type :class:`mindspore.dataset.audio.MelType` .
|
|
1733
|
+
|
|
1734
|
+
Supported Platforms:
|
|
1735
|
+
``CPU``
|
|
1330
1736
|
|
|
1331
1737
|
Examples:
|
|
1332
1738
|
>>> import numpy as np
|
|
@@ -1339,7 +1745,7 @@ class MelScale(AudioTensorOperation):
|
|
|
1339
1745
|
"""
|
|
1340
1746
|
|
|
1341
1747
|
@check_mel_scale
|
|
1342
|
-
def __init__(self, n_mels=128, sample_rate=16000, f_min=0, f_max=None, n_stft=201, norm=NormType.NONE,
|
|
1748
|
+
def __init__(self, n_mels=128, sample_rate=16000, f_min=0.0, f_max=None, n_stft=201, norm=NormType.NONE,
|
|
1343
1749
|
mel_type=MelType.HTK):
|
|
1344
1750
|
super().__init__()
|
|
1345
1751
|
self.n_mels = n_mels
|
|
@@ -1355,12 +1761,215 @@ class MelScale(AudioTensorOperation):
|
|
|
1355
1761
|
DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_type))
|
|
1356
1762
|
|
|
1357
1763
|
|
|
1764
|
+
class MelSpectrogram(AudioTensorOperation):
|
|
1765
|
+
r"""
|
|
1766
|
+
Create MelSpectrogram for a raw audio signal.
|
|
1767
|
+
|
|
1768
|
+
Args:
|
|
1769
|
+
sample_rate (int, optional): Sampling rate of audio signal (in Hz), which can't be less than 0. Default: 16000.
|
|
1770
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0 and less than
|
|
1771
|
+
twice of the last dimension size of the input. Default: 400.
|
|
1772
|
+
win_length (int, optional): Window size, which should be greater than 0 and no more than `n_fft` . Default:
|
|
1773
|
+
None, will be set to `n_fft` .
|
|
1774
|
+
hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
|
|
1775
|
+
Default: None, will be set to `win_length // 2` .
|
|
1776
|
+
f_min (float, optional): Minimum frequency, which can't be greater than `f_max` . Default: 0.0.
|
|
1777
|
+
f_max (float, optional): Maximum frequency, which can't be less than 0. Default: None, will be set
|
|
1778
|
+
to `sample_rate // 2` .
|
|
1779
|
+
pad (int, optional): Two sided padding of signal, which can't be less than 0. Default: 0.
|
|
1780
|
+
n_mels (int, optional): Number of mel filterbanks, which can't be less than 0. Default: 128.
|
|
1781
|
+
window (WindowType, optional): A function to create a window tensor that is applied/multiplied to each
|
|
1782
|
+
frame/window. Default: WindowType.HANN.
|
|
1783
|
+
power (float, optional): Exponent for the magnitude spectrogram, which must be
|
|
1784
|
+
greater than 0, e.g., 1 for energy, 2 for power, etc. Default: 2.0.
|
|
1785
|
+
normalized (bool, optional): Whether to normalize by magnitude after stft. Default: False.
|
|
1786
|
+
center (bool, optional): Whether to pad waveform on both sides. Default: True.
|
|
1787
|
+
pad_mode (BorderType, optional): Controls the padding method used when `center` is True,
|
|
1788
|
+
can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE or BorderType.SYMMETRIC.
|
|
1789
|
+
Default: BorderType.REFLECT.
|
|
1790
|
+
onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: True.
|
|
1791
|
+
norm (NormType, optional): If NormType.SLANEY, divide the triangular mel weights by the width of the mel band
|
|
1792
|
+
(area normalization). Default: NormType.NONE, no normalization.
|
|
1793
|
+
mel_scale (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK. Default: MelType.HTK.
|
|
1794
|
+
|
|
1795
|
+
Raises:
|
|
1796
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1797
|
+
TypeError: If `n_fft` is not of type int.
|
|
1798
|
+
TypeError: If `n_mels` is not of type int.
|
|
1799
|
+
TypeError: If `f_min` is not of type float.
|
|
1800
|
+
TypeError: If `f_max` is not of type float.
|
|
1801
|
+
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
1802
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
|
|
1803
|
+
TypeError: If `mel_scale` is not of type :class:`mindspore.dataset.audio.MelType` .
|
|
1804
|
+
TypeError: If `power` is not of type float.
|
|
1805
|
+
TypeError: If `normalized` is not of type bool.
|
|
1806
|
+
TypeError: If `center` is not of type bool.
|
|
1807
|
+
TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
|
|
1808
|
+
TypeError: If `onesided` is not of type bool.
|
|
1809
|
+
TypeError: If `pad` is not of type int.
|
|
1810
|
+
TypeError: If `win_length` is not of type int.
|
|
1811
|
+
TypeError: If `hop_length` is not of type int.
|
|
1812
|
+
ValueError: If `sample_rate` is a negative number.
|
|
1813
|
+
ValueError: If `n_fft` is not positive.
|
|
1814
|
+
ValueError: If `n_mels` is a negative number.
|
|
1815
|
+
ValueError: If `f_min` is greater than `f_max` .
|
|
1816
|
+
ValueError: If `f_max` is a negative number.
|
|
1817
|
+
ValueError: If `f_min` is not less than `sample_rate // 2` when `f_max` is set to None.
|
|
1818
|
+
ValueError: If `power` is not positive.
|
|
1819
|
+
ValueError: If `pad` is a negative number.
|
|
1820
|
+
ValueError: If `win_length` is not positive.
|
|
1821
|
+
ValueError: If `hop_length` is not positive.
|
|
1822
|
+
|
|
1823
|
+
Supported Platforms:
|
|
1824
|
+
``CPU``
|
|
1825
|
+
|
|
1826
|
+
Examples:
|
|
1827
|
+
>>> import numpy as np
|
|
1828
|
+
>>>
|
|
1829
|
+
>>> from mindspore.dataset.audio import WindowType, BorderType, NormType, MelType
|
|
1830
|
+
>>>
|
|
1831
|
+
>>> waveform = np.array([[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4]]])
|
|
1832
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1833
|
+
>>> transforms = [audio.MelSpectrogram(sample_rate=16000, n_fft=16, win_length=16, hop_length=8, f_min=0.0, \
|
|
1834
|
+
... f_max=5000.0, pad=0, n_mels=8, window=WindowType.HANN, power=2.0, \
|
|
1835
|
+
... normalized=False, center=True, pad_mode=BorderType.REFLECT, \
|
|
1836
|
+
... onesided=True, norm=NormType.SLANEY, mel_scale=MelType.HTK)]
|
|
1837
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1838
|
+
"""
|
|
1839
|
+
|
|
1840
|
+
@check_mel_spectrogram
|
|
1841
|
+
def __init__(self, sample_rate=16000, n_fft=400, win_length=None, hop_length=None, f_min=0.0, f_max=None, pad=0,
|
|
1842
|
+
n_mels=128, window=WindowType.HANN, power=2.0, normalized=False, center=True,
|
|
1843
|
+
pad_mode=BorderType.REFLECT, onesided=True, norm=NormType.NONE, mel_scale=MelType.HTK):
|
|
1844
|
+
super().__init__()
|
|
1845
|
+
self.sample_rate = sample_rate
|
|
1846
|
+
self.n_fft = n_fft
|
|
1847
|
+
self.win_length = win_length if win_length is not None else n_fft
|
|
1848
|
+
self.hop_length = hop_length if hop_length is not None else self.win_length // 2
|
|
1849
|
+
self.f_min = f_min
|
|
1850
|
+
self.f_max = f_max if f_max is not None else sample_rate // 2
|
|
1851
|
+
self.pad = pad
|
|
1852
|
+
self.n_mels = n_mels
|
|
1853
|
+
self.window = window
|
|
1854
|
+
self.power = power
|
|
1855
|
+
self.normalized = normalized
|
|
1856
|
+
self.center = center
|
|
1857
|
+
self.pad_mode = pad_mode
|
|
1858
|
+
self.onesided = onesided
|
|
1859
|
+
self.norm = norm
|
|
1860
|
+
self.mel_scale = mel_scale
|
|
1861
|
+
|
|
1862
|
+
def parse(self):
|
|
1863
|
+
return cde.MelSpectrogramOperation(self.sample_rate, self.n_fft, self.win_length, self.hop_length, self.f_min,
|
|
1864
|
+
self.f_max, self.pad, self.n_mels, DE_C_WINDOW_TYPE.get(self.window),
|
|
1865
|
+
self.power, self.normalized, self.center,
|
|
1866
|
+
DE_C_BORDER_TYPE.get(self.pad_mode), self.onesided,
|
|
1867
|
+
DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_scale))
|
|
1868
|
+
|
|
1869
|
+
|
|
1870
|
+
class MFCC(AudioTensorOperation):
|
|
1871
|
+
"""
|
|
1872
|
+
Create MFCC for a raw audio signal.
|
|
1873
|
+
|
|
1874
|
+
Args:
|
|
1875
|
+
sample_rate (int, optional): Sampling rate of audio signal (in Hz), can't be less than 0. Default: 16000.
|
|
1876
|
+
n_mfcc (int, optional): Number of mfc coefficients to retain, can't be less than 0. Default: 40.
|
|
1877
|
+
dct_type (int, optional): Type of DCT (discrete cosine transform) to use, can only be 2. Default: 2.
|
|
1878
|
+
norm (NormMode, optional): Norm to use. Default: NormMode.ORTHO.
|
|
1879
|
+
log_mels (bool, optional): Whether to use log-mel spectrograms instead of db-scaled. Default: False.
|
|
1880
|
+
melkwargs (dict, optional): Arguments for :class:`mindspore.dataset.audio.MelSpectrogram`.
|
|
1881
|
+
Default: None, the default setting is a dict including
|
|
1882
|
+
|
|
1883
|
+
- 'n_fft': 400
|
|
1884
|
+
- 'win_length': n_fft
|
|
1885
|
+
- 'hop_length': win_length // 2
|
|
1886
|
+
- 'f_min': 0.0
|
|
1887
|
+
- 'f_max': sample_rate // 2
|
|
1888
|
+
- 'pad': 0
|
|
1889
|
+
- 'window': WindowType.HANN
|
|
1890
|
+
- 'power': 2.0
|
|
1891
|
+
- 'normalized': False
|
|
1892
|
+
- 'center': True
|
|
1893
|
+
- 'pad_mode': BorderType.REFLECT
|
|
1894
|
+
- 'onesided': True
|
|
1895
|
+
- 'norm': NormType.NONE
|
|
1896
|
+
- 'mel_scale': MelType.HTK
|
|
1897
|
+
|
|
1898
|
+
Raises:
|
|
1899
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1900
|
+
TypeError: If `log_mels` is not of type bool.
|
|
1901
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormMode` .
|
|
1902
|
+
TypeError: If `n_mfcc` is not of type int.
|
|
1903
|
+
TypeError: If `melkwargs` is not of type dict.
|
|
1904
|
+
ValueError: If `sample_rate` is a negative number.
|
|
1905
|
+
ValueError: If `n_mfcc` is a negative number.
|
|
1906
|
+
ValueError: If `dct_type` is not 2.
|
|
1907
|
+
|
|
1908
|
+
Supported Platforms:
|
|
1909
|
+
``CPU``
|
|
1910
|
+
|
|
1911
|
+
Examples:
|
|
1912
|
+
>>> import numpy as np
|
|
1913
|
+
>>>
|
|
1914
|
+
>>> waveform = np.array([[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
|
|
1915
|
+
... [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]])
|
|
1916
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1917
|
+
>>> transforms = [audio.MFCC(4000, 1500, 2)]
|
|
1918
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1919
|
+
"""
|
|
1920
|
+
|
|
1921
|
+
@check_mfcc
|
|
1922
|
+
def __init__(self, sample_rate=16000, n_mfcc=40, dct_type=2, norm=NormMode.ORTHO, log_mels=False, melkwargs=None):
|
|
1923
|
+
super().__init__()
|
|
1924
|
+
self.sample_rate = sample_rate
|
|
1925
|
+
self.n_mfcc = n_mfcc
|
|
1926
|
+
self.dct_type = dct_type
|
|
1927
|
+
self.norm = norm
|
|
1928
|
+
self.log_mels = log_mels
|
|
1929
|
+
self.melkwargs = melkwargs
|
|
1930
|
+
if melkwargs is None:
|
|
1931
|
+
self.melkwargs = {}
|
|
1932
|
+
self.melkwargs.setdefault("n_fft", 400)
|
|
1933
|
+
self.melkwargs.setdefault("win_length", self.melkwargs.get("n_fft"))
|
|
1934
|
+
self.melkwargs.setdefault("hop_length", self.melkwargs.get("win_length") // 2)
|
|
1935
|
+
self.melkwargs.setdefault("f_min", 0.0)
|
|
1936
|
+
self.melkwargs.setdefault("f_max", sample_rate // 2)
|
|
1937
|
+
self.melkwargs.setdefault("pad", 0)
|
|
1938
|
+
self.melkwargs.setdefault("n_mels", 128)
|
|
1939
|
+
self.melkwargs.setdefault("window", WindowType.HANN)
|
|
1940
|
+
self.melkwargs.setdefault("power", 2.0)
|
|
1941
|
+
self.melkwargs.setdefault("normalized", False)
|
|
1942
|
+
self.melkwargs.setdefault("center", True)
|
|
1943
|
+
self.melkwargs.setdefault("pad_mode", BorderType.REFLECT)
|
|
1944
|
+
self.melkwargs.setdefault("onesided", True)
|
|
1945
|
+
self.melkwargs.setdefault("norm", NormType.NONE)
|
|
1946
|
+
self.melkwargs.setdefault("mel_scale", MelType.HTK)
|
|
1947
|
+
self.window = self.melkwargs.get("window")
|
|
1948
|
+
self.pad_mode = self.melkwargs.get("pad_mode")
|
|
1949
|
+
self.norm_mel = self.melkwargs.get("norm")
|
|
1950
|
+
self.mel_scale = self.melkwargs.get("mel_scale")
|
|
1951
|
+
|
|
1952
|
+
def parse(self):
|
|
1953
|
+
return cde.MFCCOperation(self.sample_rate, self.n_mfcc, self.dct_type, DE_C_NORM_MODE.get(self.norm),
|
|
1954
|
+
self.log_mels, self.melkwargs, DE_C_WINDOW_TYPE.get(self.window),
|
|
1955
|
+
DE_C_BORDER_TYPE.get(self.pad_mode), DE_C_NORM_TYPE.get(self.norm_mel),
|
|
1956
|
+
DE_C_MEL_TYPE.get(self.mel_scale))
|
|
1957
|
+
|
|
1958
|
+
|
|
1358
1959
|
class MuLawDecoding(AudioTensorOperation):
|
|
1359
1960
|
"""
|
|
1360
|
-
Decode mu-law encoded signal.
|
|
1961
|
+
Decode mu-law encoded signal, refer to `mu-law algorithm <https://en.wikipedia.org/wiki/M-law_algorithm>`_ .
|
|
1361
1962
|
|
|
1362
1963
|
Args:
|
|
1363
|
-
quantization_channels (int, optional): Number of channels, which must be positive
|
|
1964
|
+
quantization_channels (int, optional): Number of channels, which must be positive. Default: 256.
|
|
1965
|
+
|
|
1966
|
+
Raises:
|
|
1967
|
+
TypeError: If `quantization_channels` is not of type int.
|
|
1968
|
+
ValueError: If `quantization_channels` is not a positive number.
|
|
1969
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
1970
|
+
|
|
1971
|
+
Supported Platforms:
|
|
1972
|
+
``CPU``
|
|
1364
1973
|
|
|
1365
1974
|
Examples:
|
|
1366
1975
|
>>> import numpy as np
|
|
@@ -1385,7 +1994,14 @@ class MuLawEncoding(AudioTensorOperation):
|
|
|
1385
1994
|
Encode signal based on mu-law companding.
|
|
1386
1995
|
|
|
1387
1996
|
Args:
|
|
1388
|
-
quantization_channels (int, optional): Number of channels, which must be positive
|
|
1997
|
+
quantization_channels (int, optional): Number of channels, which must be positive. Default: 256.
|
|
1998
|
+
|
|
1999
|
+
Raises:
|
|
2000
|
+
TypeError: If `quantization_channels` is not of type int.
|
|
2001
|
+
ValueError: If `quantization_channels` is not a positive number.
|
|
2002
|
+
|
|
2003
|
+
Supported Platforms:
|
|
2004
|
+
``CPU``
|
|
1389
2005
|
|
|
1390
2006
|
Examples:
|
|
1391
2007
|
>>> import numpy as np
|
|
@@ -1407,12 +2023,24 @@ class MuLawEncoding(AudioTensorOperation):
|
|
|
1407
2023
|
|
|
1408
2024
|
class Overdrive(AudioTensorOperation):
|
|
1409
2025
|
"""
|
|
1410
|
-
Apply overdrive
|
|
2026
|
+
Apply an overdrive effect to the audio waveform.
|
|
2027
|
+
|
|
2028
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
1411
2029
|
|
|
1412
2030
|
Args:
|
|
1413
|
-
gain (float, optional): Desired gain at the boost (or attenuation) in dB, in range of [0, 100]
|
|
2031
|
+
gain (float, optional): Desired gain at the boost (or attenuation) in dB, in range of [0, 100]. Default: 20.0.
|
|
1414
2032
|
color (float, optional): Controls the amount of even harmonic content in the over-driven output,
|
|
1415
|
-
in range of [0, 100]
|
|
2033
|
+
in range of [0, 100]. Default: 20.0.
|
|
2034
|
+
|
|
2035
|
+
Raises:
|
|
2036
|
+
TypeError: If `gain` is not of type float.
|
|
2037
|
+
ValueError: If `gain` is not in range of [0, 100].
|
|
2038
|
+
TypeError: If `color` is not of type float.
|
|
2039
|
+
ValueError: If `color` is not in range of [0, 100].
|
|
2040
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2041
|
+
|
|
2042
|
+
Supported Platforms:
|
|
2043
|
+
``CPU``
|
|
1416
2044
|
|
|
1417
2045
|
Examples:
|
|
1418
2046
|
>>> import numpy as np
|
|
@@ -1437,18 +2065,37 @@ class Phaser(AudioTensorOperation):
|
|
|
1437
2065
|
"""
|
|
1438
2066
|
Apply a phasing effect to the audio.
|
|
1439
2067
|
|
|
2068
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
2069
|
+
|
|
1440
2070
|
Args:
|
|
1441
2071
|
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
|
|
1442
|
-
gain_in (float, optional): Desired input gain at the boost (or attenuation) in dB
|
|
1443
|
-
|
|
1444
|
-
gain_out (float, optional): Desired output gain at the boost (or attenuation) in dB
|
|
1445
|
-
|
|
1446
|
-
delay_ms (float, optional): Desired delay in
|
|
1447
|
-
decay (float, optional): Desired decay relative to gain-in
|
|
1448
|
-
mod_speed (float, optional): Modulation speed in Hz
|
|
2072
|
+
gain_in (float, optional): Desired input gain at the boost (or attenuation) in dB,
|
|
2073
|
+
in range of [0.0, 1.0]. Default: 0.4.
|
|
2074
|
+
gain_out (float, optional): Desired output gain at the boost (or attenuation) in dB,
|
|
2075
|
+
in range of [0.0, 1e9]. Default: 0.74.
|
|
2076
|
+
delay_ms (float, optional): Desired delay in milliseconds, in range of [0.0, 5.0]. Default: 3.0.
|
|
2077
|
+
decay (float, optional): Desired decay relative to gain-in, in range of [0.0, 0.99]. Default: 0.4.
|
|
2078
|
+
mod_speed (float, optional): Modulation speed in Hz, in range of [0.1, 2.0]. Default: 0.5.
|
|
1449
2079
|
sinusoidal (bool, optional): If True, use sinusoidal modulation (preferable for multiple instruments).
|
|
1450
|
-
If False, use triangular modulation (gives single instruments a sharper
|
|
1451
|
-
|
|
2080
|
+
If False, use triangular modulation (gives single instruments a sharper phasing effect). Default: True.
|
|
2081
|
+
|
|
2082
|
+
Raises:
|
|
2083
|
+
TypeError: If `sample_rate` is not of type int.
|
|
2084
|
+
TypeError: If `gain_in` is not of type float.
|
|
2085
|
+
ValueError: If `gain_in` is not in range of [0.0, 1.0].
|
|
2086
|
+
TypeError: If `gain_out` is not of type float.
|
|
2087
|
+
ValueError: If `gain_out` is not in range of [0.0, 1e9].
|
|
2088
|
+
TypeError: If `delay_ms` is not of type float.
|
|
2089
|
+
ValueError: If `delay_ms` is not in range of [0.0, 5.0].
|
|
2090
|
+
TypeError: If `decay` is not of type float.
|
|
2091
|
+
ValueError: If `decay` is not in range of [0.0, 0.99].
|
|
2092
|
+
TypeError: If `mod_speed` is not of type float.
|
|
2093
|
+
ValueError: If `mod_speed` is not in range of [0.1, 2.0].
|
|
2094
|
+
TypeError: If `sinusoidal` is not of type bool.
|
|
2095
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2096
|
+
|
|
2097
|
+
Supported Platforms:
|
|
2098
|
+
``CPU``
|
|
1452
2099
|
|
|
1453
2100
|
Examples:
|
|
1454
2101
|
>>> import numpy as np
|
|
@@ -1478,11 +2125,20 @@ class Phaser(AudioTensorOperation):
|
|
|
1478
2125
|
|
|
1479
2126
|
class PhaseVocoder(AudioTensorOperation):
|
|
1480
2127
|
"""
|
|
1481
|
-
Given a STFT
|
|
2128
|
+
Given a STFT spectrogram, speed up in time without modifying pitch by a factor of rate.
|
|
1482
2129
|
|
|
1483
2130
|
Args:
|
|
1484
2131
|
rate (float): Speed-up factor.
|
|
1485
|
-
phase_advance (numpy.ndarray): Expected phase advance in each bin in shape of (freq, 1).
|
|
2132
|
+
phase_advance (numpy.ndarray): Expected phase advance in each bin, in shape of (freq, 1).
|
|
2133
|
+
|
|
2134
|
+
Raises:
|
|
2135
|
+
TypeError: If `rate` is not of type float.
|
|
2136
|
+
ValueError: If `rate` is not a positive number.
|
|
2137
|
+
TypeError: If `phase_advance` is not of type :class:`numpy.ndarray` .
|
|
2138
|
+
RuntimeError: If input tensor is not in shape of <..., freq, num_frame, complex=2>.
|
|
2139
|
+
|
|
2140
|
+
Supported Platforms:
|
|
2141
|
+
``CPU``
|
|
1486
2142
|
|
|
1487
2143
|
Examples:
|
|
1488
2144
|
>>> import numpy as np
|
|
@@ -1504,6 +2160,68 @@ class PhaseVocoder(AudioTensorOperation):
|
|
|
1504
2160
|
return cde.PhaseVocoderOperation(self.rate, self.phase_advance)
|
|
1505
2161
|
|
|
1506
2162
|
|
|
2163
|
+
class PitchShift(AudioTensorOperation):
|
|
2164
|
+
"""
|
|
2165
|
+
Shift the pitch of a waveform by `n_steps` steps.
|
|
2166
|
+
|
|
2167
|
+
Args:
|
|
2168
|
+
sample_rate (int): Sampling rate of waveform (in Hz).
|
|
2169
|
+
n_steps (int): The steps to shift waveform.
|
|
2170
|
+
bins_per_octave (int, optional): The number of steps per octave. Default: 12.
|
|
2171
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 512.
|
|
2172
|
+
win_length (int, optional): Window size. Default: None, will be set to `n_fft` .
|
|
2173
|
+
hop_length (int, optional): Length of hop between STFT windows. Default: None,
|
|
2174
|
+
will be set to `win_length // 4` .
|
|
2175
|
+
window (WindowType, optional): Window tensor that is applied/multiplied to each frame/window.
|
|
2176
|
+
Default: WindowType.HANN.
|
|
2177
|
+
|
|
2178
|
+
Raises:
|
|
2179
|
+
TypeError: If `sample_rate` is not of type int.
|
|
2180
|
+
TypeError: If `n_steps` is not of type int.
|
|
2181
|
+
TypeError: If `bins_per_octave` is not of type int.
|
|
2182
|
+
TypeError: If `n_fft` is not of type int.
|
|
2183
|
+
TypeError: If `win_length` is not of type int.
|
|
2184
|
+
TypeError: If `hop_length` is not of type int.
|
|
2185
|
+
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
2186
|
+
ValueError: If `sample_rate` is a negative number.
|
|
2187
|
+
ValueError: If `bins_per_octave` is 0.
|
|
2188
|
+
ValueError: If `n_fft` is a negative number.
|
|
2189
|
+
ValueError: If `win_length` is not positive.
|
|
2190
|
+
ValueError: If `hop_length` is not positive.
|
|
2191
|
+
|
|
2192
|
+
Supported Platforms:
|
|
2193
|
+
``CPU``
|
|
2194
|
+
|
|
2195
|
+
Examples:
|
|
2196
|
+
>>> import numpy as np
|
|
2197
|
+
>>>
|
|
2198
|
+
>>> import mindspore.dataset as ds
|
|
2199
|
+
>>> import mindspore.dataset.audio as audio
|
|
2200
|
+
>>> from mindspore.dataset.audio import WindowType
|
|
2201
|
+
>>>
|
|
2202
|
+
>>> waveform = np.random.random([1, 1, 300])
|
|
2203
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2204
|
+
>>> transforms = [audio.PitchShift(sample_rate=16000, n_steps=4)]
|
|
2205
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2206
|
+
"""
|
|
2207
|
+
|
|
2208
|
+
@check_pitch_shift
|
|
2209
|
+
def __init__(self, sample_rate, n_steps, bins_per_octave=12, n_fft=512, win_length=None,
|
|
2210
|
+
hop_length=None, window=WindowType.HANN):
|
|
2211
|
+
super().__init__()
|
|
2212
|
+
self.sample_rate = sample_rate
|
|
2213
|
+
self.n_steps = n_steps
|
|
2214
|
+
self.bins_per_octave = bins_per_octave
|
|
2215
|
+
self.n_fft = n_fft
|
|
2216
|
+
self.win_length = win_length if win_length is not None else n_fft
|
|
2217
|
+
self.hop_length = hop_length if hop_length is not None else self.win_length // 4
|
|
2218
|
+
self.window = window
|
|
2219
|
+
|
|
2220
|
+
def parse(self):
|
|
2221
|
+
return cde.PitchShiftOperation(self.sample_rate, self.n_steps, self.bins_per_octave, self.n_fft,
|
|
2222
|
+
self.win_length, self.hop_length, DE_C_WINDOW_TYPE.get(self.window))
|
|
2223
|
+
|
|
2224
|
+
|
|
1507
2225
|
DE_C_RESAMPLE_METHOD = {ResampleMethod.SINC_INTERPOLATION: cde.ResampleMethod.DE_RESAMPLE_SINC_INTERPOLATION,
|
|
1508
2226
|
ResampleMethod.KAISER_WINDOW: cde.ResampleMethod.DE_RESAMPLE_KAISER_WINDOW}
|
|
1509
2227
|
|
|
@@ -1513,16 +2231,30 @@ class Resample(AudioTensorOperation):
|
|
|
1513
2231
|
Resample a signal from one frequency to another. A resample method can be given.
|
|
1514
2232
|
|
|
1515
2233
|
Args:
|
|
1516
|
-
orig_freq (float, optional): The original frequency of the signal,
|
|
1517
|
-
new_freq (float, optional): The desired frequency,
|
|
1518
|
-
resample_method (ResampleMethod, optional): The resample method,
|
|
1519
|
-
ResampleMethod.
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
efficient, which must be positive (default=6).
|
|
2234
|
+
orig_freq (float, optional): The original frequency of the signal, must be positive. Default: 16000.
|
|
2235
|
+
new_freq (float, optional): The desired frequency, must be positive. Default: 16000.
|
|
2236
|
+
resample_method (ResampleMethod, optional): The resample method to use, can be ResampleMethod.SINC_INTERPOLATION
|
|
2237
|
+
or ResampleMethod.KAISER_WINDOW. Default: ResampleMethod.SINC_INTERPOLATION.
|
|
2238
|
+
lowpass_filter_width (int, optional): Controls the sharpness of the filter; a larger value is sharper but less
|
|
2239
|
+
efficient, must be positive. Default: 6.
|
|
1523
2240
|
rolloff (float, optional): The roll-off frequency of the filter, as a fraction of the Nyquist. Lower values
|
|
1524
|
-
reduce anti-aliasing, but also reduce some of the highest frequencies, range
|
|
1525
|
-
beta (float, optional): The shape parameter used for kaiser window
|
|
2241
|
+
reduce anti-aliasing, but also reduce some of the highest frequencies, in range of (0, 1]. Default: 0.99.
|
|
2242
|
+
beta (float, optional): The shape parameter used for kaiser window. Default: None, will use 14.769656459379492.
|
|
2243
|
+
|
|
2244
|
+
Raises:
|
|
2245
|
+
TypeError: If `orig_freq` is not of type float.
|
|
2246
|
+
ValueError: If `orig_freq` is not a positive number.
|
|
2247
|
+
TypeError: If `new_freq` is not of type float.
|
|
2248
|
+
ValueError: If `new_freq` is not a positive number.
|
|
2249
|
+
TypeError: If `resample_method` is not of type :class:`mindspore.dataset.audio.ResampleMethod` .
|
|
2250
|
+
TypeError: If `lowpass_filter_width` is not of type int.
|
|
2251
|
+
ValueError: If `lowpass_filter_width` is not a positive number.
|
|
2252
|
+
TypeError: If `rolloff` is not of type float.
|
|
2253
|
+
ValueError: If `rolloff` is not in range of (0, 1].
|
|
2254
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2255
|
+
|
|
2256
|
+
Supported Platforms:
|
|
2257
|
+
``CPU``
|
|
1526
2258
|
|
|
1527
2259
|
Examples:
|
|
1528
2260
|
>>> import numpy as np
|
|
@@ -1555,12 +2287,21 @@ class Resample(AudioTensorOperation):
|
|
|
1555
2287
|
|
|
1556
2288
|
class RiaaBiquad(AudioTensorOperation):
|
|
1557
2289
|
"""
|
|
1558
|
-
Apply RIAA vinyl playback equalization.
|
|
2290
|
+
Apply RIAA vinyl playback equalization.
|
|
2291
|
+
|
|
2292
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
1559
2293
|
|
|
1560
2294
|
Args:
|
|
1561
2295
|
sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz),
|
|
1562
2296
|
can only be one of 44100, 48000, 88200, 96000.
|
|
1563
2297
|
|
|
2298
|
+
Raises:
|
|
2299
|
+
TypeError: If `sample_rate` is not of type int.
|
|
2300
|
+
ValueError: If `sample_rate` is not any of [44100, 48000, 88200, 96000].
|
|
2301
|
+
|
|
2302
|
+
Supported Platforms:
|
|
2303
|
+
``CPU``
|
|
2304
|
+
|
|
1564
2305
|
Examples:
|
|
1565
2306
|
>>> import numpy as np
|
|
1566
2307
|
>>>
|
|
@@ -1584,12 +2325,23 @@ class SlidingWindowCmn(AudioTensorOperation):
|
|
|
1584
2325
|
Apply sliding-window cepstral mean (and optionally variance) normalization per utterance.
|
|
1585
2326
|
|
|
1586
2327
|
Args:
|
|
1587
|
-
cmn_window (int, optional): Window in frames for running average CMN computation
|
|
2328
|
+
cmn_window (int, optional): Window in frames for running average CMN computation. Default: 600.
|
|
1588
2329
|
min_cmn_window (int, optional): Minimum CMN window used at start of decoding (adds latency only at start).
|
|
1589
|
-
Only applicable if center is False, ignored if center is True
|
|
2330
|
+
Only applicable if center is False, ignored if center is True. Default: 100.
|
|
1590
2331
|
center (bool, optional): If True, use a window centered on the current frame. If False, window is
|
|
1591
|
-
to the left.
|
|
1592
|
-
norm_vars (bool, optional): If True, normalize variance to one.
|
|
2332
|
+
to the left. Default: False.
|
|
2333
|
+
norm_vars (bool, optional): If True, normalize variance to one. Default: False.
|
|
2334
|
+
|
|
2335
|
+
Raises:
|
|
2336
|
+
TypeError: If `cmn_window` is not of type int.
|
|
2337
|
+
ValueError: If `cmn_window` is a negative number.
|
|
2338
|
+
TypeError: If `min_cmn_window` is not of type int.
|
|
2339
|
+
ValueError: If `min_cmn_window` is a negative number.
|
|
2340
|
+
TypeError: If `center` is not of type bool.
|
|
2341
|
+
TypeError: If `norm_vars` is not of type bool.
|
|
2342
|
+
|
|
2343
|
+
Supported Platforms:
|
|
2344
|
+
``CPU``
|
|
1593
2345
|
|
|
1594
2346
|
Examples:
|
|
1595
2347
|
>>> import numpy as np
|
|
@@ -1621,17 +2373,35 @@ DE_C_WINDOW_TYPE = {WindowType.BARTLETT: cde.WindowType.DE_WINDOW_TYPE_BARTLETT,
|
|
|
1621
2373
|
|
|
1622
2374
|
class SpectralCentroid(TensorOperation):
|
|
1623
2375
|
"""
|
|
1624
|
-
|
|
2376
|
+
Compute the spectral centroid for each channel along the time axis.
|
|
1625
2377
|
|
|
1626
2378
|
Args:
|
|
1627
|
-
sample_rate (int): Sampling rate of
|
|
1628
|
-
n_fft (int, optional): Size of FFT, creates n_fft // 2 + 1 bins
|
|
1629
|
-
win_length (int, optional): Window size
|
|
1630
|
-
hop_length (int, optional): Length of hop between STFT windows
|
|
1631
|
-
pad (int, optional): Two sided padding of signal
|
|
2379
|
+
sample_rate (int): Sampling rate of audio signal, e.g. 44100 (Hz).
|
|
2380
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 400.
|
|
2381
|
+
win_length (int, optional): Window size. Default: None, will use `n_fft` .
|
|
2382
|
+
hop_length (int, optional): Length of hop between STFT windows. Default: None, will use `win_length // 2` .
|
|
2383
|
+
pad (int, optional): Two sided padding of signal. Default: 0.
|
|
1632
2384
|
window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
|
|
1633
|
-
|
|
1634
|
-
or WindowType.KAISER
|
|
2385
|
+
can be WindowType.BARTLETT, WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN
|
|
2386
|
+
or WindowType.KAISER. Default: WindowType.HANN.
|
|
2387
|
+
|
|
2388
|
+
Raises:
|
|
2389
|
+
TypeError: If `sample_rate` is not of type int.
|
|
2390
|
+
ValueError: If `sample_rate` is a negative number.
|
|
2391
|
+
TypeError: If `n_fft` is not of type int.
|
|
2392
|
+
ValueError: If `n_fft` is not a positive number.
|
|
2393
|
+
TypeError: If `win_length` is not of type int.
|
|
2394
|
+
ValueError: If `win_length` is not a positive number.
|
|
2395
|
+
ValueError: If `win_length` is greater than `n_fft` .
|
|
2396
|
+
TypeError: If `hop_length` is not of type int.
|
|
2397
|
+
ValueError: If `hop_length` is not a positive number.
|
|
2398
|
+
TypeError: If `pad` is not of type int.
|
|
2399
|
+
ValueError: If `pad` is a negative number.
|
|
2400
|
+
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
2401
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2402
|
+
|
|
2403
|
+
Supported Platforms:
|
|
2404
|
+
``CPU``
|
|
1635
2405
|
|
|
1636
2406
|
Examples:
|
|
1637
2407
|
>>> import numpy as np
|
|
@@ -1662,21 +2432,43 @@ class Spectrogram(TensorOperation):
|
|
|
1662
2432
|
Create a spectrogram from an audio signal.
|
|
1663
2433
|
|
|
1664
2434
|
Args:
|
|
1665
|
-
n_fft (int, optional): Size of FFT, creates n_fft // 2 + 1 bins
|
|
1666
|
-
win_length (int, optional): Window size
|
|
1667
|
-
hop_length (int, optional): Length of hop between STFT windows
|
|
1668
|
-
pad (int): Two sided padding of signal
|
|
2435
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 400.
|
|
2436
|
+
win_length (int, optional): Window size. Default: None, will use `n_fft` .
|
|
2437
|
+
hop_length (int, optional): Length of hop between STFT windows. Default: None, will use `win_length // 2` .
|
|
2438
|
+
pad (int, optional): Two sided padding of signal. Default: 0.
|
|
1669
2439
|
window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
|
|
1670
|
-
|
|
1671
|
-
or WindowType.KAISER
|
|
1672
|
-
power (float, optional): Exponent for the magnitude spectrogram,
|
|
1673
|
-
|
|
1674
|
-
normalized (bool, optional): Whether to normalize by magnitude after stft
|
|
1675
|
-
center (bool, optional): Whether to pad waveform on both sides
|
|
1676
|
-
pad_mode (BorderType, optional): Controls the padding method used when center is True,
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
onesided (bool, optional): Controls whether to return half of results to avoid redundancy
|
|
2440
|
+
can be WindowType.BARTLETT, WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN
|
|
2441
|
+
or WindowType.KAISER. Currently, Kaiser window is not supported on macOS. Default: WindowType.HANN.
|
|
2442
|
+
power (float, optional): Exponent for the magnitude spectrogram, must be non-negative,
|
|
2443
|
+
e.g., 1 for energy, 2 for power, etc. Default: 2.0.
|
|
2444
|
+
normalized (bool, optional): Whether to normalize by magnitude after stft. Default: False.
|
|
2445
|
+
center (bool, optional): Whether to pad waveform on both sides. Default: True.
|
|
2446
|
+
pad_mode (BorderType, optional): Controls the padding method used when `center` is True,
|
|
2447
|
+
can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE or BorderType.SYMMETRIC.
|
|
2448
|
+
Default: BorderType.REFLECT.
|
|
2449
|
+
onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: True.
|
|
2450
|
+
|
|
2451
|
+
Raises:
|
|
2452
|
+
TypeError: If `n_fft` is not of type int.
|
|
2453
|
+
ValueError: If `n_fft` is not a positive number.
|
|
2454
|
+
TypeError: If `win_length` is not of type int.
|
|
2455
|
+
ValueError: If `win_length` is not a positive number.
|
|
2456
|
+
ValueError: If `win_length` is greater than `n_fft` .
|
|
2457
|
+
TypeError: If `hop_length` is not of type int.
|
|
2458
|
+
ValueError: If `hop_length` is not a positive number.
|
|
2459
|
+
TypeError: If `pad` is not of type int.
|
|
2460
|
+
ValueError: If `pad` is a negative number.
|
|
2461
|
+
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
2462
|
+
TypeError: If `power` is not of type float.
|
|
2463
|
+
ValueError: If `power` is a negative number.
|
|
2464
|
+
TypeError: If `normalized` is not of type bool.
|
|
2465
|
+
TypeError: If `center` is not of type bool.
|
|
2466
|
+
TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
|
|
2467
|
+
TypeError: If `onesided` is not of type bool.
|
|
2468
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2469
|
+
|
|
2470
|
+
Supported Platforms:
|
|
2471
|
+
``CPU``
|
|
1680
2472
|
|
|
1681
2473
|
Examples:
|
|
1682
2474
|
>>> import numpy as np
|
|
@@ -1713,7 +2505,7 @@ class TimeMasking(AudioTensorOperation):
|
|
|
1713
2505
|
Apply masking to a spectrogram in the time domain.
|
|
1714
2506
|
|
|
1715
2507
|
Note:
|
|
1716
|
-
The
|
|
2508
|
+
The shape of the audio waveform to be processed needs to be <..., freq, time>.
|
|
1717
2509
|
|
|
1718
2510
|
Args:
|
|
1719
2511
|
iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: False.
|
|
@@ -1769,20 +2561,20 @@ class TimeStretch(AudioTensorOperation):
|
|
|
1769
2561
|
Stretch Short Time Fourier Transform (STFT) in time without modifying pitch for a given rate.
|
|
1770
2562
|
|
|
1771
2563
|
Note:
|
|
1772
|
-
The
|
|
2564
|
+
The shape of the audio waveform to be processed needs to be <..., freq, time, complex=2>.
|
|
1773
2565
|
The first dimension represents the real part while the second represents the imaginary.
|
|
1774
2566
|
|
|
1775
2567
|
Args:
|
|
1776
2568
|
hop_length (int, optional): Length of hop between STFT windows, i.e. the number of samples
|
|
1777
|
-
between consecutive frames. Default: None, will use `n_freq - 1
|
|
2569
|
+
between consecutive frames. Default: None, will use `n_freq - 1` .
|
|
1778
2570
|
n_freq (int, optional): Number of filter banks from STFT. Default: 201.
|
|
1779
2571
|
fixed_rate (float, optional): Rate to speed up or slow down by. Default: None, will keep
|
|
1780
2572
|
the original rate.
|
|
1781
2573
|
|
|
1782
2574
|
Raises:
|
|
1783
|
-
TypeError: If `hop_length` is not of type
|
|
2575
|
+
TypeError: If `hop_length` is not of type int.
|
|
1784
2576
|
ValueError: If `hop_length` is not a positive number.
|
|
1785
|
-
TypeError: If `n_freq` is not of type
|
|
2577
|
+
TypeError: If `n_freq` is not of type int.
|
|
1786
2578
|
ValueError: If `n_freq` is not a positive number.
|
|
1787
2579
|
TypeError: If `fixed_rate` is not of type float.
|
|
1788
2580
|
ValueError: If `fixed_rate` is not a positive number.
|
|
@@ -1822,13 +2614,28 @@ class TimeStretch(AudioTensorOperation):
|
|
|
1822
2614
|
|
|
1823
2615
|
class TrebleBiquad(AudioTensorOperation):
|
|
1824
2616
|
"""
|
|
1825
|
-
Design a treble tone-control effect.
|
|
2617
|
+
Design a treble tone-control effect.
|
|
2618
|
+
|
|
2619
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
1826
2620
|
|
|
1827
2621
|
Args:
|
|
1828
|
-
sample_rate (int): Sampling rate
|
|
2622
|
+
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
1829
2623
|
gain (float): Desired gain at the boost (or attenuation) in dB.
|
|
1830
|
-
central_freq (float, optional): Central frequency (in Hz)
|
|
1831
|
-
Q(float, optional): Quality factor
|
|
2624
|
+
central_freq (float, optional): Central frequency (in Hz). Default: 3000.
|
|
2625
|
+
Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
|
|
2626
|
+
in range of (0, 1]. Default: 0.707.
|
|
2627
|
+
|
|
2628
|
+
Raises:
|
|
2629
|
+
TypeError: If `sample_rate` is not of type int.
|
|
2630
|
+
ValueError: If `sample_rate` is 0.
|
|
2631
|
+
TypeError: If `gain` is not of type float.
|
|
2632
|
+
TypeError: If `central_freq` is not of type float.
|
|
2633
|
+
TypeError: If `Q` is not of type float.
|
|
2634
|
+
ValueError: If `Q` is not in range of (0, 1].
|
|
2635
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2636
|
+
|
|
2637
|
+
Supported Platforms:
|
|
2638
|
+
``CPU``
|
|
1832
2639
|
|
|
1833
2640
|
Examples:
|
|
1834
2641
|
>>> import numpy as np
|
|
@@ -1853,37 +2660,82 @@ class TrebleBiquad(AudioTensorOperation):
|
|
|
1853
2660
|
|
|
1854
2661
|
class Vad(AudioTensorOperation):
|
|
1855
2662
|
"""
|
|
1856
|
-
|
|
2663
|
+
Voice activity detector.
|
|
2664
|
+
|
|
2665
|
+
Attempt to trim silence and quiet background sounds from the ends of recordings of speech.
|
|
2666
|
+
|
|
2667
|
+
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
1857
2668
|
|
|
1858
2669
|
Args:
|
|
1859
|
-
sample_rate (int):
|
|
1860
|
-
trigger_level (float, optional): The measurement level used to trigger activity detection
|
|
1861
|
-
trigger_time (float, optional): The time constant (in seconds) used to help ignore short
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
2670
|
+
sample_rate (int): Sampling rate of audio signal.
|
|
2671
|
+
trigger_level (float, optional): The measurement level used to trigger activity detection. Default: 7.0.
|
|
2672
|
+
trigger_time (float, optional): The time constant (in seconds) used to help ignore short bursts of
|
|
2673
|
+
sounds. Default: 0.25.
|
|
2674
|
+
search_time (float, optional): The amount of audio (in seconds) to search for quieter/shorter bursts of audio
|
|
2675
|
+
to include prior to the detected trigger point. Default: 1.0.
|
|
2676
|
+
allowed_gap (float, optional): The allowed gap (in seconds) between quieter/shorter bursts of audio to include
|
|
2677
|
+
prior to the detected trigger point. Default: 0.25.
|
|
1866
2678
|
pre_trigger_time (float, optional): The amount of audio (in seconds) to preserve before the trigger point and
|
|
1867
|
-
any found quieter/shorter bursts
|
|
1868
|
-
boot_time (float, optional): The time for the initial noise estimate
|
|
1869
|
-
noise_up_time (float, optional): Time constant used by the adaptive noise estimator
|
|
1870
|
-
increasing
|
|
1871
|
-
noise_down_time (float, optional): Time constant used by the adaptive noise estimator
|
|
1872
|
-
decreasing
|
|
1873
|
-
noise_reduction_amount (float, optional):
|
|
1874
|
-
|
|
1875
|
-
measure_freq (float, optional):
|
|
1876
|
-
measure_duration (float, optional): The duration of measurement
|
|
1877
|
-
period
|
|
1878
|
-
measure_smooth_time (float, optional):
|
|
1879
|
-
hp_filter_freq (float, optional): The
|
|
1880
|
-
detector algorithm
|
|
1881
|
-
lp_filter_freq (float, optional): The
|
|
1882
|
-
detector algorithm
|
|
1883
|
-
hp_lifter_freq (float, optional): The
|
|
1884
|
-
detector algorithm
|
|
1885
|
-
lp_lifter_freq (float, optional): The
|
|
1886
|
-
detector algorithm
|
|
2679
|
+
any found quieter/shorter bursts. Default: 0.0.
|
|
2680
|
+
boot_time (float, optional): The time for the initial noise estimate. Default: 0.35.
|
|
2681
|
+
noise_up_time (float, optional): Time constant used by the adaptive noise estimator for when the noise level is
|
|
2682
|
+
increasing. Default: 0.1.
|
|
2683
|
+
noise_down_time (float, optional): Time constant used by the adaptive noise estimator for when the noise level
|
|
2684
|
+
is decreasing. Default: 0.01.
|
|
2685
|
+
noise_reduction_amount (float, optional): Amount of noise reduction to use in the detection algorithm.
|
|
2686
|
+
Default: 1.35.
|
|
2687
|
+
measure_freq (float, optional): Frequency of the algorithm's processing/measurements. Default: 20.0.
|
|
2688
|
+
measure_duration (float, optional): The duration of measurement. Default: None, will use twice the measurement
|
|
2689
|
+
period.
|
|
2690
|
+
measure_smooth_time (float, optional): Time constant used to smooth spectral measurements. Default: 0.4.
|
|
2691
|
+
hp_filter_freq (float, optional): The 'Brick-wall' frequency of high-pass filter applied at the input to the
|
|
2692
|
+
detector algorithm. Default: 50.0.
|
|
2693
|
+
lp_filter_freq (float, optional): The 'Brick-wall' frequency of low-pass filter applied at the input to the
|
|
2694
|
+
detector algorithm. Default: 6000.0.
|
|
2695
|
+
hp_lifter_freq (float, optional): The 'Brick-wall' frequency of high-pass lifter used in the
|
|
2696
|
+
detector algorithm. Default: 150.0.
|
|
2697
|
+
lp_lifter_freq (float, optional): The 'Brick-wall' frequency of low-pass lifter used in the
|
|
2698
|
+
detector algorithm. Default: 2000.0.
|
|
2699
|
+
|
|
2700
|
+
Raises:
|
|
2701
|
+
TypeError: If `sample_rate` is not of type int.
|
|
2702
|
+
ValueError: If `sample_rate` is not a positive number.
|
|
2703
|
+
TypeError: If `trigger_level` is not of type float.
|
|
2704
|
+
TypeError: If `trigger_time` is not of type float.
|
|
2705
|
+
ValueError: If `trigger_time` is a negative number.
|
|
2706
|
+
TypeError: If `search_time` is not of type float.
|
|
2707
|
+
ValueError: If `search_time` is a negative number.
|
|
2708
|
+
TypeError: If `allowed_gap` is not of type float.
|
|
2709
|
+
ValueError: If `allowed_gap` is a negative number.
|
|
2710
|
+
TypeError: If `pre_trigger_time` is not of type float.
|
|
2711
|
+
ValueError: If `pre_trigger_time` is a negative number.
|
|
2712
|
+
TypeError: If `boot_time` is not of type float.
|
|
2713
|
+
ValueError: If `boot_time` is a negative number.
|
|
2714
|
+
TypeError: If `noise_up_time` is not of type float.
|
|
2715
|
+
ValueError: If `noise_up_time` is a negative number.
|
|
2716
|
+
TypeError: If `noise_down_time` is not of type float.
|
|
2717
|
+
ValueError: If `noise_down_time` is a negative number.
|
|
2718
|
+
ValueError: If `noise_up_time` is less than `noise_down_time` .
|
|
2719
|
+
TypeError: If `noise_reduction_amount` is not of type float.
|
|
2720
|
+
ValueError: If `noise_reduction_amount` is a negative number.
|
|
2721
|
+
TypeError: If `measure_freq` is not of type float.
|
|
2722
|
+
ValueError: If `measure_freq` is not a positive number.
|
|
2723
|
+
TypeError: If `measure_duration` is not of type float.
|
|
2724
|
+
ValueError: If `measure_duration` is a negative number.
|
|
2725
|
+
TypeError: If `measure_smooth_time` is not of type float.
|
|
2726
|
+
ValueError: If `measure_smooth_time` is a negative number.
|
|
2727
|
+
TypeError: If `hp_filter_freq` is not of type float.
|
|
2728
|
+
ValueError: If `hp_filter_freq` is not a positive number.
|
|
2729
|
+
TypeError: If `lp_filter_freq` is not of type float.
|
|
2730
|
+
ValueError: If `lp_filter_freq` is not a positive number.
|
|
2731
|
+
TypeError: If `hp_lifter_freq` is not of type float.
|
|
2732
|
+
ValueError: If `hp_lifter_freq` is not a positive number.
|
|
2733
|
+
TypeError: If `lp_lifter_freq` is not of type float.
|
|
2734
|
+
ValueError: If `lp_lifter_freq` is not a positive number.
|
|
2735
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2736
|
+
|
|
2737
|
+
Supported Platforms:
|
|
2738
|
+
``CPU``
|
|
1887
2739
|
|
|
1888
2740
|
Examples:
|
|
1889
2741
|
>>> import numpy as np
|
|
@@ -1933,15 +2785,25 @@ DE_C_GAIN_TYPE = {GainType.AMPLITUDE: cde.GainType.DE_GAIN_TYPE_AMPLITUDE,
|
|
|
1933
2785
|
|
|
1934
2786
|
class Vol(AudioTensorOperation):
|
|
1935
2787
|
"""
|
|
1936
|
-
|
|
2788
|
+
Adjust volume of waveform.
|
|
1937
2789
|
|
|
1938
2790
|
Args:
|
|
1939
|
-
gain (float):
|
|
1940
|
-
If gain_type
|
|
1941
|
-
If gain_type
|
|
1942
|
-
If gain_type
|
|
1943
|
-
gain_type (GainType, optional): Type of gain,
|
|
1944
|
-
|
|
2791
|
+
gain (float): Gain at the boost (or attenuation).
|
|
2792
|
+
If `gain_type` is GainType.AMPLITUDE, it is a non-negative amplitude ratio.
|
|
2793
|
+
If `gain_type` is GainType.POWER, it is a power (voltage squared).
|
|
2794
|
+
If `gain_type` is GainType.DB, it is in decibels.
|
|
2795
|
+
gain_type (GainType, optional): Type of gain, can be GainType.AMPLITUDE, GainType.POWER
|
|
2796
|
+
or GainType.DB. Default: GainType.AMPLITUDE.
|
|
2797
|
+
|
|
2798
|
+
Raises:
|
|
2799
|
+
TypeError: If `gain` is not of type float.
|
|
2800
|
+
TypeError: If `gain_type` is not of type :class:`mindspore.dataset.audio.GainType` .
|
|
2801
|
+
ValueError: If `gain` is a negative number when `gain_type` is GainType.AMPLITUDE.
|
|
2802
|
+
ValueError: If `gain` is not a positive number when `gain_type` is GainType.POWER.
|
|
2803
|
+
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2804
|
+
|
|
2805
|
+
Supported Platforms:
|
|
2806
|
+
``CPU``
|
|
1945
2807
|
|
|
1946
2808
|
Examples:
|
|
1947
2809
|
>>> import numpy as np
|