mindspore 2.0.0rc1__cp38-none-any.whl → 2.2.0__cp38-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +633 -804
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -75,7 +75,7 @@ class AllpassBiquad(AudioTensorOperation):
|
|
|
75
75
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
76
76
|
central_freq (float): Central frequency (in Hz).
|
|
77
77
|
Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
|
|
78
|
-
in range of (0, 1]. Default: 0.707
|
|
78
|
+
in range of (0, 1]. Default: ``0.707``.
|
|
79
79
|
|
|
80
80
|
Raises:
|
|
81
81
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -90,11 +90,16 @@ class AllpassBiquad(AudioTensorOperation):
|
|
|
90
90
|
|
|
91
91
|
Examples:
|
|
92
92
|
>>> import numpy as np
|
|
93
|
+
>>> import mindspore.dataset as ds
|
|
93
94
|
>>>
|
|
94
95
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
95
96
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
96
|
-
>>> transforms = [audio.AllpassBiquad(44100, 200.0)]
|
|
97
|
+
>>> transforms = [ds.audio.AllpassBiquad(44100, 200.0)]
|
|
97
98
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
99
|
+
|
|
100
|
+
Tutorial Examples:
|
|
101
|
+
- `Illustration of audio transforms
|
|
102
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
98
103
|
"""
|
|
99
104
|
|
|
100
105
|
@check_allpass_biquad
|
|
@@ -121,15 +126,15 @@ class AmplitudeToDB(AudioTensorOperation):
|
|
|
121
126
|
|
|
122
127
|
Args:
|
|
123
128
|
stype (ScaleType, optional): Scale of the input waveform, which can be
|
|
124
|
-
ScaleType.POWER or ScaleType.MAGNITUDE. Default: ScaleType.POWER.
|
|
129
|
+
``ScaleType.POWER`` or ``ScaleType.MAGNITUDE``. Default: ``ScaleType.POWER``.
|
|
125
130
|
ref_value (float, optional): Multiplier reference value for generating
|
|
126
|
-
`db_multiplier` . Default: 1.0. The formula is
|
|
131
|
+
`db_multiplier` . Default: ``1.0``. The formula is
|
|
127
132
|
|
|
128
|
-
:math:`\text{db_multiplier} =
|
|
133
|
+
:math:`\text{db_multiplier} = \log10(\max(\text{ref_value}, amin))` .
|
|
129
134
|
|
|
130
135
|
amin (float, optional): Lower bound to clamp the input waveform, which must
|
|
131
|
-
be greater than zero. Default: 1e-10
|
|
132
|
-
top_db (float, optional): Minimum cut-off decibels, which must be non-negative. Default: 80.0
|
|
136
|
+
be greater than zero. Default: ``1e-10``.
|
|
137
|
+
top_db (float, optional): Minimum cut-off decibels, which must be non-negative. Default: ``80.0``.
|
|
133
138
|
|
|
134
139
|
Raises:
|
|
135
140
|
TypeError: If `stype` is not of type :class:`mindspore.dataset.audio.ScaleType` .
|
|
@@ -146,12 +151,18 @@ class AmplitudeToDB(AudioTensorOperation):
|
|
|
146
151
|
|
|
147
152
|
Examples:
|
|
148
153
|
>>> import numpy as np
|
|
154
|
+
>>> import mindspore.dataset as ds
|
|
155
|
+
>>> import mindspore.dataset.audio as audio
|
|
149
156
|
>>> from mindspore.dataset.audio import ScaleType
|
|
150
157
|
>>>
|
|
151
158
|
>>> waveform = np.random.random([1, 400 // 2 + 1, 30])
|
|
152
159
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
153
160
|
>>> transforms = [audio.AmplitudeToDB(stype=ScaleType.POWER)]
|
|
154
161
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
162
|
+
|
|
163
|
+
Tutorial Examples:
|
|
164
|
+
- `Illustration of audio transforms
|
|
165
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
155
166
|
"""
|
|
156
167
|
|
|
157
168
|
@check_amplitude_to_db
|
|
@@ -182,11 +193,17 @@ class Angle(AudioTensorOperation):
|
|
|
182
193
|
|
|
183
194
|
Examples:
|
|
184
195
|
>>> import numpy as np
|
|
196
|
+
>>> import mindspore.dataset as ds
|
|
197
|
+
>>> import mindspore.dataset.audio as audio
|
|
185
198
|
>>>
|
|
186
199
|
>>> waveform = np.array([[1.43, 5.434], [23.54, 89.38]])
|
|
187
200
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
188
201
|
>>> transforms = [audio.Angle()]
|
|
189
202
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
203
|
+
|
|
204
|
+
Tutorial Examples:
|
|
205
|
+
- `Illustration of audio transforms
|
|
206
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
190
207
|
"""
|
|
191
208
|
|
|
192
209
|
def parse(self):
|
|
@@ -210,9 +227,10 @@ class BandBiquad(AudioTensorOperation):
|
|
|
210
227
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
211
228
|
central_freq (float): Central frequency (in Hz).
|
|
212
229
|
Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
|
|
213
|
-
in range of (0, 1]. Default: 0.707
|
|
214
|
-
noise (bool, optional) : If True, uses the alternate mode for un-pitched audio (e.g. percussion).
|
|
215
|
-
If False, uses mode oriented to pitched audio, i.e. voice, singing, or instrumental music. Default: False.
|
|
230
|
+
in range of (0, 1]. Default: ``0.707``.
|
|
231
|
+
noise (bool, optional) : If ``True``, uses the alternate mode for un-pitched audio (e.g. percussion).
|
|
232
|
+
If ``False``, uses mode oriented to pitched audio, i.e. voice, singing, or instrumental music.
|
|
233
|
+
Default: ``False``.
|
|
216
234
|
|
|
217
235
|
Raises:
|
|
218
236
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -228,11 +246,17 @@ class BandBiquad(AudioTensorOperation):
|
|
|
228
246
|
|
|
229
247
|
Examples:
|
|
230
248
|
>>> import numpy as np
|
|
249
|
+
>>> import mindspore.dataset as ds
|
|
250
|
+
>>> import mindspore.dataset.audio as audio
|
|
231
251
|
>>>
|
|
232
252
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
233
253
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
234
254
|
>>> transforms = [audio.BandBiquad(44100, 200.0)]
|
|
235
255
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
256
|
+
|
|
257
|
+
Tutorial Examples:
|
|
258
|
+
- `Illustration of audio transforms
|
|
259
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
236
260
|
"""
|
|
237
261
|
|
|
238
262
|
@check_band_biquad
|
|
@@ -271,9 +295,9 @@ class BandpassBiquad(AudioTensorOperation):
|
|
|
271
295
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
272
296
|
central_freq (float): Central frequency (in Hz).
|
|
273
297
|
Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
|
|
274
|
-
in range of (0, 1]. Default: 0.707
|
|
275
|
-
const_skirt_gain (bool, optional) : If True, uses a constant skirt gain (peak gain = Q);
|
|
276
|
-
If False, uses a constant 0dB peak gain. Default: False.
|
|
298
|
+
in range of (0, 1]. Default: ``0.707``.
|
|
299
|
+
const_skirt_gain (bool, optional) : If ``True``, uses a constant skirt gain (peak gain = Q);
|
|
300
|
+
If ``False``, uses a constant 0dB peak gain. Default: ``False``.
|
|
277
301
|
|
|
278
302
|
Raises:
|
|
279
303
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -289,11 +313,17 @@ class BandpassBiquad(AudioTensorOperation):
|
|
|
289
313
|
|
|
290
314
|
Examples:
|
|
291
315
|
>>> import numpy as np
|
|
316
|
+
>>> import mindspore.dataset as ds
|
|
317
|
+
>>> import mindspore.dataset.audio as audio
|
|
292
318
|
>>>
|
|
293
319
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
294
320
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
295
321
|
>>> transforms = [audio.BandpassBiquad(44100, 200.0)]
|
|
296
322
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
323
|
+
|
|
324
|
+
Tutorial Examples:
|
|
325
|
+
- `Illustration of audio transforms
|
|
326
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
297
327
|
"""
|
|
298
328
|
|
|
299
329
|
@check_bandpass_biquad
|
|
@@ -330,7 +360,7 @@ class BandrejectBiquad(AudioTensorOperation):
|
|
|
330
360
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
331
361
|
central_freq (float): Central frequency (in Hz).
|
|
332
362
|
Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
|
|
333
|
-
in range of (0, 1]. Default: 0.707
|
|
363
|
+
in range of (0, 1]. Default: ``0.707``.
|
|
334
364
|
|
|
335
365
|
Raises:
|
|
336
366
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -345,11 +375,17 @@ class BandrejectBiquad(AudioTensorOperation):
|
|
|
345
375
|
|
|
346
376
|
Examples:
|
|
347
377
|
>>> import numpy as np
|
|
378
|
+
>>> import mindspore.dataset as ds
|
|
379
|
+
>>> import mindspore.dataset.audio as audio
|
|
348
380
|
>>>
|
|
349
381
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]])
|
|
350
382
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
351
383
|
>>> transforms = [audio.BandrejectBiquad(44100, 200.0)]
|
|
352
384
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
385
|
+
|
|
386
|
+
Tutorial Examples:
|
|
387
|
+
- `Illustration of audio transforms
|
|
388
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
353
389
|
"""
|
|
354
390
|
|
|
355
391
|
@check_bandreject_biquad
|
|
@@ -381,9 +417,9 @@ class BassBiquad(AudioTensorOperation):
|
|
|
381
417
|
Args:
|
|
382
418
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
383
419
|
gain (float): Desired gain at the boost (or attenuation) in dB.
|
|
384
|
-
central_freq (float, optional): Central frequency (in Hz). Default: 100.0
|
|
420
|
+
central_freq (float, optional): Central frequency (in Hz). Default: ``100.0``.
|
|
385
421
|
Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
|
|
386
|
-
in range of (0, 1]. Default: 0.707
|
|
422
|
+
in range of (0, 1]. Default: ``0.707``.
|
|
387
423
|
|
|
388
424
|
Raises:
|
|
389
425
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -399,11 +435,17 @@ class BassBiquad(AudioTensorOperation):
|
|
|
399
435
|
|
|
400
436
|
Examples:
|
|
401
437
|
>>> import numpy as np
|
|
438
|
+
>>> import mindspore.dataset as ds
|
|
439
|
+
>>> import mindspore.dataset.audio as audio
|
|
402
440
|
>>>
|
|
403
441
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
404
442
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
405
443
|
>>> transforms = [audio.BassBiquad(44100, 100.0)]
|
|
406
444
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
445
|
+
|
|
446
|
+
Tutorial Examples:
|
|
447
|
+
- `Illustration of audio transforms
|
|
448
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
407
449
|
"""
|
|
408
450
|
|
|
409
451
|
@check_bass_biquad
|
|
@@ -445,10 +487,15 @@ class Biquad(TensorOperation):
|
|
|
445
487
|
|
|
446
488
|
Examples:
|
|
447
489
|
>>> import numpy as np
|
|
490
|
+
>>> import mindspore.dataset.audio as audio
|
|
448
491
|
>>>
|
|
449
492
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
450
493
|
>>> biquad_op = audio.Biquad(0.01, 0.02, 0.13, 1, 0.12, 0.3)
|
|
451
494
|
>>> waveform_filtered = biquad_op(waveform)
|
|
495
|
+
|
|
496
|
+
Tutorial Examples:
|
|
497
|
+
- `Illustration of audio transforms
|
|
498
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
452
499
|
"""
|
|
453
500
|
|
|
454
501
|
@check_biquad
|
|
@@ -474,7 +521,7 @@ class ComplexNorm(AudioTensorOperation):
|
|
|
474
521
|
The first dimension represents the real part while the second represents the imaginary.
|
|
475
522
|
|
|
476
523
|
Args:
|
|
477
|
-
power (float, optional): Power of the norm, which must be non-negative. Default: 1.0
|
|
524
|
+
power (float, optional): Power of the norm, which must be non-negative. Default: ``1.0``.
|
|
478
525
|
|
|
479
526
|
Raises:
|
|
480
527
|
TypeError: If `power` is not of type float.
|
|
@@ -486,11 +533,17 @@ class ComplexNorm(AudioTensorOperation):
|
|
|
486
533
|
|
|
487
534
|
Examples:
|
|
488
535
|
>>> import numpy as np
|
|
536
|
+
>>> import mindspore.dataset as ds
|
|
537
|
+
>>> import mindspore.dataset.audio as audio
|
|
489
538
|
>>>
|
|
490
539
|
>>> waveform = np.random.random([2, 4, 2])
|
|
491
540
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
492
541
|
>>> transforms = [audio.ComplexNorm()]
|
|
493
542
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
543
|
+
|
|
544
|
+
Tutorial Examples:
|
|
545
|
+
- `Illustration of audio transforms
|
|
546
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
494
547
|
"""
|
|
495
548
|
|
|
496
549
|
@check_complex_norm
|
|
@@ -524,15 +577,15 @@ class ComputeDeltas(AudioTensorOperation):
|
|
|
524
577
|
at time :math:`t` , :math:`N` is :math:`(\text{win_length} - 1) // 2` .
|
|
525
578
|
|
|
526
579
|
Args:
|
|
527
|
-
win_length (int, optional): The window length used for computing delta, must be no less than 3. Default: 5
|
|
528
|
-
pad_mode (BorderType, optional): Mode parameter passed to padding, can be BorderType.CONSTANT, BorderType.EDGE,
|
|
529
|
-
BorderType.REFLECT or BorderType.SYMMETRIC. Default: BorderType.EDGE.
|
|
580
|
+
win_length (int, optional): The window length used for computing delta, must be no less than 3. Default: ``5``.
|
|
581
|
+
pad_mode (BorderType, optional): Mode parameter passed to padding, can be ``BorderType.CONSTANT``,
|
|
582
|
+
``BorderType.EDGE``, ``BorderType.REFLECT`` or ``BorderType.SYMMETRIC``. Default: ``BorderType.EDGE``.
|
|
530
583
|
|
|
531
|
-
- BorderType.CONSTANT, pad with a constant value.
|
|
532
|
-
- BorderType.EDGE, pad with the last value on the edge.
|
|
533
|
-
- BorderType.REFLECT, reflect the value on the edge while omitting the last one.
|
|
584
|
+
- ``BorderType.CONSTANT``, pad with a constant value.
|
|
585
|
+
- ``BorderType.EDGE``, pad with the last value on the edge.
|
|
586
|
+
- ``BorderType.REFLECT``, reflect the value on the edge while omitting the last one.
|
|
534
587
|
For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [3, 2, 1, 2, 3, 4, 3, 2].
|
|
535
|
-
- BorderType.SYMMETRIC, reflect the value on the edge while repeating the last one.
|
|
588
|
+
- ``BorderType.SYMMETRIC``, reflect the value on the edge while repeating the last one.
|
|
536
589
|
For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [2, 1, 1, 2, 3, 4, 4, 3].
|
|
537
590
|
|
|
538
591
|
Raises:
|
|
@@ -546,12 +599,18 @@ class ComputeDeltas(AudioTensorOperation):
|
|
|
546
599
|
|
|
547
600
|
Examples:
|
|
548
601
|
>>> import numpy as np
|
|
602
|
+
>>> import mindspore.dataset as ds
|
|
603
|
+
>>> import mindspore.dataset.audio as audio
|
|
549
604
|
>>> from mindspore.dataset.audio import BorderType
|
|
550
605
|
>>>
|
|
551
606
|
>>> waveform = np.random.random([1, 400 // 2 + 1, 30])
|
|
552
607
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
553
608
|
>>> transforms = [audio.ComputeDeltas(win_length=7, pad_mode=BorderType.EDGE)]
|
|
554
609
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
610
|
+
|
|
611
|
+
Tutorial Examples:
|
|
612
|
+
- `Illustration of audio transforms
|
|
613
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
555
614
|
"""
|
|
556
615
|
|
|
557
616
|
@check_compute_deltas
|
|
@@ -577,7 +636,7 @@ class Contrast(AudioTensorOperation):
|
|
|
577
636
|
|
|
578
637
|
Args:
|
|
579
638
|
enhancement_amount (float, optional): Controls the amount of the enhancement,
|
|
580
|
-
in range of [0, 100]. Default: 75.0. Note that `enhancement_amount` equal
|
|
639
|
+
in range of [0, 100]. Default: ``75.0``. Note that `enhancement_amount` equal
|
|
581
640
|
to 0 still gives a significant contrast enhancement.
|
|
582
641
|
|
|
583
642
|
Raises:
|
|
@@ -590,11 +649,17 @@ class Contrast(AudioTensorOperation):
|
|
|
590
649
|
|
|
591
650
|
Examples:
|
|
592
651
|
>>> import numpy as np
|
|
652
|
+
>>> import mindspore.dataset as ds
|
|
653
|
+
>>> import mindspore.dataset.audio as audio
|
|
593
654
|
>>>
|
|
594
655
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
595
656
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
596
657
|
>>> transforms = [audio.Contrast()]
|
|
597
658
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
659
|
+
|
|
660
|
+
Tutorial Examples:
|
|
661
|
+
- `Illustration of audio transforms
|
|
662
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
598
663
|
"""
|
|
599
664
|
|
|
600
665
|
@check_contrast
|
|
@@ -623,11 +688,17 @@ class DBToAmplitude(AudioTensorOperation):
|
|
|
623
688
|
|
|
624
689
|
Examples:
|
|
625
690
|
>>> import numpy as np
|
|
691
|
+
>>> import mindspore.dataset as ds
|
|
692
|
+
>>> import mindspore.dataset.audio as audio
|
|
626
693
|
>>>
|
|
627
694
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
628
695
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
629
696
|
>>> transforms = [audio.DBToAmplitude(0.5, 0.5)]
|
|
630
697
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
698
|
+
|
|
699
|
+
Tutorial Examples:
|
|
700
|
+
- `Illustration of audio transforms
|
|
701
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
631
702
|
"""
|
|
632
703
|
|
|
633
704
|
@check_db_to_amplitude
|
|
@@ -647,7 +718,8 @@ class DCShift(AudioTensorOperation):
|
|
|
647
718
|
Args:
|
|
648
719
|
shift (float): The amount to shift the audio, the value must be in the range [-2.0, 2.0].
|
|
649
720
|
limiter_gain (float, optional): Used only on peaks to prevent clipping,
|
|
650
|
-
the value should be much less than 1, such as 0.05 or 0.02
|
|
721
|
+
the value should be much less than 1, such as ``0.05`` or ``0.02``. Default: ``None``,
|
|
722
|
+
will be set to `shift` .
|
|
651
723
|
|
|
652
724
|
Raises:
|
|
653
725
|
TypeError: If `shift` is not of type float.
|
|
@@ -659,11 +731,17 @@ class DCShift(AudioTensorOperation):
|
|
|
659
731
|
|
|
660
732
|
Examples:
|
|
661
733
|
>>> import numpy as np
|
|
734
|
+
>>> import mindspore.dataset as ds
|
|
735
|
+
>>> import mindspore.dataset.audio as audio
|
|
662
736
|
>>>
|
|
663
737
|
>>> waveform = np.array([0.60, 0.97, -1.04, -1.26, 0.97, 0.91, 0.48, 0.93])
|
|
664
738
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
665
739
|
>>> transforms = [audio.DCShift(0.5, 0.02)]
|
|
666
740
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
741
|
+
|
|
742
|
+
Tutorial Examples:
|
|
743
|
+
- `Illustration of audio transforms
|
|
744
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
667
745
|
"""
|
|
668
746
|
|
|
669
747
|
@check_dc_shift
|
|
@@ -695,11 +773,17 @@ class DeemphBiquad(AudioTensorOperation):
|
|
|
695
773
|
|
|
696
774
|
Examples:
|
|
697
775
|
>>> import numpy as np
|
|
776
|
+
>>> import mindspore.dataset as ds
|
|
777
|
+
>>> import mindspore.dataset.audio as audio
|
|
698
778
|
>>>
|
|
699
779
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
700
780
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
701
781
|
>>> transforms = [audio.DeemphBiquad(44100)]
|
|
702
782
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
783
|
+
|
|
784
|
+
Tutorial Examples:
|
|
785
|
+
- `Illustration of audio transforms
|
|
786
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
703
787
|
"""
|
|
704
788
|
|
|
705
789
|
@check_deemph_biquad
|
|
@@ -718,14 +802,14 @@ class DetectPitchFrequency(AudioTensorOperation):
|
|
|
718
802
|
It is implemented using normalized cross-correlation function and median smoothing.
|
|
719
803
|
|
|
720
804
|
Args:
|
|
721
|
-
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero.
|
|
722
|
-
frame_time (float, optional): Duration of a frame, the value must be greater than zero. Default: 0.01
|
|
805
|
+
sample_rate (int): Sampling rate of the waveform, e.g. ``44100`` (Hz), the value can't be zero.
|
|
806
|
+
frame_time (float, optional): Duration of a frame, the value must be greater than zero. Default: ``0.01``.
|
|
723
807
|
win_length (int, optional): The window length for median smoothing (in number of frames), the value must be
|
|
724
|
-
greater than zero. Default: 30
|
|
808
|
+
greater than zero. Default: ``30``.
|
|
725
809
|
freq_low (int, optional): Lowest frequency that can be detected (Hz), the value must be greater than zero.
|
|
726
|
-
Default: 85
|
|
810
|
+
Default: ``85``.
|
|
727
811
|
freq_high (int, optional): Highest frequency that can be detected (Hz), the value must be greater than zero.
|
|
728
|
-
Default: 3400
|
|
812
|
+
Default: ``3400``.
|
|
729
813
|
|
|
730
814
|
Raises:
|
|
731
815
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -744,12 +828,18 @@ class DetectPitchFrequency(AudioTensorOperation):
|
|
|
744
828
|
|
|
745
829
|
Examples:
|
|
746
830
|
>>> import numpy as np
|
|
831
|
+
>>> import mindspore.dataset as ds
|
|
832
|
+
>>> import mindspore.dataset.audio as audio
|
|
747
833
|
>>>
|
|
748
834
|
>>> waveform = np.array([[0.716064e-03, 5.347656e-03, 6.246826e-03, 2.089477e-02, 7.138305e-02],
|
|
749
835
|
... [4.156616e-02, 1.394653e-02, 3.550292e-02, 0.614379e-02, 3.840209e-02]])
|
|
750
836
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
751
837
|
>>> transforms = [audio.DetectPitchFrequency(30, 0.1, 3, 5, 25)]
|
|
752
838
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
839
|
+
|
|
840
|
+
Tutorial Examples:
|
|
841
|
+
- `Illustration of audio transforms
|
|
842
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
753
843
|
"""
|
|
754
844
|
|
|
755
845
|
@check_detect_pitch_frequency
|
|
@@ -778,12 +868,12 @@ class Dither(AudioTensorOperation):
|
|
|
778
868
|
|
|
779
869
|
Args:
|
|
780
870
|
density_function (DensityFunction, optional): The density function of a continuous
|
|
781
|
-
random variable, can be DensityFunction.TPDF (Triangular Probability Density Function),
|
|
782
|
-
DensityFunction.RPDF (Rectangular Probability Density Function) or
|
|
783
|
-
DensityFunction.GPDF (Gaussian Probability Density Function).
|
|
784
|
-
Default: DensityFunction.TPDF
|
|
871
|
+
random variable, can be ``DensityFunction.TPDF`` (Triangular Probability Density Function),
|
|
872
|
+
``DensityFunction.RPDF`` (Rectangular Probability Density Function) or
|
|
873
|
+
``DensityFunction.GPDF`` (Gaussian Probability Density Function).
|
|
874
|
+
Default: ``DensityFunction.TPDF``.
|
|
785
875
|
noise_shaping (bool, optional): A filtering process that shapes the spectral
|
|
786
|
-
energy of quantisation error. Default: False
|
|
876
|
+
energy of quantisation error. Default: ``False``.
|
|
787
877
|
|
|
788
878
|
Raises:
|
|
789
879
|
TypeError: If `density_function` is not of type :class:`mindspore.dataset.audio.DensityFunction` .
|
|
@@ -795,11 +885,17 @@ class Dither(AudioTensorOperation):
|
|
|
795
885
|
|
|
796
886
|
Examples:
|
|
797
887
|
>>> import numpy as np
|
|
888
|
+
>>> import mindspore.dataset as ds
|
|
889
|
+
>>> import mindspore.dataset.audio as audio
|
|
798
890
|
>>>
|
|
799
891
|
>>> waveform = np.array([[1, 2, 3], [4, 5, 6]])
|
|
800
892
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
801
893
|
>>> transforms = [audio.Dither()]
|
|
802
894
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
895
|
+
|
|
896
|
+
Tutorial Examples:
|
|
897
|
+
- `Illustration of audio transforms
|
|
898
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
803
899
|
"""
|
|
804
900
|
|
|
805
901
|
@check_dither
|
|
@@ -819,10 +915,10 @@ class EqualizerBiquad(AudioTensorOperation):
|
|
|
819
915
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
820
916
|
|
|
821
917
|
Args:
|
|
822
|
-
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
|
|
918
|
+
sample_rate (int): Sampling rate of the waveform, e.g. ``44100`` (Hz), the value can't be 0.
|
|
823
919
|
center_freq (float): Central frequency (in Hz).
|
|
824
920
|
gain (float): Desired gain at the boost (or attenuation) in dB.
|
|
825
|
-
Q (float, optional): https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: 0.707
|
|
921
|
+
Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: ``0.707``.
|
|
826
922
|
|
|
827
923
|
Raises:
|
|
828
924
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -837,11 +933,17 @@ class EqualizerBiquad(AudioTensorOperation):
|
|
|
837
933
|
|
|
838
934
|
Examples:
|
|
839
935
|
>>> import numpy as np
|
|
936
|
+
>>> import mindspore.dataset as ds
|
|
937
|
+
>>> import mindspore.dataset.audio as audio
|
|
840
938
|
>>>
|
|
841
939
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
842
940
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
843
941
|
>>> transforms = [audio.EqualizerBiquad(44100, 1500, 5.5, 0.7)]
|
|
844
942
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
943
|
+
|
|
944
|
+
Tutorial Examples:
|
|
945
|
+
- `Illustration of audio transforms
|
|
946
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
845
947
|
"""
|
|
846
948
|
|
|
847
949
|
@check_equalizer_biquad
|
|
@@ -868,20 +970,20 @@ class Fade(AudioTensorOperation):
|
|
|
868
970
|
Add a fade in and/or fade out to a waveform.
|
|
869
971
|
|
|
870
972
|
Args:
|
|
871
|
-
fade_in_len (int, optional): Length of fade-in (time frames), which must be non-negative. Default: 0
|
|
872
|
-
fade_out_len (int, optional): Length of fade-out (time frames), which must be non-negative. Default: 0
|
|
973
|
+
fade_in_len (int, optional): Length of fade-in (time frames), which must be non-negative. Default: ``0``.
|
|
974
|
+
fade_out_len (int, optional): Length of fade-out (time frames), which must be non-negative. Default: ``0``.
|
|
873
975
|
fade_shape (FadeShape, optional): Shape of fade, five different types can be chosen as defined in FadeShape.
|
|
874
|
-
Default: FadeShape.LINEAR
|
|
976
|
+
Default: ``FadeShape.LINEAR``.
|
|
875
977
|
|
|
876
|
-
-FadeShape.QUARTER_SINE
|
|
978
|
+
- ``FadeShape.QUARTER_SINE``, means it tends to 0 following a quarter sine function.
|
|
877
979
|
|
|
878
|
-
-FadeShape.HALF_SINE
|
|
980
|
+
- ``FadeShape.HALF_SINE``, means it tends to 0 following a half sine function.
|
|
879
981
|
|
|
880
|
-
-FadeShape.LINEAR
|
|
982
|
+
- ``FadeShape.LINEAR``, means it tends to 0 linearly.
|
|
881
983
|
|
|
882
|
-
-FadeShape.LOGARITHMIC
|
|
984
|
+
- ``FadeShape.LOGARITHMIC``, means it tends to 0 following a logarithmic function.
|
|
883
985
|
|
|
884
|
-
-FadeShape.EXPONENTIAL
|
|
986
|
+
- ``FadeShape.EXPONENTIAL``, means it tends to 0 following an exponential function.
|
|
885
987
|
|
|
886
988
|
Raises:
|
|
887
989
|
RuntimeError: If fade_in_len exceeds waveform length.
|
|
@@ -892,12 +994,18 @@ class Fade(AudioTensorOperation):
|
|
|
892
994
|
|
|
893
995
|
Examples:
|
|
894
996
|
>>> import numpy as np
|
|
997
|
+
>>> import mindspore.dataset as ds
|
|
998
|
+
>>> import mindspore.dataset.audio as audio
|
|
895
999
|
>>> from mindspore.dataset.audio import FadeShape
|
|
896
1000
|
>>>
|
|
897
1001
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03, 9.246826171875e-03, 1.0894775390625e-02]])
|
|
898
1002
|
>>> dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
899
1003
|
>>> transforms = [audio.Fade(fade_in_len=3, fade_out_len=2, fade_shape=FadeShape.LINEAR)]
|
|
900
1004
|
>>> dataset = dataset.map(operations=transforms, input_columns=["audio"])
|
|
1005
|
+
|
|
1006
|
+
Tutorial Examples:
|
|
1007
|
+
- `Illustration of audio transforms
|
|
1008
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
901
1009
|
"""
|
|
902
1010
|
|
|
903
1011
|
@check_fade
|
|
@@ -922,7 +1030,8 @@ class Filtfilt(AudioTensorOperation):
|
|
|
922
1030
|
b_coeffs (Sequence[float]): Numerator coefficients of difference equation of dimension.
|
|
923
1031
|
Lower delays coefficients are first, e.g. [b0, b1, b2, ...].
|
|
924
1032
|
Must be same size as a_coeffs (pad with 0's as necessary).
|
|
925
|
-
clamp (bool, optional): If True
|
|
1033
|
+
clamp (bool, optional): If ``True``, clamp the output signal to be in the range [-1, 1].
|
|
1034
|
+
Default: ``True``.
|
|
926
1035
|
|
|
927
1036
|
Raises:
|
|
928
1037
|
TypeError: If `a_coeffs` is not of type Sequence[float].
|
|
@@ -933,6 +1042,8 @@ class Filtfilt(AudioTensorOperation):
|
|
|
933
1042
|
|
|
934
1043
|
Examples:
|
|
935
1044
|
>>> import numpy as np
|
|
1045
|
+
>>> import mindspore.dataset as ds
|
|
1046
|
+
>>> import mindspore.dataset.audio as audio
|
|
936
1047
|
>>>
|
|
937
1048
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
938
1049
|
>>> a_coeffs = [0.1, 0.2, 0.3]
|
|
@@ -940,6 +1051,10 @@ class Filtfilt(AudioTensorOperation):
|
|
|
940
1051
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
941
1052
|
>>> transforms = [audio.Filtfilt(a_coeffs, b_coeffs)]
|
|
942
1053
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1054
|
+
|
|
1055
|
+
Tutorial Examples:
|
|
1056
|
+
- `Illustration of audio transforms
|
|
1057
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
943
1058
|
"""
|
|
944
1059
|
|
|
945
1060
|
@check_lfilter
|
|
@@ -968,16 +1083,16 @@ class Flanger(AudioTensorOperation):
|
|
|
968
1083
|
|
|
969
1084
|
Args:
|
|
970
1085
|
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
|
|
971
|
-
delay (float, optional): Desired delay in milliseconds, in range of [0, 30]. Default: 0.0
|
|
972
|
-
depth (float, optional): Desired delay depth in milliseconds, in range of [0, 10]. Default: 2.0
|
|
973
|
-
regen (float, optional): Desired regen (feedback gain) in dB, in range of [-95, 95]. Default: 0.0
|
|
974
|
-
width (float, optional): Desired width (delay gain) in dB, in range of [0, 100]. Default: 71.0
|
|
975
|
-
speed (float, optional): Modulation speed in Hz, in range of [0.1, 10]. Default: 0.5
|
|
976
|
-
phase (float, optional): Percentage phase-shift for multi-channel, in range of [0, 100]. Default: 25.0
|
|
977
|
-
modulation (Modulation, optional): Modulation method, can be Modulation.SINUSOIDAL or
|
|
978
|
-
Default: Modulation.SINUSOIDAL
|
|
979
|
-
interpolation (Interpolation, optional): Interpolation method, can be Interpolation.LINEAR or
|
|
980
|
-
Interpolation.QUADRATIC
|
|
1086
|
+
delay (float, optional): Desired delay in milliseconds, in range of [0, 30]. Default: ``0.0``.
|
|
1087
|
+
depth (float, optional): Desired delay depth in milliseconds, in range of [0, 10]. Default: ``2.0``.
|
|
1088
|
+
regen (float, optional): Desired regen (feedback gain) in dB, in range of [-95, 95]. Default: ``0.0``.
|
|
1089
|
+
width (float, optional): Desired width (delay gain) in dB, in range of [0, 100]. Default: ``71.0``.
|
|
1090
|
+
speed (float, optional): Modulation speed in Hz, in range of [0.1, 10]. Default: ``0.5``.
|
|
1091
|
+
phase (float, optional): Percentage phase-shift for multi-channel, in range of [0, 100]. Default: ``25.0``.
|
|
1092
|
+
modulation (Modulation, optional): Modulation method, can be ``Modulation.SINUSOIDAL`` or
|
|
1093
|
+
``Modulation.TRIANGULAR``. Default: ``Modulation.SINUSOIDAL``.
|
|
1094
|
+
interpolation (Interpolation, optional): Interpolation method, can be ``Interpolation.LINEAR`` or
|
|
1095
|
+
``Interpolation.QUADRATIC``. Default: ``Interpolation.LINEAR``.
|
|
981
1096
|
|
|
982
1097
|
Raises:
|
|
983
1098
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -1003,11 +1118,17 @@ class Flanger(AudioTensorOperation):
|
|
|
1003
1118
|
|
|
1004
1119
|
Examples:
|
|
1005
1120
|
>>> import numpy as np
|
|
1121
|
+
>>> import mindspore.dataset as ds
|
|
1122
|
+
>>> import mindspore.dataset.audio as audio
|
|
1006
1123
|
>>>
|
|
1007
1124
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
1008
1125
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1009
1126
|
>>> transforms = [audio.Flanger(44100)]
|
|
1010
1127
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1128
|
+
|
|
1129
|
+
Tutorial Examples:
|
|
1130
|
+
- `Illustration of audio transforms
|
|
1131
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1011
1132
|
"""
|
|
1012
1133
|
|
|
1013
1134
|
@check_flanger
|
|
@@ -1038,15 +1159,15 @@ class FrequencyMasking(AudioTensorOperation):
|
|
|
1038
1159
|
The shape of the audio waveform to be processed needs to be <..., freq, time>.
|
|
1039
1160
|
|
|
1040
1161
|
Args:
|
|
1041
|
-
iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: False
|
|
1042
|
-
freq_mask_param (int, optional): When `iid_masks` is True
|
|
1043
|
-
from [0, freq_mask_param]; When `iid_masks` is False
|
|
1162
|
+
iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: ``False``.
|
|
1163
|
+
freq_mask_param (int, optional): When `iid_masks` is ``True``, length of the mask will be uniformly sampled
|
|
1164
|
+
from [0, freq_mask_param]; When `iid_masks` is ``False``, directly use it as length of the mask.
|
|
1044
1165
|
The value should be in range of [0, freq_length], where `freq_length` is the length of audio waveform
|
|
1045
|
-
in frequency domain. Default: 0
|
|
1046
|
-
mask_start (int, optional): Starting point to apply mask, only works when `iid_masks` is True
|
|
1047
|
-
be in range of [0, freq_length - freq_mask_param], where `freq_length` is
|
|
1048
|
-
in frequency domain. Default: 0
|
|
1049
|
-
mask_value (float, optional): Value to assign to the masked columns. Default: 0.0
|
|
1166
|
+
in frequency domain. Default: ``0``.
|
|
1167
|
+
mask_start (int, optional): Starting point to apply mask, only works when `iid_masks` is ``True``.
|
|
1168
|
+
The value should be in range of [0, freq_length - freq_mask_param], where `freq_length` is
|
|
1169
|
+
the length of audio waveform in frequency domain. Default: ``0``.
|
|
1170
|
+
mask_value (float, optional): Value to assign to the masked columns. Default: ``0.0``.
|
|
1050
1171
|
|
|
1051
1172
|
Raises:
|
|
1052
1173
|
TypeError: If `iid_masks` is not of type bool.
|
|
@@ -1063,12 +1184,18 @@ class FrequencyMasking(AudioTensorOperation):
|
|
|
1063
1184
|
|
|
1064
1185
|
Examples:
|
|
1065
1186
|
>>> import numpy as np
|
|
1187
|
+
>>> import mindspore.dataset as ds
|
|
1188
|
+
>>> import mindspore.dataset.audio as audio
|
|
1066
1189
|
>>>
|
|
1067
1190
|
>>> waveform = np.random.random([1, 3, 2])
|
|
1068
1191
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1069
1192
|
>>> transforms = [audio.FrequencyMasking(freq_mask_param=1)]
|
|
1070
1193
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1071
1194
|
|
|
1195
|
+
Tutorial Examples:
|
|
1196
|
+
- `Illustration of audio transforms
|
|
1197
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1198
|
+
|
|
1072
1199
|
.. image:: frequency_masking_original.png
|
|
1073
1200
|
|
|
1074
1201
|
.. image:: frequency_masking.png
|
|
@@ -1092,7 +1219,7 @@ class Gain(AudioTensorOperation):
|
|
|
1092
1219
|
Apply amplification or attenuation to the whole waveform.
|
|
1093
1220
|
|
|
1094
1221
|
Args:
|
|
1095
|
-
gain_db (float): Gain adjustment in decibels (dB). Default: 1.0
|
|
1222
|
+
gain_db (float, optional): Gain adjustment in decibels (dB). Default: ``1.0``.
|
|
1096
1223
|
|
|
1097
1224
|
Raises:
|
|
1098
1225
|
TypeError: If `gain_db` is not of type float.
|
|
@@ -1102,11 +1229,17 @@ class Gain(AudioTensorOperation):
|
|
|
1102
1229
|
|
|
1103
1230
|
Examples:
|
|
1104
1231
|
>>> import numpy as np
|
|
1232
|
+
>>> import mindspore.dataset as ds
|
|
1233
|
+
>>> import mindspore.dataset.audio as audio
|
|
1105
1234
|
>>>
|
|
1106
1235
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
1107
1236
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1108
1237
|
>>> transforms = [audio.Gain(1.2)]
|
|
1109
1238
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1239
|
+
|
|
1240
|
+
Tutorial Examples:
|
|
1241
|
+
- `Illustration of audio transforms
|
|
1242
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1110
1243
|
"""
|
|
1111
1244
|
|
|
1112
1245
|
@check_gain
|
|
@@ -1126,20 +1259,20 @@ class GriffinLim(AudioTensorOperation):
|
|
|
1126
1259
|
and `Signal estimation from modified short-time Fourier transform <https://doi.org/10.1109/ICASSP.1983.1172092>`_ .
|
|
1127
1260
|
|
|
1128
1261
|
Args:
|
|
1129
|
-
n_fft (int, optional): Size of FFT. Default: 400
|
|
1130
|
-
n_iter (int, optional): Number of iteration for phase recovery. Default: 32
|
|
1131
|
-
win_length (int, optional): Window size for GriffinLim. Default: None
|
|
1262
|
+
n_fft (int, optional): Size of FFT. Default: ``400``.
|
|
1263
|
+
n_iter (int, optional): Number of iterations for phase recovery. Default: ``32``.
|
|
1264
|
+
win_length (int, optional): Window size for GriffinLim. Default: ``None``, will be set to `n_fft` .
|
|
1132
1265
|
hop_length (int, optional): Length of hop between STFT windows.
|
|
1133
|
-
Default: None
|
|
1134
|
-
window_type (WindowType, optional): Window type for GriffinLim, which can be WindowType.BARTLETT
|
|
1135
|
-
WindowType.BLACKMAN
|
|
1136
|
-
Currently kaiser window is not supported on macOS.
|
|
1137
|
-
power (float, optional): Exponent for the magnitude spectrogram. Default: 2.0
|
|
1138
|
-
momentum (float, optional): The momentum for fast Griffin-Lim. Default: 0.99
|
|
1139
|
-
length (int, optional): Length of the expected output waveform. Default: None
|
|
1140
|
-
dimension of the stft matrix.
|
|
1266
|
+
Default: ``None``, will be set to `win_length // 2` .
|
|
1267
|
+
window_type (WindowType, optional): Window type for GriffinLim, which can be ``WindowType.BARTLETT``,
|
|
1268
|
+
``WindowType.BLACKMAN``, ``WindowType.HAMMING``, ``WindowType.HANN`` or ``WindowType.KAISER``.
|
|
1269
|
+
Default: ``WindowType.HANN``. Currently kaiser window is not supported on macOS.
|
|
1270
|
+
power (float, optional): Exponent for the magnitude spectrogram. Default: ``2.0``.
|
|
1271
|
+
momentum (float, optional): The momentum for fast Griffin-Lim. Default: ``0.99``.
|
|
1272
|
+
length (int, optional): Length of the expected output waveform. Default: ``None``,
|
|
1273
|
+
will be set to the value of last dimension of the stft matrix.
|
|
1141
1274
|
rand_init (bool, optional): Flag for random phase initialization or all-zero phase initialization.
|
|
1142
|
-
Default: True
|
|
1275
|
+
Default: ``True``.
|
|
1143
1276
|
|
|
1144
1277
|
Raises:
|
|
1145
1278
|
TypeError: If `n_fft` is not of type int.
|
|
@@ -1166,11 +1299,17 @@ class GriffinLim(AudioTensorOperation):
|
|
|
1166
1299
|
|
|
1167
1300
|
Examples:
|
|
1168
1301
|
>>> import numpy as np
|
|
1302
|
+
>>> import mindspore.dataset as ds
|
|
1303
|
+
>>> import mindspore.dataset.audio as audio
|
|
1169
1304
|
>>>
|
|
1170
1305
|
>>> waveform = np.random.random([201, 6])
|
|
1171
1306
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1172
1307
|
>>> transforms = [audio.GriffinLim(n_fft=400)]
|
|
1173
1308
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1309
|
+
|
|
1310
|
+
Tutorial Examples:
|
|
1311
|
+
- `Illustration of audio transforms
|
|
1312
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1174
1313
|
"""
|
|
1175
1314
|
|
|
1176
1315
|
@check_griffin_lim
|
|
@@ -1202,7 +1341,7 @@ class HighpassBiquad(AudioTensorOperation):
|
|
|
1202
1341
|
Args:
|
|
1203
1342
|
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
|
|
1204
1343
|
cutoff_freq (float): Filter cutoff frequency (in Hz).
|
|
1205
|
-
Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: 0.707
|
|
1344
|
+
Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: ``0.707``.
|
|
1206
1345
|
|
|
1207
1346
|
Raises:
|
|
1208
1347
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -1217,11 +1356,17 @@ class HighpassBiquad(AudioTensorOperation):
|
|
|
1217
1356
|
|
|
1218
1357
|
Examples:
|
|
1219
1358
|
>>> import numpy as np
|
|
1359
|
+
>>> import mindspore.dataset as ds
|
|
1360
|
+
>>> import mindspore.dataset.audio as audio
|
|
1220
1361
|
>>>
|
|
1221
1362
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
1222
1363
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1223
1364
|
>>> transforms = [audio.HighpassBiquad(44100, 1500, 0.7)]
|
|
1224
1365
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1366
|
+
|
|
1367
|
+
Tutorial Examples:
|
|
1368
|
+
- `Illustration of audio transforms
|
|
1369
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1225
1370
|
"""
|
|
1226
1371
|
|
|
1227
1372
|
@check_highpass_biquad
|
|
@@ -1241,18 +1386,19 @@ class InverseMelScale(AudioTensorOperation):
|
|
|
1241
1386
|
|
|
1242
1387
|
Args:
|
|
1243
1388
|
n_stft (int): Number of bins in STFT.
|
|
1244
|
-
n_mels (int, optional): Number of mel filterbanks. Default: 128
|
|
1245
|
-
sample_rate (int, optional): Sample rate of audio signal. Default: 16000
|
|
1246
|
-
f_min (float, optional): Minimum frequency. Default: 0.0
|
|
1247
|
-
f_max (float, optional): Maximum frequency. Default: None
|
|
1248
|
-
max_iter (int, optional): Maximum number of optimization iterations. Default: 100000
|
|
1249
|
-
tolerance_loss (float, optional): Value of loss to stop optimization at. Default: 1e-5
|
|
1250
|
-
tolerance_change (float, optional): Difference in losses to stop optimization at. Default: 1e-8
|
|
1251
|
-
sgdargs (dict, optional): Arguments for the SGD optimizer. Default: None
|
|
1389
|
+
n_mels (int, optional): Number of mel filterbanks. Default: ``128``.
|
|
1390
|
+
sample_rate (int, optional): Sample rate of audio signal. Default: ``16000``.
|
|
1391
|
+
f_min (float, optional): Minimum frequency. Default: ``0.0``.
|
|
1392
|
+
f_max (float, optional): Maximum frequency. Default: ``None``, will be set to `sample_rate // 2` .
|
|
1393
|
+
max_iter (int, optional): Maximum number of optimization iterations. Default: ``100000``.
|
|
1394
|
+
tolerance_loss (float, optional): Value of loss to stop optimization at. Default: ``1e-5``.
|
|
1395
|
+
tolerance_change (float, optional): Difference in losses to stop optimization at. Default: ``1e-8``.
|
|
1396
|
+
sgdargs (dict, optional): Arguments for the SGD optimizer. Default: ``None``, will be set to
|
|
1252
1397
|
{'sgd_lr': 0.1, 'sgd_momentum': 0.9}.
|
|
1253
|
-
norm (NormType, optional): Normalization method, can be NormType.SLANEY or NormType.NONE
|
|
1254
|
-
Default: NormType.NONE
|
|
1255
|
-
mel_type (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK
|
|
1398
|
+
norm (NormType, optional): Normalization method, can be ``NormType.SLANEY`` or ``NormType.NONE``.
|
|
1399
|
+
Default: ``NormType.NONE``, no normalization.
|
|
1400
|
+
mel_type (MelType, optional): Mel scale to use, can be ``MelType.SLANEY`` or ``MelType.HTK``.
|
|
1401
|
+
Default: ``MelType.HTK``.
|
|
1256
1402
|
|
|
1257
1403
|
Raises:
|
|
1258
1404
|
TypeError: If `n_stft` is not of type int.
|
|
@@ -1280,11 +1426,17 @@ class InverseMelScale(AudioTensorOperation):
|
|
|
1280
1426
|
|
|
1281
1427
|
Examples:
|
|
1282
1428
|
>>> import numpy as np
|
|
1429
|
+
>>> import mindspore.dataset as ds
|
|
1430
|
+
>>> import mindspore.dataset.audio as audio
|
|
1283
1431
|
>>>
|
|
1284
1432
|
>>> waveform = np.random.randn(2, 2, 3, 2)
|
|
1285
1433
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1286
1434
|
>>> transforms = [audio.InverseMelScale(20, 3, 16000, 0, 8000, 10)]
|
|
1287
1435
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1436
|
+
|
|
1437
|
+
Tutorial Examples:
|
|
1438
|
+
- `Illustration of audio transforms
|
|
1439
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1288
1440
|
"""
|
|
1289
1441
|
|
|
1290
1442
|
@check_inverse_mel_scale
|
|
@@ -1317,24 +1469,24 @@ class InverseSpectrogram(AudioTensorOperation):
|
|
|
1317
1469
|
Create an inverse spectrogram to recover an audio signal from a spectrogram.
|
|
1318
1470
|
|
|
1319
1471
|
Args:
|
|
1320
|
-
length (int, optional): The output length of the waveform, must be non negative. Default: None
|
|
1472
|
+
length (int, optional): The output length of the waveform, must be non-negative. Default: ``None``,
|
|
1321
1473
|
means to output the whole waveform.
|
|
1322
1474
|
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0.
|
|
1323
|
-
Default: 400
|
|
1475
|
+
Default: ``400``.
|
|
1324
1476
|
win_length (int, optional): Window size, which should be greater than 0.
|
|
1325
|
-
Default: None
|
|
1477
|
+
Default: ``None``, will be set to `n_fft` .
|
|
1326
1478
|
hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
|
|
1327
|
-
Default: None
|
|
1328
|
-
pad (int, optional): Two sided padding of signal, cannot be less than 0. Default: 0
|
|
1479
|
+
Default: ``None``, will be set to `win_length // 2` .
|
|
1480
|
+
pad (int, optional): Two sided padding of signal, cannot be less than 0. Default: ``0``.
|
|
1329
1481
|
window (WindowType, optional): A function to create a window tensor that is applied/multiplied to each
|
|
1330
|
-
frame/window. Default: WindowType.HANN
|
|
1331
|
-
normalized (bool, optional): Whether the spectrogram was normalized by magnitude after stft. Default: False
|
|
1332
|
-
center (bool, optional): Whether the signal in spectrogram was padded on both sides. Default: True
|
|
1333
|
-
pad_mode (BorderType, optional): Controls the padding method used when `center` is True
|
|
1334
|
-
can be BorderType.REFLECT
|
|
1335
|
-
Default: BorderType.REFLECT
|
|
1482
|
+
frame/window. Default: ``WindowType.HANN``.
|
|
1483
|
+
normalized (bool, optional): Whether the spectrogram was normalized by magnitude after stft. Default: ``False``.
|
|
1484
|
+
center (bool, optional): Whether the signal in spectrogram was padded on both sides. Default: ``True``.
|
|
1485
|
+
pad_mode (BorderType, optional): Controls the padding method used when `center` is ``True``,
|
|
1486
|
+
can be ``BorderType.REFLECT``, ``BorderType.CONSTANT``, ``BorderType.EDGE`` or ``BorderType.SYMMETRIC``.
|
|
1487
|
+
Default: ``BorderType.REFLECT``.
|
|
1336
1488
|
onesided (bool, optional): Controls whether spectrogram was used to return half of results to avoid
|
|
1337
|
-
redundancy. Default: True
|
|
1489
|
+
redundancy. Default: ``True``.
|
|
1338
1490
|
|
|
1339
1491
|
Raises:
|
|
1340
1492
|
TypeError: If `length` is not of type int.
|
|
@@ -1358,12 +1510,18 @@ class InverseSpectrogram(AudioTensorOperation):
|
|
|
1358
1510
|
|
|
1359
1511
|
Examples:
|
|
1360
1512
|
>>> import numpy as np
|
|
1513
|
+
>>> import mindspore.dataset as ds
|
|
1514
|
+
>>> import mindspore.dataset.audio as audio
|
|
1361
1515
|
>>>
|
|
1362
1516
|
>>> waveform = np.array([[[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
|
|
1363
1517
|
... [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]]])
|
|
1364
1518
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1365
1519
|
>>> transforms = [audio.InverseSpectrogram(1, 400, 400, 200)]
|
|
1366
1520
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1521
|
+
|
|
1522
|
+
Tutorial Examples:
|
|
1523
|
+
- `Illustration of audio transforms
|
|
1524
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1367
1525
|
"""
|
|
1368
1526
|
|
|
1369
1527
|
@check_inverse_spectrogram
|
|
@@ -1400,16 +1558,16 @@ class LFCC(AudioTensorOperation):
|
|
|
1400
1558
|
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
1401
1559
|
|
|
1402
1560
|
Args:
|
|
1403
|
-
sample_rate (int, optional): Sample rate of audio signal. Default: 16000
|
|
1404
|
-
n_filter (int, optional) : Number of linear filters to apply. Default: 128
|
|
1405
|
-
n_lfcc (int, optional) : Number of lfc coefficients to retain. Default: 40
|
|
1406
|
-
f_min (float, optional): Minimum frequency. Default: 0.0
|
|
1407
|
-
f_max (float, optional): Maximum frequency. Default: None
|
|
1408
|
-
dct_type (int, optional) : Type of DCT to use. The value can only be 2
|
|
1409
|
-
norm (NormMode, optional) : Norm to use. Default: NormMode.ORTHO
|
|
1410
|
-
log_lf (bool, optional) : Whether to use log-lf spectrograms instead of db-scaled. Default: False
|
|
1561
|
+
sample_rate (int, optional): Sample rate of audio signal. Default: ``16000``.
|
|
1562
|
+
n_filter (int, optional) : Number of linear filters to apply. Default: ``128``.
|
|
1563
|
+
n_lfcc (int, optional) : Number of lfc coefficients to retain. Default: ``40``.
|
|
1564
|
+
f_min (float, optional): Minimum frequency. Default: ``0.0``.
|
|
1565
|
+
f_max (float, optional): Maximum frequency. Default: ``None``, will be set to `sample_rate // 2` .
|
|
1566
|
+
dct_type (int, optional) : Type of DCT to use. The value can only be ``2``. Default: ``2``.
|
|
1567
|
+
norm (NormMode, optional) : Norm to use. Default: ``NormMode.ORTHO``.
|
|
1568
|
+
log_lf (bool, optional) : Whether to use log-lf spectrograms instead of db-scaled. Default: ``False``.
|
|
1411
1569
|
speckwargs (dict, optional) : Arguments for :class:`mindspore.dataset.audio.Spectrogram`.
|
|
1412
|
-
Default: None
|
|
1570
|
+
Default: ``None``, the default setting is a dict including
|
|
1413
1571
|
|
|
1414
1572
|
- 'n_fft': 400
|
|
1415
1573
|
- 'win_length': n_fft
|
|
@@ -1433,7 +1591,7 @@ class LFCC(AudioTensorOperation):
|
|
|
1433
1591
|
ValueError: If `n_lfcc` is less than 0.
|
|
1434
1592
|
ValueError: If `f_min` is greater than `f_max` .
|
|
1435
1593
|
ValueError: If `f_min` is greater than `sample_rate // 2` when `f_max` is set to None.
|
|
1436
|
-
ValueError: If `dct_type` is not 2
|
|
1594
|
+
ValueError: If `dct_type` is not ``2``.
|
|
1437
1595
|
|
|
1438
1596
|
Supported Platforms:
|
|
1439
1597
|
``CPU``
|
|
@@ -1447,6 +1605,10 @@ class LFCC(AudioTensorOperation):
|
|
|
1447
1605
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1448
1606
|
>>> transforms = [audio.LFCC()]
|
|
1449
1607
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1608
|
+
|
|
1609
|
+
Tutorial Examples:
|
|
1610
|
+
- `Illustration of audio transforms
|
|
1611
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1450
1612
|
"""
|
|
1451
1613
|
|
|
1452
1614
|
@check_lfcc
|
|
@@ -1494,7 +1656,7 @@ class LFilter(AudioTensorOperation):
|
|
|
1494
1656
|
b_coeffs (Sequence[float]): Numerator coefficients of difference equation of dimension.
|
|
1495
1657
|
Lower delays coefficients are first, e.g. [b0, b1, b2, ...].
|
|
1496
1658
|
Must be same size as a_coeffs (pad with 0's as necessary).
|
|
1497
|
-
clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default: True
|
|
1659
|
+
clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default: ``True``.
|
|
1498
1660
|
|
|
1499
1661
|
Raises:
|
|
1500
1662
|
TypeError: If `a_coeffs` is not of type Sequence[float].
|
|
@@ -1508,6 +1670,8 @@ class LFilter(AudioTensorOperation):
|
|
|
1508
1670
|
|
|
1509
1671
|
Examples:
|
|
1510
1672
|
>>> import numpy as np
|
|
1673
|
+
>>> import mindspore.dataset as ds
|
|
1674
|
+
>>> import mindspore.dataset.audio as audio
|
|
1511
1675
|
>>>
|
|
1512
1676
|
>>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
|
|
1513
1677
|
>>> a_coeffs = [0.1, 0.2, 0.3]
|
|
@@ -1515,6 +1679,10 @@ class LFilter(AudioTensorOperation):
|
|
|
1515
1679
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1516
1680
|
>>> transforms = [audio.LFilter(a_coeffs, b_coeffs)]
|
|
1517
1681
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1682
|
+
|
|
1683
|
+
Tutorial Examples:
|
|
1684
|
+
- `Illustration of audio transforms
|
|
1685
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1518
1686
|
"""
|
|
1519
1687
|
|
|
1520
1688
|
@check_lfilter
|
|
@@ -1547,7 +1715,7 @@ class LowpassBiquad(AudioTensorOperation):
|
|
|
1547
1715
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
1548
1716
|
cutoff_freq (float): Filter cutoff frequency (in Hz).
|
|
1549
1717
|
Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
|
|
1550
|
-
in range of (0, 1]. Default: 0.707
|
|
1718
|
+
in range of (0, 1]. Default: ``0.707``.
|
|
1551
1719
|
|
|
1552
1720
|
Raises:
|
|
1553
1721
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -1562,12 +1730,18 @@ class LowpassBiquad(AudioTensorOperation):
|
|
|
1562
1730
|
|
|
1563
1731
|
Examples:
|
|
1564
1732
|
>>> import numpy as np
|
|
1733
|
+
>>> import mindspore.dataset as ds
|
|
1734
|
+
>>> import mindspore.dataset.audio as audio
|
|
1565
1735
|
>>>
|
|
1566
1736
|
>>> waveform = np.array([[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
|
|
1567
1737
|
... [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]])
|
|
1568
1738
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1569
1739
|
>>> transforms = [audio.LowpassBiquad(4000, 1500, 0.7)]
|
|
1570
1740
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1741
|
+
|
|
1742
|
+
Tutorial Examples:
|
|
1743
|
+
- `Illustration of audio transforms
|
|
1744
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1571
1745
|
"""
|
|
1572
1746
|
|
|
1573
1747
|
@check_lowpass_biquad
|
|
@@ -1583,10 +1757,10 @@ class LowpassBiquad(AudioTensorOperation):
|
|
|
1583
1757
|
|
|
1584
1758
|
class Magphase(AudioTensorOperation):
|
|
1585
1759
|
"""
|
|
1586
|
-
Separate a complex-valued spectrogram with shape (..., 2) into its magnitude and phase.
|
|
1760
|
+
Separate a complex-valued spectrogram with shape :math:`(..., 2)` into its magnitude and phase.
|
|
1587
1761
|
|
|
1588
1762
|
Args:
|
|
1589
|
-
power (float): Power of the norm, which must be non-negative. Default: 1.0
|
|
1763
|
+
power (float): Power of the norm, which must be non-negative. Default: ``1.0``.
|
|
1590
1764
|
|
|
1591
1765
|
Raises:
|
|
1592
1766
|
RuntimeError: If the shape of input audio waveform does not match (..., 2).
|
|
@@ -1596,11 +1770,17 @@ class Magphase(AudioTensorOperation):
|
|
|
1596
1770
|
|
|
1597
1771
|
Examples:
|
|
1598
1772
|
>>> import numpy as np
|
|
1773
|
+
>>> import mindspore.dataset as ds
|
|
1774
|
+
>>> import mindspore.dataset.audio as audio
|
|
1599
1775
|
>>>
|
|
1600
1776
|
>>> waveform = np.random.random([2, 4, 2])
|
|
1601
1777
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1602
1778
|
>>> transforms = [audio.Magphase()]
|
|
1603
1779
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1780
|
+
|
|
1781
|
+
Tutorial Examples:
|
|
1782
|
+
- `Illustration of audio transforms
|
|
1783
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1604
1784
|
"""
|
|
1605
1785
|
|
|
1606
1786
|
@check_magphase
|
|
@@ -1632,11 +1812,17 @@ class MaskAlongAxis(AudioTensorOperation):
|
|
|
1632
1812
|
|
|
1633
1813
|
Examples:
|
|
1634
1814
|
>>> import numpy as np
|
|
1815
|
+
>>> import mindspore.dataset as ds
|
|
1816
|
+
>>> import mindspore.dataset.audio as audio
|
|
1635
1817
|
>>>
|
|
1636
1818
|
>>> waveform = np.random.random([1, 20, 20])
|
|
1637
1819
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1638
1820
|
>>> transforms = [audio.MaskAlongAxis(0, 10, 0.5, 1)]
|
|
1639
1821
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1822
|
+
|
|
1823
|
+
Tutorial Examples:
|
|
1824
|
+
- `Illustration of audio transforms
|
|
1825
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1640
1826
|
"""
|
|
1641
1827
|
|
|
1642
1828
|
@check_mask_along_axis
|
|
@@ -1677,11 +1863,17 @@ class MaskAlongAxisIID(AudioTensorOperation):
|
|
|
1677
1863
|
|
|
1678
1864
|
Examples:
|
|
1679
1865
|
>>> import numpy as np
|
|
1866
|
+
>>> import mindspore.dataset as ds
|
|
1867
|
+
>>> import mindspore.dataset.audio as audio
|
|
1680
1868
|
>>>
|
|
1681
1869
|
>>> waveform= np.random.random([1, 20, 20])
|
|
1682
1870
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1683
1871
|
>>> transforms = [audio.MaskAlongAxisIID(5, 0.5, 2)]
|
|
1684
1872
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1873
|
+
|
|
1874
|
+
Tutorial Examples:
|
|
1875
|
+
- `Illustration of audio transforms
|
|
1876
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1685
1877
|
"""
|
|
1686
1878
|
|
|
1687
1879
|
@check_mask_along_axis_iid
|
|
@@ -1707,15 +1899,16 @@ class MelScale(AudioTensorOperation):
|
|
|
1707
1899
|
Convert normal STFT to STFT at the Mel scale.
|
|
1708
1900
|
|
|
1709
1901
|
Args:
|
|
1710
|
-
n_mels (int, optional): Number of mel filterbanks. Default: 128
|
|
1711
|
-
sample_rate (int, optional): Sample rate of audio signal. Default: 16000
|
|
1712
|
-
f_min (float, optional): Minimum frequency. Default: 0.0
|
|
1713
|
-
f_max (float, optional): Maximum frequency. Default: None
|
|
1714
|
-
n_stft (int, optional): Number of bins in STFT. Default: 201
|
|
1715
|
-
norm (NormType, optional): Type of norm, value should be NormType.SLANEY or NormType
|
|
1716
|
-
If norm is NormType.SLANEY
|
|
1717
|
-
Default: NormType.NONE
|
|
1718
|
-
mel_type (MelType, optional): Type to use, value should be MelType.SLANEY or MelType.HTK
|
|
1902
|
+
n_mels (int, optional): Number of mel filterbanks. Default: ``128``.
|
|
1903
|
+
sample_rate (int, optional): Sample rate of audio signal. Default: ``16000``.
|
|
1904
|
+
f_min (float, optional): Minimum frequency. Default: ``0.0``.
|
|
1905
|
+
f_max (float, optional): Maximum frequency. Default: ``None``, will be set to `sample_rate // 2` .
|
|
1906
|
+
n_stft (int, optional): Number of bins in STFT. Default: ``201``.
|
|
1907
|
+
norm (NormType, optional): Type of norm, value should be ``NormType.SLANEY`` or ``NormType.NONE``.
|
|
1908
|
+
If `norm` is ``NormType.SLANEY``, divide the triangular mel weight by the width of the mel band.
|
|
1909
|
+
Default: ``NormType.NONE``, no normalization.
|
|
1910
|
+
mel_type (MelType, optional): Type to use, value should be ``MelType.SLANEY`` or ``MelType.HTK``.
|
|
1911
|
+
Default: ``MelType.HTK``.
|
|
1719
1912
|
|
|
1720
1913
|
Raises:
|
|
1721
1914
|
TypeError: If `n_mels` is not of type int.
|
|
@@ -1736,12 +1929,18 @@ class MelScale(AudioTensorOperation):
|
|
|
1736
1929
|
|
|
1737
1930
|
Examples:
|
|
1738
1931
|
>>> import numpy as np
|
|
1932
|
+
>>> import mindspore.dataset as ds
|
|
1933
|
+
>>> import mindspore.dataset.audio as audio
|
|
1739
1934
|
>>>
|
|
1740
1935
|
>>> waveform = np.array([[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
|
|
1741
1936
|
... [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]])
|
|
1742
1937
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1743
1938
|
>>> transforms = [audio.MelScale(4000, 1500, 0.7)]
|
|
1744
1939
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1940
|
+
|
|
1941
|
+
Tutorial Examples:
|
|
1942
|
+
- `Illustration of audio transforms
|
|
1943
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1745
1944
|
"""
|
|
1746
1945
|
|
|
1747
1946
|
@check_mel_scale
|
|
@@ -1766,31 +1965,33 @@ class MelSpectrogram(AudioTensorOperation):
|
|
|
1766
1965
|
Create MelSpectrogram for a raw audio signal.
|
|
1767
1966
|
|
|
1768
1967
|
Args:
|
|
1769
|
-
sample_rate (int, optional): Sampling rate of audio signal (in Hz), which can't be less than 0.
|
|
1968
|
+
sample_rate (int, optional): Sampling rate of audio signal (in Hz), which can't be less than 0.
|
|
1969
|
+
Default: ``16000``.
|
|
1770
1970
|
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0 and less than
|
|
1771
|
-
twice of the last dimension size of the input. Default: 400
|
|
1971
|
+
twice of the last dimension size of the input. Default: ``400``.
|
|
1772
1972
|
win_length (int, optional): Window size, which should be greater than 0 and no more than `n_fft` . Default:
|
|
1773
1973
|
None, will be set to `n_fft` .
|
|
1774
1974
|
hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
|
|
1775
|
-
Default: None
|
|
1776
|
-
f_min (float, optional): Minimum frequency, which can't be greater than `f_max` . Default: 0.0
|
|
1777
|
-
f_max (float, optional): Maximum frequency, which can't be less than 0. Default: None
|
|
1975
|
+
Default: ``None``, will be set to `win_length // 2` .
|
|
1976
|
+
f_min (float, optional): Minimum frequency, which can't be greater than `f_max` . Default: ``0.0``.
|
|
1977
|
+
f_max (float, optional): Maximum frequency, which can't be less than 0. Default: ``None``, will be set
|
|
1778
1978
|
to `sample_rate // 2` .
|
|
1779
|
-
pad (int, optional): Two sided padding of signal, which can't be less than 0. Default: 0
|
|
1780
|
-
n_mels (int, optional): Number of mel filterbanks, which can't be less than 0. Default: 128
|
|
1979
|
+
pad (int, optional): Two sided padding of signal, which can't be less than 0. Default: ``0``.
|
|
1980
|
+
n_mels (int, optional): Number of mel filterbanks, which can't be less than 0. Default: ``128``.
|
|
1781
1981
|
window (WindowType, optional): A function to create a window tensor that is applied/multiplied to each
|
|
1782
|
-
frame/window. Default: WindowType.HANN
|
|
1982
|
+
frame/window. Default: ``WindowType.HANN``.
|
|
1783
1983
|
power (float, optional): Exponent for the magnitude spectrogram, which must be
|
|
1784
|
-
greater than 0, e.g., 1 for energy, 2 for power, etc. Default: 2.0
|
|
1785
|
-
normalized (bool, optional): Whether to normalize by magnitude after stft. Default: False
|
|
1786
|
-
center (bool, optional): Whether to pad waveform on both sides. Default: True
|
|
1787
|
-
pad_mode (BorderType, optional): Controls the padding method used when `center` is True
|
|
1788
|
-
can be BorderType.REFLECT
|
|
1789
|
-
Default: BorderType.REFLECT
|
|
1790
|
-
onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: True
|
|
1984
|
+
greater than 0, e.g., ``1`` for energy, ``2`` for power, etc. Default: ``2.0``.
|
|
1985
|
+
normalized (bool, optional): Whether to normalize by magnitude after stft. Default: ``False``.
|
|
1986
|
+
center (bool, optional): Whether to pad waveform on both sides. Default: ``True``.
|
|
1987
|
+
pad_mode (BorderType, optional): Controls the padding method used when `center` is ``True``,
|
|
1988
|
+
can be ``BorderType.REFLECT``, ``BorderType.CONSTANT``, ``BorderType.EDGE`` or ``BorderType.SYMMETRIC``.
|
|
1989
|
+
Default: ``BorderType.REFLECT``.
|
|
1990
|
+
onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: ``True``.
|
|
1791
1991
|
norm (NormType, optional): If 'slaney', divide the triangular mel weights by the width of the mel band
|
|
1792
|
-
(area normalization). Default: NormType.NONE
|
|
1793
|
-
mel_scale (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK
|
|
1992
|
+
(area normalization). Default: ``NormType.NONE``, no normalization.
|
|
1993
|
+
mel_scale (MelType, optional): Mel scale to use, can be ``MelType.SLANEY`` or ``MelType.HTK``.
|
|
1994
|
+
Default: ``MelType.HTK``.
|
|
1794
1995
|
|
|
1795
1996
|
Raises:
|
|
1796
1997
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -1825,6 +2026,8 @@ class MelSpectrogram(AudioTensorOperation):
|
|
|
1825
2026
|
|
|
1826
2027
|
Examples:
|
|
1827
2028
|
>>> import numpy as np
|
|
2029
|
+
>>> import mindspore.dataset as ds
|
|
2030
|
+
>>> import mindspore.dataset.audio as audio
|
|
1828
2031
|
>>>
|
|
1829
2032
|
>>> from mindspore.dataset.audio import WindowType, BorderType, NormType, MelType
|
|
1830
2033
|
>>>
|
|
@@ -1835,6 +2038,10 @@ class MelSpectrogram(AudioTensorOperation):
|
|
|
1835
2038
|
... normalized=False, center=True, pad_mode=BorderType.REFLECT, \
|
|
1836
2039
|
... onesided=True, norm=NormType.SLANEY, mel_scale=MelType.HTK)]
|
|
1837
2040
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2041
|
+
|
|
2042
|
+
Tutorial Examples:
|
|
2043
|
+
- `Illustration of audio transforms
|
|
2044
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1838
2045
|
"""
|
|
1839
2046
|
|
|
1840
2047
|
@check_mel_spectrogram
|
|
@@ -1872,13 +2079,13 @@ class MFCC(AudioTensorOperation):
|
|
|
1872
2079
|
Create MFCC for a raw audio signal.
|
|
1873
2080
|
|
|
1874
2081
|
Args:
|
|
1875
|
-
sample_rate (int, optional): Sampling rate of audio signal (in Hz), can't be less than 0. Default: 16000
|
|
1876
|
-
n_mfcc (int, optional): Number of mfc coefficients to retain, can't be less than 0. Default: 40
|
|
1877
|
-
dct_type (int, optional): Type of DCT (discrete cosine transform) to use, can only be 2
|
|
1878
|
-
norm (NormMode, optional): Norm to use. Default: NormMode.ORTHO
|
|
1879
|
-
log_mels (bool, optional): Whether to use log-mel spectrograms instead of db-scaled. Default: False
|
|
2082
|
+
sample_rate (int, optional): Sampling rate of audio signal (in Hz), can't be less than 0. Default: ``16000``.
|
|
2083
|
+
n_mfcc (int, optional): Number of mfc coefficients to retain, can't be less than 0. Default: ``40``.
|
|
2084
|
+
dct_type (int, optional): Type of DCT (discrete cosine transform) to use, can only be ``2``. Default: ``2``.
|
|
2085
|
+
norm (NormMode, optional): Norm to use. Default: ``NormMode.ORTHO``.
|
|
2086
|
+
log_mels (bool, optional): Whether to use log-mel spectrograms instead of db-scaled. Default: ``False``.
|
|
1880
2087
|
melkwargs (dict, optional): Arguments for :class:`mindspore.dataset.audio.MelSpectrogram`.
|
|
1881
|
-
Default: None
|
|
2088
|
+
Default: ``None``, the default setting is a dict including
|
|
1882
2089
|
|
|
1883
2090
|
- 'n_fft': 400
|
|
1884
2091
|
- 'win_length': n_fft
|
|
@@ -1903,19 +2110,25 @@ class MFCC(AudioTensorOperation):
|
|
|
1903
2110
|
TypeError: If `melkwargs` is not of type dict.
|
|
1904
2111
|
ValueError: If `sample_rate` is a negative number.
|
|
1905
2112
|
ValueError: If `n_mfcc` is a negative number.
|
|
1906
|
-
ValueError: If `dct_type` is not 2
|
|
2113
|
+
ValueError: If `dct_type` is not ``2``.
|
|
1907
2114
|
|
|
1908
2115
|
Supported Platforms:
|
|
1909
2116
|
``CPU``
|
|
1910
2117
|
|
|
1911
2118
|
Examples:
|
|
1912
2119
|
>>> import numpy as np
|
|
2120
|
+
>>> import mindspore.dataset as ds
|
|
2121
|
+
>>> import mindspore.dataset.audio as audio
|
|
1913
2122
|
>>>
|
|
1914
2123
|
>>> waveform = np.array([[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
|
|
1915
2124
|
... [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]])
|
|
1916
2125
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1917
2126
|
>>> transforms = [audio.MFCC(4000, 1500, 2)]
|
|
1918
2127
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2128
|
+
|
|
2129
|
+
Tutorial Examples:
|
|
2130
|
+
- `Illustration of audio transforms
|
|
2131
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1919
2132
|
"""
|
|
1920
2133
|
|
|
1921
2134
|
@check_mfcc
|
|
@@ -1961,7 +2174,7 @@ class MuLawDecoding(AudioTensorOperation):
|
|
|
1961
2174
|
Decode mu-law encoded signal, refer to `mu-law algorithm <https://en.wikipedia.org/wiki/M-law_algorithm>`_ .
|
|
1962
2175
|
|
|
1963
2176
|
Args:
|
|
1964
|
-
quantization_channels (int, optional): Number of channels, which must be positive. Default: 256
|
|
2177
|
+
quantization_channels (int, optional): Number of channels, which must be positive. Default: ``256``.
|
|
1965
2178
|
|
|
1966
2179
|
Raises:
|
|
1967
2180
|
TypeError: If `quantization_channels` is not of type int.
|
|
@@ -1973,11 +2186,17 @@ class MuLawDecoding(AudioTensorOperation):
|
|
|
1973
2186
|
|
|
1974
2187
|
Examples:
|
|
1975
2188
|
>>> import numpy as np
|
|
2189
|
+
>>> import mindspore.dataset as ds
|
|
2190
|
+
>>> import mindspore.dataset.audio as audio
|
|
1976
2191
|
>>>
|
|
1977
2192
|
>>> waveform = np.random.random([1, 3, 4])
|
|
1978
2193
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1979
2194
|
>>> transforms = [audio.MuLawDecoding()]
|
|
1980
2195
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2196
|
+
|
|
2197
|
+
Tutorial Examples:
|
|
2198
|
+
- `Illustration of audio transforms
|
|
2199
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
1981
2200
|
"""
|
|
1982
2201
|
|
|
1983
2202
|
@check_mu_law_coding
|
|
@@ -1994,7 +2213,7 @@ class MuLawEncoding(AudioTensorOperation):
|
|
|
1994
2213
|
Encode signal based on mu-law companding.
|
|
1995
2214
|
|
|
1996
2215
|
Args:
|
|
1997
|
-
quantization_channels (int, optional): Number of channels, which must be positive. Default: 256
|
|
2216
|
+
quantization_channels (int, optional): Number of channels, which must be positive. Default: ``256``.
|
|
1998
2217
|
|
|
1999
2218
|
Raises:
|
|
2000
2219
|
TypeError: If `quantization_channels` is not of type int.
|
|
@@ -2005,11 +2224,17 @@ class MuLawEncoding(AudioTensorOperation):
|
|
|
2005
2224
|
|
|
2006
2225
|
Examples:
|
|
2007
2226
|
>>> import numpy as np
|
|
2227
|
+
>>> import mindspore.dataset as ds
|
|
2228
|
+
>>> import mindspore.dataset.audio as audio
|
|
2008
2229
|
>>>
|
|
2009
2230
|
>>> waveform = np.random.random([1, 3, 4])
|
|
2010
2231
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2011
2232
|
>>> transforms = [audio.MuLawEncoding()]
|
|
2012
2233
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2234
|
+
|
|
2235
|
+
Tutorial Examples:
|
|
2236
|
+
- `Illustration of audio transforms
|
|
2237
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2013
2238
|
"""
|
|
2014
2239
|
|
|
2015
2240
|
@check_mu_law_coding
|
|
@@ -2028,9 +2253,10 @@ class Overdrive(AudioTensorOperation):
|
|
|
2028
2253
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
2029
2254
|
|
|
2030
2255
|
Args:
|
|
2031
|
-
gain (float, optional): Desired gain at the boost (or attenuation) in dB, in range of [0, 100].
|
|
2256
|
+
gain (float, optional): Desired gain at the boost (or attenuation) in dB, in range of [0, 100].
|
|
2257
|
+
Default: ``20.0``.
|
|
2032
2258
|
color (float, optional): Controls the amount of even harmonic content in the over-driven output,
|
|
2033
|
-
in range of [0, 100]. Default: 20.0
|
|
2259
|
+
in range of [0, 100]. Default: ``20.0``.
|
|
2034
2260
|
|
|
2035
2261
|
Raises:
|
|
2036
2262
|
TypeError: If `gain` is not of type float.
|
|
@@ -2044,11 +2270,17 @@ class Overdrive(AudioTensorOperation):
|
|
|
2044
2270
|
|
|
2045
2271
|
Examples:
|
|
2046
2272
|
>>> import numpy as np
|
|
2273
|
+
>>> import mindspore.dataset as ds
|
|
2274
|
+
>>> import mindspore.dataset.audio as audio
|
|
2047
2275
|
>>>
|
|
2048
2276
|
>>> waveform = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
|
|
2049
2277
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2050
2278
|
>>> transforms = [audio.Overdrive()]
|
|
2051
2279
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2280
|
+
|
|
2281
|
+
Tutorial Examples:
|
|
2282
|
+
- `Illustration of audio transforms
|
|
2283
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2052
2284
|
"""
|
|
2053
2285
|
|
|
2054
2286
|
@check_overdrive
|
|
@@ -2070,14 +2302,15 @@ class Phaser(AudioTensorOperation):
|
|
|
2070
2302
|
Args:
|
|
2071
2303
|
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
|
|
2072
2304
|
gain_in (float, optional): Desired input gain at the boost (or attenuation) in dB,
|
|
2073
|
-
in range of [0.0, 1.0]. Default: 0.4
|
|
2305
|
+
in range of [0.0, 1.0]. Default: ``0.4``.
|
|
2074
2306
|
gain_out (float, optional): Desired output gain at the boost (or attenuation) in dB,
|
|
2075
|
-
in range of [0.0, 1e9]. Default: 0.74
|
|
2076
|
-
delay_ms (float, optional): Desired delay in milliseconds, in range of [0.0, 5.0]. Default: 3.0
|
|
2077
|
-
decay (float, optional): Desired decay relative to gain-in, in range of [0.0, 0.99]. Default: 0.4
|
|
2078
|
-
mod_speed (float, optional): Modulation speed in Hz, in range of [0.1, 2.0]. Default: 0.5
|
|
2079
|
-
sinusoidal (bool, optional): If True
|
|
2080
|
-
If False
|
|
2307
|
+
in range of [0.0, 1e9]. Default: ``0.74``.
|
|
2308
|
+
delay_ms (float, optional): Desired delay in milliseconds, in range of [0.0, 5.0]. Default: ``3.0``.
|
|
2309
|
+
decay (float, optional): Desired decay relative to gain-in, in range of [0.0, 0.99]. Default: ``0.4``.
|
|
2310
|
+
mod_speed (float, optional): Modulation speed in Hz, in range of [0.1, 2.0]. Default: ``0.5``.
|
|
2311
|
+
sinusoidal (bool, optional): If ``True``, use sinusoidal modulation (preferable for multiple instruments).
|
|
2312
|
+
If ``False``, use triangular modulation (gives single instruments a sharper phasing effect).
|
|
2313
|
+
Default: ``True``.
|
|
2081
2314
|
|
|
2082
2315
|
Raises:
|
|
2083
2316
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -2099,11 +2332,17 @@ class Phaser(AudioTensorOperation):
|
|
|
2099
2332
|
|
|
2100
2333
|
Examples:
|
|
2101
2334
|
>>> import numpy as np
|
|
2335
|
+
>>> import mindspore.dataset as ds
|
|
2336
|
+
>>> import mindspore.dataset.audio as audio
|
|
2102
2337
|
>>>
|
|
2103
2338
|
>>> waveform = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
|
|
2104
2339
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2105
2340
|
>>> transforms = [audio.Phaser(44100)]
|
|
2106
2341
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2342
|
+
|
|
2343
|
+
Tutorial Examples:
|
|
2344
|
+
- `Illustration of audio transforms
|
|
2345
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2107
2346
|
"""
|
|
2108
2347
|
|
|
2109
2348
|
@check_phaser
|
|
@@ -2142,12 +2381,18 @@ class PhaseVocoder(AudioTensorOperation):
|
|
|
2142
2381
|
|
|
2143
2382
|
Examples:
|
|
2144
2383
|
>>> import numpy as np
|
|
2384
|
+
>>> import mindspore.dataset as ds
|
|
2385
|
+
>>> import mindspore.dataset.audio as audio
|
|
2145
2386
|
>>>
|
|
2146
2387
|
>>> waveform = np.random.random([2, 44, 10, 2])
|
|
2147
2388
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2148
2389
|
>>> phase_advance = np.random.random([44, 1])
|
|
2149
2390
|
>>> transforms = [audio.PhaseVocoder(rate=2, phase_advance=phase_advance)]
|
|
2150
2391
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2392
|
+
|
|
2393
|
+
Tutorial Examples:
|
|
2394
|
+
- `Illustration of audio transforms
|
|
2395
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2151
2396
|
"""
|
|
2152
2397
|
|
|
2153
2398
|
@check_phase_vocoder
|
|
@@ -2167,13 +2412,13 @@ class PitchShift(AudioTensorOperation):
|
|
|
2167
2412
|
Args:
|
|
2168
2413
|
sample_rate (int): Sampling rate of waveform (in Hz).
|
|
2169
2414
|
n_steps (int): The steps to shift waveform.
|
|
2170
|
-
bins_per_octave (int, optional): The number of steps per octave. Default: 12
|
|
2171
|
-
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 512
|
|
2172
|
-
win_length (int, optional): Window size. Default: None
|
|
2173
|
-
hop_length (int, optional): Length of hop between STFT windows. Default: None
|
|
2415
|
+
bins_per_octave (int, optional): The number of steps per octave. Default: ``12``.
|
|
2416
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: ``512``.
|
|
2417
|
+
win_length (int, optional): Window size. Default: ``None``, will be set to `n_fft` .
|
|
2418
|
+
hop_length (int, optional): Length of hop between STFT windows. Default: ``None``,
|
|
2174
2419
|
will be set to `win_length // 4` .
|
|
2175
2420
|
window (WindowType, optional): Window tensor that is applied/multiplied to each frame/window.
|
|
2176
|
-
Default: WindowType.HANN
|
|
2421
|
+
Default: ``WindowType.HANN``.
|
|
2177
2422
|
|
|
2178
2423
|
Raises:
|
|
2179
2424
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -2203,6 +2448,10 @@ class PitchShift(AudioTensorOperation):
|
|
|
2203
2448
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2204
2449
|
>>> transforms = [audio.PitchShift(sample_rate=16000,n_steps=4)]
|
|
2205
2450
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2451
|
+
|
|
2452
|
+
Tutorial Examples:
|
|
2453
|
+
- `Illustration of audio transforms
|
|
2454
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2206
2455
|
"""
|
|
2207
2456
|
|
|
2208
2457
|
@check_pitch_shift
|
|
@@ -2231,15 +2480,18 @@ class Resample(AudioTensorOperation):
|
|
|
2231
2480
|
Resample a signal from one frequency to another. A resample method can be given.
|
|
2232
2481
|
|
|
2233
2482
|
Args:
|
|
2234
|
-
orig_freq (float, optional): The original frequency of the signal, must be positive. Default: 16000
|
|
2235
|
-
new_freq (float, optional): The desired frequency, must be positive. Default: 16000
|
|
2236
|
-
resample_method (ResampleMethod, optional): The resample method to use, can be
|
|
2237
|
-
|
|
2483
|
+
orig_freq (float, optional): The original frequency of the signal, must be positive. Default: ``16000``.
|
|
2484
|
+
new_freq (float, optional): The desired frequency, must be positive. Default: ``16000``.
|
|
2485
|
+
resample_method (ResampleMethod, optional): The resample method to use, can be
|
|
2486
|
+
``ResampleMethod.SINC_INTERPOLATION`` or ``ResampleMethod.KAISER_WINDOW``.
|
|
2487
|
+
Default: ``ResampleMethod.SINC_INTERPOLATION``.
|
|
2238
2488
|
lowpass_filter_width (int, optional): Controls the sharpness of the filter, more means sharper but less
|
|
2239
|
-
efficient, must be positive. Default: 6
|
|
2489
|
+
efficient, must be positive. Default: ``6``.
|
|
2240
2490
|
rolloff (float, optional): The roll-off frequency of the filter, as a fraction of the Nyquist. Lower values
|
|
2241
|
-
reduce anti-aliasing, but also reduce some of the highest frequencies, in range of (0, 1].
|
|
2242
|
-
|
|
2491
|
+
reduce anti-aliasing, but also reduce some of the highest frequencies, in range of (0, 1].
|
|
2492
|
+
Default: ``0.99``.
|
|
2493
|
+
beta (float, optional): The shape parameter used for kaiser window. Default: ``None``,
|
|
2494
|
+
will use ``14.769656459379492``.
|
|
2243
2495
|
|
|
2244
2496
|
Raises:
|
|
2245
2497
|
TypeError: If `orig_freq` is not of type float.
|
|
@@ -2258,6 +2510,8 @@ class Resample(AudioTensorOperation):
|
|
|
2258
2510
|
|
|
2259
2511
|
Examples:
|
|
2260
2512
|
>>> import numpy as np
|
|
2513
|
+
>>> import mindspore.dataset as ds
|
|
2514
|
+
>>> import mindspore.dataset.audio as audio
|
|
2261
2515
|
>>> from mindspore.dataset.audio import ResampleMethod
|
|
2262
2516
|
>>>
|
|
2263
2517
|
>>> waveform = np.random.random([1, 30])
|
|
@@ -2266,6 +2520,10 @@ class Resample(AudioTensorOperation):
|
|
|
2266
2520
|
... resample_method=ResampleMethod.SINC_INTERPOLATION,
|
|
2267
2521
|
... lowpass_filter_width=6, rolloff=0.99, beta=None)]
|
|
2268
2522
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2523
|
+
|
|
2524
|
+
Tutorial Examples:
|
|
2525
|
+
- `Illustration of audio transforms
|
|
2526
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2269
2527
|
"""
|
|
2270
2528
|
|
|
2271
2529
|
@check_resample
|
|
@@ -2304,11 +2562,17 @@ class RiaaBiquad(AudioTensorOperation):
|
|
|
2304
2562
|
|
|
2305
2563
|
Examples:
|
|
2306
2564
|
>>> import numpy as np
|
|
2565
|
+
>>> import mindspore.dataset as ds
|
|
2566
|
+
>>> import mindspore.dataset.audio as audio
|
|
2307
2567
|
>>>
|
|
2308
2568
|
>>> waveform = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
|
|
2309
2569
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2310
2570
|
>>> transforms = [audio.RiaaBiquad(44100)]
|
|
2311
2571
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2572
|
+
|
|
2573
|
+
Tutorial Examples:
|
|
2574
|
+
- `Illustration of audio transforms
|
|
2575
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2312
2576
|
"""
|
|
2313
2577
|
|
|
2314
2578
|
@check_riaa_biquad
|
|
@@ -2325,12 +2589,12 @@ class SlidingWindowCmn(AudioTensorOperation):
|
|
|
2325
2589
|
Apply sliding-window cepstral mean (and optionally variance) normalization per utterance.
|
|
2326
2590
|
|
|
2327
2591
|
Args:
|
|
2328
|
-
cmn_window (int, optional): Window in frames for running average CMN computation. Default: 600
|
|
2592
|
+
cmn_window (int, optional): Window in frames for running average CMN computation. Default: ``600``.
|
|
2329
2593
|
min_cmn_window (int, optional): Minimum CMN window used at start of decoding (adds latency only at start).
|
|
2330
|
-
Only applicable if center is False
|
|
2331
|
-
center (bool, optional): If True
|
|
2332
|
-
to the left. Default: False
|
|
2333
|
-
norm_vars (bool, optional): If True
|
|
2594
|
+
Only applicable if center is ``False``, ignored if center is ``True``. Default: ``100``.
|
|
2595
|
+
center (bool, optional): If ``True``, use a window centered on the current frame. If ``False``, window is
|
|
2596
|
+
to the left. Default: ``False``.
|
|
2597
|
+
norm_vars (bool, optional): If ``True``, normalize variance to one. Default: ``False``.
|
|
2334
2598
|
|
|
2335
2599
|
Raises:
|
|
2336
2600
|
TypeError: If `cmn_window` is not of type int.
|
|
@@ -2345,11 +2609,17 @@ class SlidingWindowCmn(AudioTensorOperation):
|
|
|
2345
2609
|
|
|
2346
2610
|
Examples:
|
|
2347
2611
|
>>> import numpy as np
|
|
2612
|
+
>>> import mindspore.dataset as ds
|
|
2613
|
+
>>> import mindspore.dataset.audio as audio
|
|
2348
2614
|
>>>
|
|
2349
2615
|
>>> waveform = np.array([[[1, 2, 3], [4, 5, 6]]], dtype=np.float64)
|
|
2350
2616
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2351
2617
|
>>> transforms = [audio.SlidingWindowCmn()]
|
|
2352
2618
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2619
|
+
|
|
2620
|
+
Tutorial Examples:
|
|
2621
|
+
- `Illustration of audio transforms
|
|
2622
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2353
2623
|
"""
|
|
2354
2624
|
|
|
2355
2625
|
@check_sliding_window_cmn
|
|
@@ -2376,14 +2646,14 @@ class SpectralCentroid(TensorOperation):
|
|
|
2376
2646
|
Compute the spectral centroid for each channel along the time axis.
|
|
2377
2647
|
|
|
2378
2648
|
Args:
|
|
2379
|
-
sample_rate (int): Sampling rate of audio signal, e.g. 44100 (Hz).
|
|
2380
|
-
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 400
|
|
2381
|
-
win_length (int, optional): Window size. Default: None
|
|
2382
|
-
hop_length (int, optional): Length of hop between STFT windows. Default: None
|
|
2383
|
-
pad (int, optional): Two sided padding of signal. Default: 0
|
|
2649
|
+
sample_rate (int): Sampling rate of audio signal, e.g. ``44100`` (Hz).
|
|
2650
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: ``400``.
|
|
2651
|
+
win_length (int, optional): Window size. Default: ``None``, will use `n_fft` .
|
|
2652
|
+
hop_length (int, optional): Length of hop between STFT windows. Default: ``None``, will use `win_length // 2` .
|
|
2653
|
+
pad (int, optional): Two sided padding of signal. Default: ``0``.
|
|
2384
2654
|
window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
|
|
2385
|
-
can be WindowType.BARTLETT
|
|
2386
|
-
or WindowType.KAISER
|
|
2655
|
+
can be ``WindowType.BARTLETT``, ``WindowType.BLACKMAN``, ``WindowType.HAMMING``, ``WindowType.HANN``
|
|
2656
|
+
or ``WindowType.KAISER``. Default: ``WindowType.HANN``.
|
|
2387
2657
|
|
|
2388
2658
|
Raises:
|
|
2389
2659
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -2405,11 +2675,17 @@ class SpectralCentroid(TensorOperation):
|
|
|
2405
2675
|
|
|
2406
2676
|
Examples:
|
|
2407
2677
|
>>> import numpy as np
|
|
2678
|
+
>>> import mindspore.dataset as ds
|
|
2679
|
+
>>> import mindspore.dataset.audio as audio
|
|
2408
2680
|
>>>
|
|
2409
2681
|
>>> waveform = np.random.random([5, 10, 20])
|
|
2410
2682
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2411
2683
|
>>> transforms = [audio.SpectralCentroid(44100)]
|
|
2412
2684
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2685
|
+
|
|
2686
|
+
Tutorial Examples:
|
|
2687
|
+
- `Illustration of audio transforms
|
|
2688
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2413
2689
|
"""
|
|
2414
2690
|
|
|
2415
2691
|
@check_spectral_centroid
|
|
@@ -2432,21 +2708,21 @@ class Spectrogram(TensorOperation):
|
|
|
2432
2708
|
Create a spectrogram from an audio signal.
|
|
2433
2709
|
|
|
2434
2710
|
Args:
|
|
2435
|
-
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 400
|
|
2436
|
-
win_length (int, optional): Window size. Default: None
|
|
2437
|
-
hop_length (int, optional): Length of hop between STFT windows. Default: None
|
|
2438
|
-
pad (int, optional): Two sided padding of signal. Default: 0
|
|
2711
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: ``400``.
|
|
2712
|
+
win_length (int, optional): Window size. Default: ``None``, will use `n_fft` .
|
|
2713
|
+
hop_length (int, optional): Length of hop between STFT windows. Default: ``None``, will use `win_length // 2` .
|
|
2714
|
+
pad (int, optional): Two sided padding of signal. Default: ``0``.
|
|
2439
2715
|
window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
|
|
2440
|
-
can be WindowType.BARTLETT
|
|
2441
|
-
or WindowType.KAISER
|
|
2716
|
+
can be ``WindowType.BARTLETT``, ``WindowType.BLACKMAN``, ``WindowType.HAMMING``, ``WindowType.HANN``
|
|
2717
|
+
or ``WindowType.KAISER``. Currently, Kaiser window is not supported on macOS. Default: ``WindowType.HANN``.
|
|
2442
2718
|
power (float, optional): Exponent for the magnitude spectrogram, must be non-negative,
|
|
2443
|
-
e.g., 1 for energy, 2 for power, etc. Default: 2.0
|
|
2444
|
-
normalized (bool, optional): Whether to normalize by magnitude after stft. Default: False
|
|
2445
|
-
center (bool, optional): Whether to pad waveform on both sides. Default: True
|
|
2446
|
-
pad_mode (BorderType, optional): Controls the padding method used when `center` is True
|
|
2447
|
-
can be BorderType.REFLECT
|
|
2448
|
-
Default: BorderType.REFLECT
|
|
2449
|
-
onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: True
|
|
2719
|
+
e.g., ``1`` for energy, ``2`` for power, etc. Default: ``2.0``.
|
|
2720
|
+
normalized (bool, optional): Whether to normalize by magnitude after stft. Default: ``False``.
|
|
2721
|
+
center (bool, optional): Whether to pad waveform on both sides. Default: ``True``.
|
|
2722
|
+
pad_mode (BorderType, optional): Controls the padding method used when `center` is ``True``,
|
|
2723
|
+
can be ``BorderType.REFLECT``, ``BorderType.CONSTANT``, ``BorderType.EDGE`` or ``BorderType.SYMMETRIC``.
|
|
2724
|
+
Default: ``BorderType.REFLECT``.
|
|
2725
|
+
onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: ``True``.
|
|
2450
2726
|
|
|
2451
2727
|
Raises:
|
|
2452
2728
|
TypeError: If `n_fft` is not of type int.
|
|
@@ -2472,11 +2748,17 @@ class Spectrogram(TensorOperation):
|
|
|
2472
2748
|
|
|
2473
2749
|
Examples:
|
|
2474
2750
|
>>> import numpy as np
|
|
2751
|
+
>>> import mindspore.dataset as ds
|
|
2752
|
+
>>> import mindspore.dataset.audio as audio
|
|
2475
2753
|
>>>
|
|
2476
2754
|
>>> waveform = np.random.random([5, 10, 20])
|
|
2477
2755
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2478
2756
|
>>> transforms = [audio.Spectrogram()]
|
|
2479
2757
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2758
|
+
|
|
2759
|
+
Tutorial Examples:
|
|
2760
|
+
- `Illustration of audio transforms
|
|
2761
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2480
2762
|
"""
|
|
2481
2763
|
|
|
2482
2764
|
@check_spectrogram
|
|
@@ -2508,15 +2790,15 @@ class TimeMasking(AudioTensorOperation):
|
|
|
2508
2790
|
The shape of the audio waveform to be processed needs to be <..., freq, time>.
|
|
2509
2791
|
|
|
2510
2792
|
Args:
|
|
2511
|
-
iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: False
|
|
2512
|
-
time_mask_param (int, optional): When `iid_masks` is True
|
|
2513
|
-
from [0, time_mask_param]; When `iid_masks` is False
|
|
2793
|
+
iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: ``False``.
|
|
2794
|
+
time_mask_param (int, optional): When `iid_masks` is ``True``, length of the mask will be uniformly sampled
|
|
2795
|
+
from [0, time_mask_param]; When `iid_masks` is ``False``, directly use it as length of the mask.
|
|
2514
2796
|
The value should be in range of [0, time_length], where `time_length` is the length of audio waveform
|
|
2515
|
-
in time domain. Default: 0
|
|
2516
|
-
mask_start (int, optional): Starting point to apply mask, only works when `iid_masks` is True
|
|
2517
|
-
be in range of [0, time_length - time_mask_param], where `time_length` is
|
|
2518
|
-
in time domain. Default: 0
|
|
2519
|
-
mask_value (float, optional): Value to assign to the masked columns. Default: 0.0
|
|
2797
|
+
in time domain. Default: ``0``.
|
|
2798
|
+
mask_start (int, optional): Starting point to apply mask, only works when `iid_masks` is ``True``.
|
|
2799
|
+
The value should be in range of [0, time_length - time_mask_param], where `time_length` is
|
|
2800
|
+
the length of audio waveform in time domain. Default: ``0``.
|
|
2801
|
+
mask_value (float, optional): Value to assign to the masked columns. Default: ``0.0``.
|
|
2520
2802
|
|
|
2521
2803
|
Raises:
|
|
2522
2804
|
TypeError: If `iid_masks` is not of type bool.
|
|
@@ -2533,12 +2815,18 @@ class TimeMasking(AudioTensorOperation):
|
|
|
2533
2815
|
|
|
2534
2816
|
Examples:
|
|
2535
2817
|
>>> import numpy as np
|
|
2818
|
+
>>> import mindspore.dataset as ds
|
|
2819
|
+
>>> import mindspore.dataset.audio as audio
|
|
2536
2820
|
>>>
|
|
2537
2821
|
>>> waveform = np.random.random([4, 3, 2])
|
|
2538
2822
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2539
2823
|
>>> transforms = [audio.TimeMasking(time_mask_param=1)]
|
|
2540
2824
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2541
2825
|
|
|
2826
|
+
Tutorial Examples:
|
|
2827
|
+
- `Illustration of audio transforms
|
|
2828
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2829
|
+
|
|
2542
2830
|
.. image:: time_masking_original.png
|
|
2543
2831
|
|
|
2544
2832
|
.. image:: time_masking.png
|
|
@@ -2566,9 +2854,9 @@ class TimeStretch(AudioTensorOperation):
|
|
|
2566
2854
|
|
|
2567
2855
|
Args:
|
|
2568
2856
|
hop_length (int, optional): Length of hop between STFT windows, i.e. the number of samples
|
|
2569
|
-
between consecutive frames. Default: None
|
|
2570
|
-
n_freq (int, optional): Number of filter banks from STFT. Default: 201
|
|
2571
|
-
fixed_rate (float, optional): Rate to speed up or slow down by. Default: None
|
|
2857
|
+
between consecutive frames. Default: ``None``, will use `n_freq - 1` .
|
|
2858
|
+
n_freq (int, optional): Number of filter banks from STFT. Default: ``201``.
|
|
2859
|
+
fixed_rate (float, optional): Rate to speed up or slow down by. Default: ``None``, will keep
|
|
2572
2860
|
the original rate.
|
|
2573
2861
|
|
|
2574
2862
|
Raises:
|
|
@@ -2585,12 +2873,18 @@ class TimeStretch(AudioTensorOperation):
|
|
|
2585
2873
|
|
|
2586
2874
|
Examples:
|
|
2587
2875
|
>>> import numpy as np
|
|
2876
|
+
>>> import mindspore.dataset as ds
|
|
2877
|
+
>>> import mindspore.dataset.audio as audio
|
|
2588
2878
|
>>>
|
|
2589
2879
|
>>> waveform = np.random.random([44, 10, 2])
|
|
2590
2880
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2591
2881
|
>>> transforms = [audio.TimeStretch()]
|
|
2592
2882
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2593
2883
|
|
|
2884
|
+
Tutorial Examples:
|
|
2885
|
+
- `Illustration of audio transforms
|
|
2886
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2887
|
+
|
|
2594
2888
|
.. image:: time_stretch_rate1.5.png
|
|
2595
2889
|
|
|
2596
2890
|
.. image:: time_stretch_original.png
|
|
@@ -2621,9 +2915,9 @@ class TrebleBiquad(AudioTensorOperation):
|
|
|
2621
2915
|
Args:
|
|
2622
2916
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
2623
2917
|
gain (float): Desired gain at the boost (or attenuation) in dB.
|
|
2624
|
-
central_freq (float, optional): Central frequency (in Hz). Default: 3000
|
|
2918
|
+
central_freq (float, optional): Central frequency (in Hz). Default: ``3000``.
|
|
2625
2919
|
Q (float, optional): `Quality factor <https://en.wikipedia.org/wiki/Q_factor>`_ ,
|
|
2626
|
-
in range of (0, 1]. Default: 0.707
|
|
2920
|
+
in range of (0, 1]. Default: ``0.707``.
|
|
2627
2921
|
|
|
2628
2922
|
Raises:
|
|
2629
2923
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -2639,11 +2933,17 @@ class TrebleBiquad(AudioTensorOperation):
|
|
|
2639
2933
|
|
|
2640
2934
|
Examples:
|
|
2641
2935
|
>>> import numpy as np
|
|
2936
|
+
>>> import mindspore.dataset as ds
|
|
2937
|
+
>>> import mindspore.dataset.audio as audio
|
|
2642
2938
|
>>>
|
|
2643
2939
|
>>> waveform = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
|
|
2644
2940
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2645
2941
|
>>> transforms = [audio.TrebleBiquad(44100, 200.0)]
|
|
2646
2942
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2943
|
+
|
|
2944
|
+
Tutorial Examples:
|
|
2945
|
+
- `Illustration of audio transforms
|
|
2946
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2647
2947
|
"""
|
|
2648
2948
|
|
|
2649
2949
|
@check_treble_biquad
|
|
@@ -2668,34 +2968,34 @@ class Vad(AudioTensorOperation):
|
|
|
2668
2968
|
|
|
2669
2969
|
Args:
|
|
2670
2970
|
sample_rate (int): Sampling rate of audio signal.
|
|
2671
|
-
trigger_level (float, optional): The measurement level used to trigger activity detection. Default: 7.0
|
|
2971
|
+
trigger_level (float, optional): The measurement level used to trigger activity detection. Default: ``7.0``.
|
|
2672
2972
|
trigger_time (float, optional): The time constant (in seconds) used to help ignore short bursts of
|
|
2673
|
-
sounds. Default: 0.25
|
|
2973
|
+
sounds. Default: ``0.25``.
|
|
2674
2974
|
search_time (float, optional): The amount of audio (in seconds) to search for quieter/shorter bursts of audio
|
|
2675
|
-
to include prior to the detected trigger point. Default: 1.0
|
|
2975
|
+
to include prior to the detected trigger point. Default: ``1.0``.
|
|
2676
2976
|
allowed_gap (float, optional): The allowed gap (in seconds) between quieter/shorter bursts of audio to include
|
|
2677
|
-
prior to the detected trigger point. Default: 0.25
|
|
2977
|
+
prior to the detected trigger point. Default: ``0.25``.
|
|
2678
2978
|
pre_trigger_time (float, optional): The amount of audio (in seconds) to preserve before the trigger point and
|
|
2679
|
-
any found quieter/shorter bursts. Default: 0.0
|
|
2680
|
-
boot_time (float, optional): The time for the initial noise estimate. Default: 0.35
|
|
2979
|
+
any found quieter/shorter bursts. Default: ``0.0``.
|
|
2980
|
+
boot_time (float, optional): The time for the initial noise estimate. Default: ``0.35``.
|
|
2681
2981
|
noise_up_time (float, optional): Time constant used by the adaptive noise estimator for when the noise level is
|
|
2682
|
-
increasing. Default: 0.1
|
|
2982
|
+
increasing. Default: ``0.1``.
|
|
2683
2983
|
noise_down_time (float, optional): Time constant used by the adaptive noise estimator for when the noise level
|
|
2684
|
-
is decreasing. Default: 0.01
|
|
2984
|
+
is decreasing. Default: ``0.01``.
|
|
2685
2985
|
noise_reduction_amount (float, optional): Amount of noise reduction to use in the detection algorithm.
|
|
2686
2986
|
Default: ``1.35``.
|
|
2687
|
-
measure_freq (float, optional): Frequency of the algorithm's processing/measurements. Default: 20.0
|
|
2688
|
-
measure_duration (float, optional): The duration of measurement. Default: None
|
|
2689
|
-
period.
|
|
2690
|
-
measure_smooth_time (float, optional): Time constant used to smooth spectral measurements. Default: 0.4
|
|
2987
|
+
measure_freq (float, optional): Frequency of the algorithm's processing/measurements. Default: ``20.0``.
|
|
2988
|
+
measure_duration (float, optional): The duration of measurement. Default: ``None``,
|
|
2989
|
+
will use twice the measurement period.
|
|
2990
|
+
measure_smooth_time (float, optional): Time constant used to smooth spectral measurements. Default: ``0.4``.
|
|
2691
2991
|
hp_filter_freq (float, optional): The 'Brick-wall' frequency of high-pass filter applied at the input to the
|
|
2692
|
-
detector algorithm. Default: 50.0
|
|
2992
|
+
detector algorithm. Default: ``50.0``.
|
|
2693
2993
|
lp_filter_freq (float, optional): The 'Brick-wall' frequency of low-pass filter applied at the input to the
|
|
2694
|
-
detector algorithm. Default: 6000.0
|
|
2994
|
+
detector algorithm. Default: ``6000.0``.
|
|
2695
2995
|
hp_lifter_freq (float, optional): The 'Brick-wall' frequency of high-pass lifter used in the
|
|
2696
|
-
detector algorithm. Default: 150.0
|
|
2996
|
+
detector algorithm. Default: ``150.0``.
|
|
2697
2997
|
lp_lifter_freq (float, optional): The 'Brick-wall' frequency of low-pass lifter used in the
|
|
2698
|
-
detector algorithm. Default: 2000.0
|
|
2998
|
+
detector algorithm. Default: ``2000.0``.
|
|
2699
2999
|
|
|
2700
3000
|
Raises:
|
|
2701
3001
|
TypeError: If `sample_rate` is not of type int.
|
|
@@ -2739,11 +3039,17 @@ class Vad(AudioTensorOperation):
|
|
|
2739
3039
|
|
|
2740
3040
|
Examples:
|
|
2741
3041
|
>>> import numpy as np
|
|
3042
|
+
>>> import mindspore.dataset as ds
|
|
3043
|
+
>>> import mindspore.dataset.audio as audio
|
|
2742
3044
|
>>>
|
|
2743
3045
|
>>> waveform = np.random.random([2, 1000])
|
|
2744
3046
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2745
3047
|
>>> transforms = [audio.Vad(sample_rate=600)]
|
|
2746
3048
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
3049
|
+
|
|
3050
|
+
Tutorial Examples:
|
|
3051
|
+
- `Illustration of audio transforms
|
|
3052
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2747
3053
|
"""
|
|
2748
3054
|
|
|
2749
3055
|
@check_vad
|
|
@@ -2789,17 +3095,17 @@ class Vol(AudioTensorOperation):
|
|
|
2789
3095
|
|
|
2790
3096
|
Args:
|
|
2791
3097
|
gain (float): Gain at the boost (or attenuation).
|
|
2792
|
-
If `gain_type` is GainType.AMPLITUDE
|
|
2793
|
-
If `gain_type` is GainType.POWER
|
|
2794
|
-
If `gain_type` is GainType.DB
|
|
2795
|
-
gain_type (GainType, optional): Type of gain, can be GainType.AMPLITUDE
|
|
2796
|
-
or GainType.DB
|
|
3098
|
+
If `gain_type` is ``GainType.AMPLITUDE``, it is a non-negative amplitude ratio.
|
|
3099
|
+
If `gain_type` is ``GainType.POWER``, it is a power (voltage squared).
|
|
3100
|
+
If `gain_type` is ``GainType.DB``, it is in decibels.
|
|
3101
|
+
gain_type (GainType, optional): Type of gain, can be ``GainType.AMPLITUDE``, ``GainType.POWER``
|
|
3102
|
+
or ``GainType.DB``. Default: ``GainType.AMPLITUDE``.
|
|
2797
3103
|
|
|
2798
3104
|
Raises:
|
|
2799
3105
|
TypeError: If `gain` is not of type float.
|
|
2800
3106
|
TypeError: If `gain_type` is not of type :class:`mindspore.dataset.audio.GainType` .
|
|
2801
|
-
ValueError: If `gain` is a negative number when `gain_type` is GainType.AMPLITUDE
|
|
2802
|
-
ValueError: If `gain` is not a positive number when `gain_type` is GainType.POWER
|
|
3107
|
+
ValueError: If `gain` is a negative number when `gain_type` is ``GainType.AMPLITUDE``.
|
|
3108
|
+
ValueError: If `gain` is not a positive number when `gain_type` is ``GainType.POWER``.
|
|
2803
3109
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2804
3110
|
|
|
2805
3111
|
Supported Platforms:
|
|
@@ -2807,12 +3113,18 @@ class Vol(AudioTensorOperation):
|
|
|
2807
3113
|
|
|
2808
3114
|
Examples:
|
|
2809
3115
|
>>> import numpy as np
|
|
3116
|
+
>>> import mindspore.dataset as ds
|
|
3117
|
+
>>> import mindspore.dataset.audio as audio
|
|
2810
3118
|
>>> from mindspore.dataset.audio import GainType
|
|
2811
3119
|
>>>
|
|
2812
3120
|
>>> waveform = np.random.random([20, 30])
|
|
2813
3121
|
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2814
3122
|
>>> transforms = [audio.Vol(gain=10, gain_type=GainType.DB)]
|
|
2815
3123
|
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
3124
|
+
|
|
3125
|
+
Tutorial Examples:
|
|
3126
|
+
- `Illustration of audio transforms
|
|
3127
|
+
<https://www.mindspore.cn/docs/en/r2.2/api_python/samples/dataset/audio_gallery.html>`_
|
|
2816
3128
|
"""
|
|
2817
3129
|
|
|
2818
3130
|
@check_vol
|