mindspore 2.0.0a0__cp38-cp38-win_amd64.whl → 2.0.0rc1__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -2
- mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +16 -1
- mindspore/_extends/parse/parser.py +107 -22
- mindspore/_extends/parse/resources.py +0 -7
- mindspore/_extends/parse/standard_method.py +885 -413
- mindspore/amp.py +52 -57
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +38 -20
- mindspore/boost/dim_reduce.py +3 -3
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +4 -6
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +41 -7
- mindspore/common/api.py +215 -141
- mindspore/common/dtype.py +8 -1
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +4 -2
- mindspore/common/jit_config.py +17 -13
- mindspore/common/mutable.py +33 -13
- mindspore/common/parameter.py +23 -21
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +62 -41
- mindspore/common/tensor.py +852 -1154
- mindspore/communication/__init__.py +2 -2
- mindspore/communication/_comm_helper.py +11 -4
- mindspore/communication/management.py +22 -21
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +201 -23
- mindspore/dataset/__init__.py +6 -6
- mindspore/dataset/audio/__init__.py +7 -7
- mindspore/dataset/audio/transforms.py +670 -30
- mindspore/dataset/audio/utils.py +47 -4
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +210 -14
- mindspore/dataset/core/validator_helpers.py +2 -2
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +322 -66
- mindspore/dataset/engine/datasets_audio.py +80 -76
- mindspore/dataset/engine/datasets_standard_format.py +51 -38
- mindspore/dataset/engine/datasets_text.py +232 -118
- mindspore/dataset/engine/datasets_user_defined.py +41 -17
- mindspore/dataset/engine/datasets_vision.py +746 -225
- mindspore/dataset/engine/graphdata.py +75 -10
- mindspore/dataset/engine/iterators.py +45 -5
- mindspore/dataset/engine/offload.py +48 -28
- mindspore/dataset/engine/validators.py +117 -8
- mindspore/dataset/text/__init__.py +6 -5
- mindspore/dataset/text/transforms.py +86 -3
- mindspore/dataset/text/utils.py +6 -4
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +3 -2
- mindspore/dataset/transforms/c_transforms.py +1 -1
- mindspore/dataset/transforms/transforms.py +2 -2
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +2 -3
- mindspore/dataset/vision/c_transforms.py +9 -9
- mindspore/dataset/vision/py_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +2 -0
- mindspore/dataset/vision/transforms.py +160 -161
- mindspore/dataset/vision/utils.py +3 -3
- mindspore/experimental/map_parameter.py +38 -26
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +44 -9
- mindspore/include/api/delegate.h +1 -1
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_parallel_runner.h +2 -2
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +19 -3
- mindspore/include/api/types.h +3 -3
- mindspore/include/dataset/constants.h +7 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filereader.py +18 -0
- mindspore/mindrecord/filewriter.py +197 -34
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
- mindspore/mindrecord/tools/csv_to_mr.py +3 -3
- mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/__init__.py +0 -4
- mindspore/nn/cell.py +204 -132
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +7 -6
- mindspore/nn/layer/__init__.py +5 -4
- mindspore/nn/layer/activation.py +40 -89
- mindspore/nn/layer/basic.py +255 -624
- mindspore/nn/layer/channel_shuffle.py +7 -6
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +41 -4
- mindspore/nn/layer/conv.py +64 -28
- mindspore/nn/layer/dense.py +9 -8
- mindspore/nn/layer/embedding.py +27 -25
- mindspore/nn/layer/image.py +53 -46
- mindspore/nn/layer/math.py +97 -105
- mindspore/nn/layer/normalization.py +117 -86
- mindspore/nn/layer/padding.py +185 -95
- mindspore/nn/layer/pooling.py +817 -414
- mindspore/nn/layer/rnn_cells.py +10 -15
- mindspore/nn/layer/rnns.py +37 -38
- mindspore/nn/layer/thor_layer.py +11 -12
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +5 -4
- mindspore/nn/loss/loss.py +334 -199
- mindspore/nn/optim/ada_grad.py +6 -6
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +4 -5
- mindspore/nn/optim/adam.py +126 -62
- mindspore/nn/optim/adamax.py +3 -4
- mindspore/nn/optim/adasum.py +6 -6
- mindspore/nn/optim/asgd.py +2 -2
- mindspore/nn/optim/ftrl.py +67 -38
- mindspore/nn/optim/lamb.py +4 -5
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +43 -4
- mindspore/nn/optim/momentum.py +6 -5
- mindspore/nn/optim/optimizer.py +3 -1
- mindspore/nn/optim/proximal_ada_grad.py +2 -2
- mindspore/nn/optim/rmsprop.py +1 -1
- mindspore/nn/optim/rprop.py +8 -9
- mindspore/nn/optim/sgd.py +19 -13
- mindspore/nn/optim/thor.py +10 -15
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +4 -4
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +9 -15
- mindspore/nn/probability/distribution/bernoulli.py +3 -3
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +5 -7
- mindspore/nn/probability/distribution/cauchy.py +3 -3
- mindspore/nn/probability/distribution/distribution.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +3 -3
- mindspore/nn/probability/distribution/half_normal.py +15 -11
- mindspore/nn/probability/distribution/laplace.py +16 -13
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/normal.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/student_t.py +20 -15
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +27 -10
- mindspore/nn/wrap/grad_reducer.py +2 -2
- mindspore/nn/wrap/loss_scale.py +40 -24
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +35 -30
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +22 -19
- mindspore/numpy/utils.py +1 -1
- mindspore/numpy/utils_const.py +108 -58
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +86 -117
- mindspore/ops/_grad/grad_base.py +23 -1
- mindspore/ops/_grad/grad_clip_ops.py +2 -3
- mindspore/ops/_grad/grad_comm_ops.py +34 -24
- mindspore/ops/_grad/grad_implementations.py +9 -45
- mindspore/ops/_grad/grad_inner_ops.py +47 -4
- mindspore/ops/_grad/grad_math_ops.py +142 -117
- mindspore/ops/_grad/grad_nn_ops.py +71 -165
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +7 -6
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
- mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
- mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
- mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -611
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_register_for_op.py +1 -0
- mindspore/ops/_utils/__init__.py +1 -2
- mindspore/ops/_utils/utils.py +19 -40
- mindspore/ops/_vmap/vmap_array_ops.py +116 -38
- mindspore/ops/_vmap/vmap_base.py +16 -9
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
- mindspore/ops/_vmap/vmap_image_ops.py +12 -5
- mindspore/ops/_vmap/vmap_math_ops.py +46 -5
- mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
- mindspore/ops/_vmap/vmap_random_ops.py +1 -1
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
- mindspore/ops/composite/__init__.py +7 -8
- mindspore/ops/composite/base.py +101 -47
- mindspore/ops/composite/math_ops.py +188 -158
- mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
- mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
- mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
- mindspore/ops/function/__init__.py +152 -8
- mindspore/ops/function/array_func.py +2555 -674
- mindspore/ops/function/clip_func.py +209 -13
- mindspore/ops/function/debug_func.py +2 -2
- mindspore/ops/function/grad/__init__.py +2 -1
- mindspore/ops/function/grad/grad_func.py +147 -62
- mindspore/ops/function/image_func.py +54 -38
- mindspore/ops/function/linalg_func.py +167 -16
- mindspore/ops/function/math_func.py +4849 -1492
- mindspore/ops/function/nn_func.py +2573 -988
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +3 -3
- mindspore/ops/function/random_func.py +790 -73
- mindspore/ops/function/sparse_func.py +98 -78
- mindspore/ops/function/sparse_unary_func.py +54 -53
- mindspore/ops/function/spectral_func.py +27 -24
- mindspore/ops/function/vmap_func.py +22 -2
- mindspore/ops/functional.py +97 -37
- mindspore/ops/op_info_register.py +70 -28
- mindspore/ops/operations/__init__.py +47 -14
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +5 -5
- mindspore/ops/operations/_grad_ops.py +276 -187
- mindspore/ops/operations/_inner_ops.py +319 -113
- mindspore/ops/operations/_ms_kernel.py +10 -8
- mindspore/ops/operations/_ocr_ops.py +9 -9
- mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
- mindspore/ops/operations/_quant_ops.py +137 -102
- mindspore/ops/operations/_rl_inner_ops.py +121 -60
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1004 -2
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +801 -466
- mindspore/ops/operations/comm_ops.py +51 -49
- mindspore/ops/operations/control_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +123 -44
- mindspore/ops/operations/debug_ops.py +24 -24
- mindspore/ops/operations/image_ops.py +240 -153
- mindspore/ops/operations/inner_ops.py +34 -50
- mindspore/ops/operations/linalg_ops.py +31 -9
- mindspore/ops/operations/math_ops.py +988 -757
- mindspore/ops/operations/nn_ops.py +965 -819
- mindspore/ops/operations/other_ops.py +51 -40
- mindspore/ops/operations/random_ops.py +204 -122
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +254 -93
- mindspore/ops/operations/spectral_ops.py +35 -3
- mindspore/ops/primitive.py +111 -9
- mindspore/parallel/_auto_parallel_context.py +189 -83
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +99 -7
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +7 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
- mindspore/parallel/_utils.py +1 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +37 -34
- mindspore/parallel/shard.py +17 -18
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +69 -47
- mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
- mindspore/profiler/parser/base_timeline_generator.py +49 -56
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
- mindspore/profiler/parser/hwts_log_parser.py +1 -1
- mindspore/profiler/parser/integrator.py +15 -14
- mindspore/profiler/parser/minddata_analyzer.py +2 -2
- mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +2 -1
- mindspore/profiler/profiling.py +218 -186
- mindspore/rewrite/__init__.py +3 -1
- mindspore/rewrite/api/node.py +1 -114
- mindspore/rewrite/api/node_type.py +3 -0
- mindspore/rewrite/api/pattern_engine.py +31 -1
- mindspore/rewrite/api/scoped_value.py +4 -4
- mindspore/rewrite/api/symbol_tree.py +3 -78
- mindspore/rewrite/api/tree_node_helper.py +1 -1
- mindspore/rewrite/ast_creator_register.py +1 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -2
- mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
- mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
- mindspore/rewrite/namespace.py +0 -2
- mindspore/rewrite/node.py +157 -11
- mindspore/rewrite/parsers/assign_parser.py +231 -53
- mindspore/rewrite/parsers/class_def_parser.py +187 -109
- mindspore/rewrite/parsers/for_parser.py +24 -14
- mindspore/rewrite/parsers/function_def_parser.py +21 -4
- mindspore/rewrite/parsers/if_parser.py +6 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +256 -133
- mindspore/rewrite/symbol_tree_builder.py +38 -1
- mindspore/run_check/_check_version.py +69 -63
- mindspore/run_check/run_check.py +2 -1
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +1 -1
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +273 -102
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +2 -2
- mindspore/train/callback/_checkpoint.py +3 -3
- mindspore/train/callback/_early_stop.py +3 -3
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +29 -31
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +3 -3
- mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
- mindspore/train/callback/_summary_collector.py +23 -16
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +15 -3
- mindspore/train/dataset_helper.py +10 -15
- mindspore/train/loss_scale_manager.py +8 -11
- mindspore/train/metrics/__init__.py +1 -1
- mindspore/train/metrics/bleu_score.py +1 -1
- mindspore/train/metrics/confusion_matrix.py +1 -1
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/dice.py +2 -2
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +4 -3
- mindspore/train/metrics/mean_surface_distance.py +2 -2
- mindspore/train/metrics/occlusion_sensitivity.py +1 -1
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +1 -1
- mindspore/train/metrics/recall.py +1 -1
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +45 -28
- mindspore/train/serialization.py +295 -188
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -13
- mindspore/train/train_thor/convert_utils.py +2 -2
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
- mindspore/compression/__init__.py +0 -19
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -515
- mindspore/compression/quant/__init__.py +0 -28
- mindspore/compression/quant/qat.py +0 -634
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -140
- mindspore/nn/probability/dpn/vae/vae.py +0 -124
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
- mindspore/ops/composite/array_ops.py +0 -241
- mindspore/ops/composite/clip_ops.py +0 -134
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -22,15 +22,16 @@ their training models.
|
|
|
22
22
|
import numpy as np
|
|
23
23
|
|
|
24
24
|
import mindspore._c_dataengine as cde
|
|
25
|
-
from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation,
|
|
26
|
-
ResampleMethod, ScaleType, WindowType
|
|
25
|
+
from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation, NormMode, \
|
|
26
|
+
NormType, ResampleMethod, ScaleType, WindowType
|
|
27
27
|
from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \
|
|
28
28
|
check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \
|
|
29
29
|
check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \
|
|
30
30
|
check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_griffin_lim, \
|
|
31
|
-
check_highpass_biquad, check_inverse_mel_scale,
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
check_highpass_biquad, check_inverse_mel_scale, check_inverse_spectrogram, check_lfcc, check_lfilter, \
|
|
32
|
+
check_lowpass_biquad, check_magphase, check_mask_along_axis, check_mask_along_axis_iid, check_masking, \
|
|
33
|
+
check_mel_scale, check_mel_spectrogram, check_mfcc, check_mu_law_coding, check_overdrive, check_phase_vocoder, \
|
|
34
|
+
check_phaser, check_pitch_shift, check_resample, check_riaa_biquad, check_sliding_window_cmn, \
|
|
34
35
|
check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, check_vad, check_vol
|
|
35
36
|
from ..transforms.py_transforms_util import Implementation
|
|
36
37
|
from ..transforms.transforms import TensorOperation
|
|
@@ -68,7 +69,7 @@ class AllpassBiquad(AudioTensorOperation):
|
|
|
68
69
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
69
70
|
|
|
70
71
|
Note:
|
|
71
|
-
The
|
|
72
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
72
73
|
|
|
73
74
|
Args:
|
|
74
75
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -116,7 +117,7 @@ class AmplitudeToDB(AudioTensorOperation):
|
|
|
116
117
|
Turn the input audio waveform from the amplitude/power scale to decibel scale.
|
|
117
118
|
|
|
118
119
|
Note:
|
|
119
|
-
The
|
|
120
|
+
The shape of the audio waveform to be processed needs to be <..., freq, time>.
|
|
120
121
|
|
|
121
122
|
Args:
|
|
122
123
|
stype (ScaleType, optional): Scale of the input waveform, which can be
|
|
@@ -170,7 +171,7 @@ class Angle(AudioTensorOperation):
|
|
|
170
171
|
Calculate the angle of complex number sequence.
|
|
171
172
|
|
|
172
173
|
Note:
|
|
173
|
-
The
|
|
174
|
+
The shape of the audio waveform to be processed needs to be <..., complex=2>.
|
|
174
175
|
The first dimension represents the real part while the second represents the imaginary.
|
|
175
176
|
|
|
176
177
|
Raises:
|
|
@@ -203,7 +204,7 @@ class BandBiquad(AudioTensorOperation):
|
|
|
203
204
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
204
205
|
|
|
205
206
|
Note:
|
|
206
|
-
The
|
|
207
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
207
208
|
|
|
208
209
|
Args:
|
|
209
210
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -264,7 +265,7 @@ class BandpassBiquad(AudioTensorOperation):
|
|
|
264
265
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
265
266
|
|
|
266
267
|
Note:
|
|
267
|
-
The
|
|
268
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
268
269
|
|
|
269
270
|
Args:
|
|
270
271
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -323,7 +324,7 @@ class BandrejectBiquad(AudioTensorOperation):
|
|
|
323
324
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
324
325
|
|
|
325
326
|
Note:
|
|
326
|
-
The
|
|
327
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
327
328
|
|
|
328
329
|
Args:
|
|
329
330
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -375,7 +376,7 @@ class BassBiquad(AudioTensorOperation):
|
|
|
375
376
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
376
377
|
|
|
377
378
|
Note:
|
|
378
|
-
The
|
|
379
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
379
380
|
|
|
380
381
|
Args:
|
|
381
382
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -426,10 +427,22 @@ class Biquad(TensorOperation):
|
|
|
426
427
|
b0 (float): Numerator coefficient of current input, x[n].
|
|
427
428
|
b1 (float): Numerator coefficient of input one time step ago x[n-1].
|
|
428
429
|
b2 (float): Numerator coefficient of input two time steps ago x[n-2].
|
|
429
|
-
a0 (float): Denominator coefficient of current output y[n], the value can't be
|
|
430
|
+
a0 (float): Denominator coefficient of current output y[n], the value can't be 0, typically 1.
|
|
430
431
|
a1 (float): Denominator coefficient of output one time step ago y[n-1].
|
|
431
432
|
a2 (float): Denominator coefficient of output two time steps ago y[n-2].
|
|
432
433
|
|
|
434
|
+
Raises:
|
|
435
|
+
TypeError: If `b0` is not of type float.
|
|
436
|
+
TypeError: If `b1` is not of type float.
|
|
437
|
+
TypeError: If `b2` is not of type float.
|
|
438
|
+
TypeError: If `a0` is not of type float.
|
|
439
|
+
TypeError: If `a1` is not of type float.
|
|
440
|
+
TypeError: If `a2` is not of type float.
|
|
441
|
+
ValueError: If `a0` is 0.
|
|
442
|
+
|
|
443
|
+
Supported Platforms:
|
|
444
|
+
``CPU``
|
|
445
|
+
|
|
433
446
|
Examples:
|
|
434
447
|
>>> import numpy as np
|
|
435
448
|
>>>
|
|
@@ -457,7 +470,7 @@ class ComplexNorm(AudioTensorOperation):
|
|
|
457
470
|
Compute the norm of complex number sequence.
|
|
458
471
|
|
|
459
472
|
Note:
|
|
460
|
-
The
|
|
473
|
+
The shape of the audio waveform to be processed needs to be <..., complex=2>.
|
|
461
474
|
The first dimension represents the real part while the second represents the imaginary.
|
|
462
475
|
|
|
463
476
|
Args:
|
|
@@ -528,6 +541,9 @@ class ComputeDeltas(AudioTensorOperation):
|
|
|
528
541
|
TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
|
|
529
542
|
RuntimeError: If input tensor is not in shape of <..., freq, time>.
|
|
530
543
|
|
|
544
|
+
Supported Platforms:
|
|
545
|
+
``CPU``
|
|
546
|
+
|
|
531
547
|
Examples:
|
|
532
548
|
>>> import numpy as np
|
|
533
549
|
>>> from mindspore.dataset.audio import BorderType
|
|
@@ -557,7 +573,7 @@ class Contrast(AudioTensorOperation):
|
|
|
557
573
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
558
574
|
|
|
559
575
|
Note:
|
|
560
|
-
The
|
|
576
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
561
577
|
|
|
562
578
|
Args:
|
|
563
579
|
enhancement_amount (float, optional): Controls the amount of the enhancement,
|
|
@@ -598,6 +614,13 @@ class DBToAmplitude(AudioTensorOperation):
|
|
|
598
614
|
ref (float): Reference which the output will be scaled by.
|
|
599
615
|
power (float): If power equals 1, will compute DB to power. If 0.5, will compute DB to amplitude.
|
|
600
616
|
|
|
617
|
+
Raises:
|
|
618
|
+
TypeError: If `ref` is not of type float.
|
|
619
|
+
TypeError: If `power` is not of type float.
|
|
620
|
+
|
|
621
|
+
Supported Platforms:
|
|
622
|
+
``CPU``
|
|
623
|
+
|
|
601
624
|
Examples:
|
|
602
625
|
>>> import numpy as np
|
|
603
626
|
>>>
|
|
@@ -624,7 +647,15 @@ class DCShift(AudioTensorOperation):
|
|
|
624
647
|
Args:
|
|
625
648
|
shift (float): The amount to shift the audio, the value must be in the range [-2.0, 2.0].
|
|
626
649
|
limiter_gain (float, optional): Used only on peaks to prevent clipping,
|
|
627
|
-
the value should be much less than 1, such as 0.05 or 0.02.
|
|
650
|
+
the value should be much less than 1, such as 0.05 or 0.02. Default: None, will be set to `shift` .
|
|
651
|
+
|
|
652
|
+
Raises:
|
|
653
|
+
TypeError: If `shift` is not of type float.
|
|
654
|
+
ValueError: If `shift` is not in range [-2.0, 2.0].
|
|
655
|
+
TypeError: If `limiter_gain` is not of type float.
|
|
656
|
+
|
|
657
|
+
Supported Platforms:
|
|
658
|
+
``CPU``
|
|
628
659
|
|
|
629
660
|
Examples:
|
|
630
661
|
>>> import numpy as np
|
|
@@ -659,6 +690,9 @@ class DeemphBiquad(AudioTensorOperation):
|
|
|
659
690
|
ValueError: If `sample_rate` is not 44100 or 48000.
|
|
660
691
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
661
692
|
|
|
693
|
+
Supported Platforms:
|
|
694
|
+
``CPU``
|
|
695
|
+
|
|
662
696
|
Examples:
|
|
663
697
|
>>> import numpy as np
|
|
664
698
|
>>>
|
|
@@ -693,6 +727,21 @@ class DetectPitchFrequency(AudioTensorOperation):
|
|
|
693
727
|
freq_high (int, optional): Highest frequency that can be detected (Hz), the value must be greater than zero.
|
|
694
728
|
Default: 3400.
|
|
695
729
|
|
|
730
|
+
Raises:
|
|
731
|
+
TypeError: If `sample_rate` is not of type int.
|
|
732
|
+
ValueError: If `sample_rate` is 0.
|
|
733
|
+
TypeError: If `frame_time` is not of type float.
|
|
734
|
+
ValueError: If `frame_time` is not positive.
|
|
735
|
+
TypeError: If `win_length` is not of type int.
|
|
736
|
+
ValueError: If `win_length` is not positive.
|
|
737
|
+
TypeError: If `freq_low` is not of type int.
|
|
738
|
+
ValueError: If `freq_low` is not positive.
|
|
739
|
+
TypeError: If `freq_high` is not of type int.
|
|
740
|
+
ValueError: If `freq_high` is not positive.
|
|
741
|
+
|
|
742
|
+
Supported Platforms:
|
|
743
|
+
``CPU``
|
|
744
|
+
|
|
696
745
|
Examples:
|
|
697
746
|
>>> import numpy as np
|
|
698
747
|
>>>
|
|
@@ -741,6 +790,9 @@ class Dither(AudioTensorOperation):
|
|
|
741
790
|
TypeError: If `noise_shaping` is not of type bool.
|
|
742
791
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
743
792
|
|
|
793
|
+
Supported Platforms:
|
|
794
|
+
``CPU``
|
|
795
|
+
|
|
744
796
|
Examples:
|
|
745
797
|
>>> import numpy as np
|
|
746
798
|
>>>
|
|
@@ -767,11 +819,22 @@ class EqualizerBiquad(AudioTensorOperation):
|
|
|
767
819
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
768
820
|
|
|
769
821
|
Args:
|
|
770
|
-
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be
|
|
822
|
+
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
|
|
771
823
|
center_freq (float): Central frequency (in Hz).
|
|
772
824
|
gain (float): Desired gain at the boost (or attenuation) in dB.
|
|
773
825
|
Q (float, optional): https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: 0.707.
|
|
774
826
|
|
|
827
|
+
Raises:
|
|
828
|
+
TypeError: If `sample_rate` is not of type int.
|
|
829
|
+
ValueError: If `sample_rate` is 0.
|
|
830
|
+
TypeError: If `center_freq` is not of type float.
|
|
831
|
+
TypeError: If `gain` is not of type float.
|
|
832
|
+
TypeError: If `Q` is not of type float.
|
|
833
|
+
ValueError: If `Q` is not in range of (0, 1].
|
|
834
|
+
|
|
835
|
+
Supported Platforms:
|
|
836
|
+
``CPU``
|
|
837
|
+
|
|
775
838
|
Examples:
|
|
776
839
|
>>> import numpy as np
|
|
777
840
|
>>>
|
|
@@ -824,6 +887,9 @@ class Fade(AudioTensorOperation):
|
|
|
824
887
|
RuntimeError: If fade_in_len exceeds waveform length.
|
|
825
888
|
RuntimeError: If fade_out_len exceeds waveform length.
|
|
826
889
|
|
|
890
|
+
Supported Platforms:
|
|
891
|
+
``CPU``
|
|
892
|
+
|
|
827
893
|
Examples:
|
|
828
894
|
>>> import numpy as np
|
|
829
895
|
>>> from mindspore.dataset.audio import FadeShape
|
|
@@ -850,16 +916,20 @@ class Filtfilt(AudioTensorOperation):
|
|
|
850
916
|
Apply an IIR filter forward and backward to a waveform.
|
|
851
917
|
|
|
852
918
|
Args:
|
|
853
|
-
a_coeffs (Sequence):
|
|
919
|
+
a_coeffs (Sequence[float]): Denominator coefficients of the difference equation.
|
|
854
920
|
Lower delays coefficients are first, e.g. [a0, a1, a2, ...].
|
|
855
921
|
Must be same size as b_coeffs (pad with 0's as necessary).
|
|
856
|
-
b_coeffs (Sequence):
|
|
922
|
+
b_coeffs (Sequence[float]): Numerator coefficients of the difference equation.
|
|
857
923
|
Lower delays coefficients are first, e.g. [b0, b1, b2, ...].
|
|
858
924
|
Must be same size as a_coeffs (pad with 0's as necessary).
|
|
859
925
|
clamp (bool, optional): If True, clamp the output signal to be in the range [-1, 1]. Default: True.
|
|
860
926
|
|
|
861
927
|
Raises:
|
|
862
|
-
|
|
928
|
+
TypeError: If `a_coeffs` is not of type Sequence[float].
|
|
929
|
+
TypeError: If `b_coeffs` is not of type Sequence[float].
|
|
930
|
+
ValueError: If `a_coeffs` and `b_coeffs` are of different sizes.
|
|
931
|
+
TypeError: If `clamp` is not of type bool.
|
|
932
|
+
RuntimeError: If shape of the input audio is not <..., time>.
|
|
863
933
|
|
|
864
934
|
Examples:
|
|
865
935
|
>>> import numpy as np
|
|
@@ -928,6 +998,9 @@ class Flanger(AudioTensorOperation):
|
|
|
928
998
|
TypeError: If `interpolation` is not of type :class:`mindspore.dataset.audio.Interpolation` .
|
|
929
999
|
RuntimeError: If input tensor is not in shape of <..., channel, time>.
|
|
930
1000
|
|
|
1001
|
+
Supported Platforms:
|
|
1002
|
+
``CPU``
|
|
1003
|
+
|
|
931
1004
|
Examples:
|
|
932
1005
|
>>> import numpy as np
|
|
933
1006
|
>>>
|
|
@@ -962,7 +1035,7 @@ class FrequencyMasking(AudioTensorOperation):
|
|
|
962
1035
|
Apply masking to a spectrogram in the frequency domain.
|
|
963
1036
|
|
|
964
1037
|
Note:
|
|
965
|
-
The
|
|
1038
|
+
The shape of the audio waveform to be processed needs to be <..., freq, time>.
|
|
966
1039
|
|
|
967
1040
|
Args:
|
|
968
1041
|
iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: False.
|
|
@@ -1021,6 +1094,12 @@ class Gain(AudioTensorOperation):
|
|
|
1021
1094
|
Args:
|
|
1022
1095
|
gain_db (float): Gain adjustment in decibels (dB). Default: 1.0.
|
|
1023
1096
|
|
|
1097
|
+
Raises:
|
|
1098
|
+
TypeError: If `gain_db` is not of type float.
|
|
1099
|
+
|
|
1100
|
+
Supported Platforms:
|
|
1101
|
+
``CPU``
|
|
1102
|
+
|
|
1024
1103
|
Examples:
|
|
1025
1104
|
>>> import numpy as np
|
|
1026
1105
|
>>>
|
|
@@ -1049,8 +1128,9 @@ class GriffinLim(AudioTensorOperation):
|
|
|
1049
1128
|
Args:
|
|
1050
1129
|
n_fft (int, optional): Size of FFT. Default: 400.
|
|
1051
1130
|
n_iter (int, optional): Number of iterations for phase recovery. Default: 32.
|
|
1052
|
-
win_length (int, optional): Window size for GriffinLim. Default: None, will be set to n_fft.
|
|
1053
|
-
hop_length (int, optional): Length of hop between STFT windows.
|
|
1131
|
+
win_length (int, optional): Window size for GriffinLim. Default: None, will be set to `n_fft` .
|
|
1132
|
+
hop_length (int, optional): Length of hop between STFT windows.
|
|
1133
|
+
Default: None, will be set to `win_length // 2` .
|
|
1054
1134
|
window_type (WindowType, optional): Window type for GriffinLim, which can be WindowType.BARTLETT,
|
|
1055
1135
|
WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN or WindowType.KAISER. Default: WindowType.HANN.
|
|
1056
1136
|
Currently kaiser window is not supported on macOS.
|
|
@@ -1062,9 +1142,28 @@ class GriffinLim(AudioTensorOperation):
|
|
|
1062
1142
|
Default: True.
|
|
1063
1143
|
|
|
1064
1144
|
Raises:
|
|
1145
|
+
TypeError: If `n_fft` is not of type int.
|
|
1146
|
+
ValueError: If `n_fft` is not positive.
|
|
1147
|
+
TypeError: If `n_iter` is not of type int.
|
|
1148
|
+
ValueError: If `n_iter` is not positive.
|
|
1149
|
+
TypeError: If `win_length` is not of type int.
|
|
1150
|
+
ValueError: If `win_length` is a negative number.
|
|
1151
|
+
TypeError: If `hop_length` is not of type int.
|
|
1152
|
+
ValueError: If `hop_length` is a negative number.
|
|
1153
|
+
TypeError: If `window_type` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
1154
|
+
TypeError: If `power` is not of type float.
|
|
1155
|
+
ValueError: If `power` is not positive.
|
|
1156
|
+
TypeError: If `momentum` is not of type float.
|
|
1157
|
+
ValueError: If `momentum` is a negative number.
|
|
1158
|
+
TypeError: If `length` is not of type int.
|
|
1159
|
+
ValueError: If `length` is a negative number.
|
|
1160
|
+
TypeError: If `rand_init` is not of type bool.
|
|
1065
1161
|
RuntimeError: If `n_fft` is not less than `length` .
|
|
1066
1162
|
RuntimeError: If `win_length` is not less than `n_fft` .
|
|
1067
1163
|
|
|
1164
|
+
Supported Platforms:
|
|
1165
|
+
``CPU``
|
|
1166
|
+
|
|
1068
1167
|
Examples:
|
|
1069
1168
|
>>> import numpy as np
|
|
1070
1169
|
>>>
|
|
@@ -1101,12 +1200,20 @@ class HighpassBiquad(AudioTensorOperation):
|
|
|
1101
1200
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
1102
1201
|
|
|
1103
1202
|
Args:
|
|
1104
|
-
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be
|
|
1203
|
+
sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be 0.
|
|
1105
1204
|
cutoff_freq (float): Filter cutoff frequency (in Hz).
|
|
1106
1205
|
Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1]. Default: 0.707.
|
|
1107
1206
|
|
|
1108
1207
|
Raises:
|
|
1109
|
-
|
|
1208
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1209
|
+
ValueError: If `sample_rate` is 0.
|
|
1210
|
+
TypeError: If `cutoff_freq` is not of type float.
|
|
1211
|
+
TypeError: If `Q` is not of type float.
|
|
1212
|
+
ValueError: If `Q` is not in range of (0, 1].
|
|
1213
|
+
RuntimeError: If the shape of input audio waveform does not match <..., time>.
|
|
1214
|
+
|
|
1215
|
+
Supported Platforms:
|
|
1216
|
+
``CPU``
|
|
1110
1217
|
|
|
1111
1218
|
Examples:
|
|
1112
1219
|
>>> import numpy as np
|
|
@@ -1144,9 +1251,33 @@ class InverseMelScale(AudioTensorOperation):
|
|
|
1144
1251
|
sgdargs (dict, optional): Arguments for the SGD optimizer. Default: None, will be set to
|
|
1145
1252
|
{'sgd_lr': 0.1, 'sgd_momentum': 0.9}.
|
|
1146
1253
|
norm (NormType, optional): Normalization method, can be NormType.SLANEY or NormType.NONE.
|
|
1147
|
-
Default: NormType.NONE.
|
|
1254
|
+
Default: NormType.NONE, no normalization.
|
|
1148
1255
|
mel_type (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK. Default: MelType.HTK.
|
|
1149
1256
|
|
|
1257
|
+
Raises:
|
|
1258
|
+
TypeError: If `n_stft` is not of type int.
|
|
1259
|
+
ValueError: If `n_stft` is not positive.
|
|
1260
|
+
TypeError: If `n_mels` is not of type int.
|
|
1261
|
+
ValueError: If `n_mels` is not positive.
|
|
1262
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1263
|
+
ValueError: If `sample_rate` is not positive.
|
|
1264
|
+
TypeError: If `f_min` is not of type float.
|
|
1265
|
+
ValueError: If `f_min` is greater than or equal to `f_max` .
|
|
1266
|
+
TypeError: If `f_max` is not of type float.
|
|
1267
|
+
ValueError: If `f_max` is a negative number.
|
|
1268
|
+
TypeError: If `max_iter` is not of type int.
|
|
1269
|
+
ValueError: If `max_iter` is a negative number.
|
|
1270
|
+
TypeError: If `tolerance_loss` is not of type float.
|
|
1271
|
+
ValueError: If `tolerance_loss` is a negative number.
|
|
1272
|
+
TypeError: If `tolerance_change` is not of type float.
|
|
1273
|
+
ValueError: If `tolerance_change` is a negative number.
|
|
1274
|
+
TypeError: If `sgdargs` is not of type dict.
|
|
1275
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
|
|
1276
|
+
TypeError: If `mel_type` is not of type :class:`mindspore.dataset.audio.MelType` .
|
|
1277
|
+
|
|
1278
|
+
Supported Platforms:
|
|
1279
|
+
``CPU``
|
|
1280
|
+
|
|
1150
1281
|
Examples:
|
|
1151
1282
|
>>> import numpy as np
|
|
1152
1283
|
>>>
|
|
@@ -1181,6 +1312,177 @@ class InverseMelScale(AudioTensorOperation):
|
|
|
1181
1312
|
DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_type))
|
|
1182
1313
|
|
|
1183
1314
|
|
|
1315
|
+
class InverseSpectrogram(AudioTensorOperation):
|
|
1316
|
+
"""
|
|
1317
|
+
Create an inverse spectrogram to recover an audio signal from a spectrogram.
|
|
1318
|
+
|
|
1319
|
+
Args:
|
|
1320
|
+
length (int, optional): The output length of the waveform, must be non-negative. Default: None,
|
|
1321
|
+
means to output the whole waveform.
|
|
1322
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0.
|
|
1323
|
+
Default: 400.
|
|
1324
|
+
win_length (int, optional): Window size, which should be greater than 0.
|
|
1325
|
+
Default: None, will be set to `n_fft` .
|
|
1326
|
+
hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
|
|
1327
|
+
Default: None, will be set to `win_length // 2` .
|
|
1328
|
+
pad (int, optional): Two sided padding of signal, cannot be less than 0. Default: 0.
|
|
1329
|
+
window (WindowType, optional): A function to create a window tensor that is applied/multiplied to each
|
|
1330
|
+
frame/window. Default: WindowType.HANN.
|
|
1331
|
+
normalized (bool, optional): Whether the spectrogram was normalized by magnitude after stft. Default: False.
|
|
1332
|
+
center (bool, optional): Whether the signal in spectrogram was padded on both sides. Default: True.
|
|
1333
|
+
pad_mode (BorderType, optional): Controls the padding method used when `center` is True,
|
|
1334
|
+
can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE or BorderType.SYMMETRIC.
|
|
1335
|
+
Default: BorderType.REFLECT.
|
|
1336
|
+
onesided (bool, optional): Controls whether spectrogram was used to return half of results to avoid
|
|
1337
|
+
redundancy. Default: True.
|
|
1338
|
+
|
|
1339
|
+
Raises:
|
|
1340
|
+
TypeError: If `length` is not of type int.
|
|
1341
|
+
ValueError: If `length` is a negative number.
|
|
1342
|
+
TypeError: If `n_fft` is not of type int.
|
|
1343
|
+
ValueError: If `n_fft` is not positive.
|
|
1344
|
+
TypeError: If `win_length` is not of type int.
|
|
1345
|
+
ValueError: If `win_length` is not positive.
|
|
1346
|
+
TypeError: If `hop_length` is not of type int.
|
|
1347
|
+
ValueError: If `hop_length` is not positive.
|
|
1348
|
+
TypeError: If `pad` is not of type int.
|
|
1349
|
+
ValueError: If `pad` is a negative number.
|
|
1350
|
+
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
1351
|
+
TypeError: If `normalized` is not of type bool.
|
|
1352
|
+
TypeError: If `center` is not of type bool.
|
|
1353
|
+
TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
|
|
1354
|
+
TypeError: If `onesided` is not of type bool.
|
|
1355
|
+
|
|
1356
|
+
Supported Platforms:
|
|
1357
|
+
``CPU``
|
|
1358
|
+
|
|
1359
|
+
Examples:
|
|
1360
|
+
>>> import numpy as np
|
|
1361
|
+
>>>
|
|
1362
|
+
>>> waveform = np.array([[[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
|
|
1363
|
+
... [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]]])
|
|
1364
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1365
|
+
>>> transforms = [audio.InverseSpectrogram(1, 400, 400, 200)]
|
|
1366
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1367
|
+
"""
|
|
1368
|
+
|
|
1369
|
+
@check_inverse_spectrogram
|
|
1370
|
+
def __init__(self, length=None, n_fft=400, win_length=None, hop_length=None, pad=0,
|
|
1371
|
+
window=WindowType.HANN, normalized=False, center=True,
|
|
1372
|
+
pad_mode=BorderType.REFLECT, onesided=True):
|
|
1373
|
+
super().__init__()
|
|
1374
|
+
self.length = length if length is not None else 0
|
|
1375
|
+
self.n_fft = n_fft
|
|
1376
|
+
self.win_length = win_length if win_length is not None else n_fft
|
|
1377
|
+
self.hop_length = hop_length if hop_length is not None else self.win_length // 2
|
|
1378
|
+
self.pad = pad
|
|
1379
|
+
self.window = window
|
|
1380
|
+
self.normalized = normalized
|
|
1381
|
+
self.center = center
|
|
1382
|
+
self.pad_mode = pad_mode
|
|
1383
|
+
self.onesided = onesided
|
|
1384
|
+
|
|
1385
|
+
def parse(self):
|
|
1386
|
+
return cde.InverseSpectrogramOperation(self.length, self.n_fft, self.win_length, self.hop_length, self.pad,
|
|
1387
|
+
DE_C_WINDOW_TYPE.get(self.window), self.normalized, self.center,
|
|
1388
|
+
DE_C_BORDER_TYPE.get(self.pad_mode), self.onesided)
|
|
1389
|
+
|
|
1390
|
+
|
|
1391
|
+
DE_C_NORM_MODE = {NormMode.ORTHO: cde.NormMode.DE_NORM_MODE_ORTHO,
|
|
1392
|
+
NormMode.NONE: cde.NormMode.DE_NORM_MODE_NONE}
|
|
1393
|
+
|
|
1394
|
+
|
|
1395
|
+
class LFCC(AudioTensorOperation):
|
|
1396
|
+
"""
|
|
1397
|
+
Create LFCC for a raw audio signal.
|
|
1398
|
+
|
|
1399
|
+
Note:
|
|
1400
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
1401
|
+
|
|
1402
|
+
Args:
|
|
1403
|
+
sample_rate (int, optional): Sample rate of audio signal. Default: 16000.
|
|
1404
|
+
n_filter (int, optional) : Number of linear filters to apply. Default: 128.
|
|
1405
|
+
n_lfcc (int, optional) : Number of lfc coefficients to retain. Default: 40.
|
|
1406
|
+
f_min (float, optional): Minimum frequency. Default: 0.0.
|
|
1407
|
+
f_max (float, optional): Maximum frequency. Default: None, will be set to `sample_rate // 2` .
|
|
1408
|
+
dct_type (int, optional) : Type of DCT to use. The value can only be 2. Default: 2.
|
|
1409
|
+
norm (NormMode, optional) : Norm to use. Default: NormMode.ORTHO.
|
|
1410
|
+
log_lf (bool, optional) : Whether to use log-lf spectrograms instead of db-scaled. Default: False.
|
|
1411
|
+
speckwargs (dict, optional) : Arguments for :class:`mindspore.dataset.audio.Spectrogram`.
|
|
1412
|
+
Default: None, the default setting is a dict including
|
|
1413
|
+
|
|
1414
|
+
- 'n_fft': 400
|
|
1415
|
+
- 'win_length': n_fft
|
|
1416
|
+
- 'hop_length': win_length // 2
|
|
1417
|
+
- 'pad': 0
|
|
1418
|
+
- 'window': WindowType.HANN
|
|
1419
|
+
- 'power': 2.0
|
|
1420
|
+
- 'normalized': False
|
|
1421
|
+
- 'center': True
|
|
1422
|
+
- 'pad_mode': BorderType.REFLECT
|
|
1423
|
+
- 'onesided': True
|
|
1424
|
+
|
|
1425
|
+
Raises:
|
|
1426
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1427
|
+
TypeError: If `n_filter` is not of type int.
|
|
1428
|
+
TypeError: If `n_lfcc` is not of type int.
|
|
1429
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormMode` .
|
|
1430
|
+
TypeError: If `log_lf` is not of type bool.
|
|
1431
|
+
TypeError: If `speckwargs` is not of type dict.
|
|
1432
|
+
ValueError: If `sample_rate` is 0.
|
|
1433
|
+
ValueError: If `n_lfcc` is less than 0.
|
|
1434
|
+
ValueError: If `f_min` is greater than `f_max` .
|
|
1435
|
+
ValueError: If `f_min` is greater than `sample_rate // 2` when `f_max` is set to None.
|
|
1436
|
+
ValueError: If `dct_type` is not 2.
|
|
1437
|
+
|
|
1438
|
+
Supported Platforms:
|
|
1439
|
+
``CPU``
|
|
1440
|
+
|
|
1441
|
+
Examples:
|
|
1442
|
+
>>> import numpy as np
|
|
1443
|
+
>>> import mindspore.dataset as ds
|
|
1444
|
+
>>> import mindspore.dataset.audio as audio
|
|
1445
|
+
>>>
|
|
1446
|
+
>>> waveform = np.random.random([1, 1, 300])
|
|
1447
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1448
|
+
>>> transforms = [audio.LFCC()]
|
|
1449
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1450
|
+
"""
|
|
1451
|
+
|
|
1452
|
+
@check_lfcc
|
|
1453
|
+
def __init__(self, sample_rate=16000, n_filter=128, n_lfcc=40, f_min=0.0, f_max=None, dct_type=2,
|
|
1454
|
+
norm=NormMode.ORTHO, log_lf=False, speckwargs=None):
|
|
1455
|
+
super().__init__()
|
|
1456
|
+
self.sample_rate = sample_rate
|
|
1457
|
+
self.n_filter = n_filter
|
|
1458
|
+
self.n_lfcc = n_lfcc
|
|
1459
|
+
self.f_min = f_min
|
|
1460
|
+
self.f_max = f_max if f_max is not None else sample_rate // 2
|
|
1461
|
+
self.dct_type = dct_type
|
|
1462
|
+
self.norm = norm
|
|
1463
|
+
self.log_lf = log_lf
|
|
1464
|
+
self.speckwargs = speckwargs
|
|
1465
|
+
if speckwargs is None:
|
|
1466
|
+
self.speckwargs = {}
|
|
1467
|
+
self.speckwargs.setdefault("n_fft", 400)
|
|
1468
|
+
self.speckwargs.setdefault("win_length", self.speckwargs.get("n_fft"))
|
|
1469
|
+
self.speckwargs.setdefault("hop_length", self.speckwargs.get("win_length") // 2)
|
|
1470
|
+
self.speckwargs.setdefault("pad", 0)
|
|
1471
|
+
self.speckwargs.setdefault("window", WindowType.HANN)
|
|
1472
|
+
self.speckwargs.setdefault("power", 2.0)
|
|
1473
|
+
self.speckwargs.setdefault("normalized", False)
|
|
1474
|
+
self.speckwargs.setdefault("center", True)
|
|
1475
|
+
self.speckwargs.setdefault("pad_mode", BorderType.REFLECT)
|
|
1476
|
+
self.speckwargs.setdefault("onesided", True)
|
|
1477
|
+
self.window = self.speckwargs.get("window")
|
|
1478
|
+
self.pad_mode = self.speckwargs.get("pad_mode")
|
|
1479
|
+
|
|
1480
|
+
def parse(self):
|
|
1481
|
+
return cde.LFCCOperation(self.sample_rate, self.n_filter, self.n_lfcc, self.f_min, self.f_max,
|
|
1482
|
+
self.dct_type, DE_C_NORM_MODE.get(self.norm), self.log_lf, self.speckwargs,
|
|
1483
|
+
DE_C_WINDOW_TYPE.get(self.window), DE_C_BORDER_TYPE.get(self.pad_mode))
|
|
1484
|
+
|
|
1485
|
+
|
|
1184
1486
|
class LFilter(AudioTensorOperation):
|
|
1185
1487
|
"""
|
|
1186
1488
|
Perform an IIR filter by evaluating a difference equation.
|
|
@@ -1201,6 +1503,9 @@ class LFilter(AudioTensorOperation):
|
|
|
1201
1503
|
TypeError: If `clamp` is not of type bool.
|
|
1202
1504
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
1203
1505
|
|
|
1506
|
+
Supported Platforms:
|
|
1507
|
+
``CPU``
|
|
1508
|
+
|
|
1204
1509
|
Examples:
|
|
1205
1510
|
>>> import numpy as np
|
|
1206
1511
|
>>>
|
|
@@ -1236,7 +1541,7 @@ class LowpassBiquad(AudioTensorOperation):
|
|
|
1236
1541
|
Similar to `SoX <http://sox.sourceforge.net/sox.html>`_ implementation.
|
|
1237
1542
|
|
|
1238
1543
|
Note:
|
|
1239
|
-
The
|
|
1544
|
+
The shape of the audio waveform to be processed needs to be <..., time>.
|
|
1240
1545
|
|
|
1241
1546
|
Args:
|
|
1242
1547
|
sample_rate (int): Sampling rate (in Hz), which can't be zero.
|
|
@@ -1286,6 +1591,9 @@ class Magphase(AudioTensorOperation):
|
|
|
1286
1591
|
Raises:
|
|
1287
1592
|
RuntimeError: If the shape of input audio waveform does not match (..., 2).
|
|
1288
1593
|
|
|
1594
|
+
Supported Platforms:
|
|
1595
|
+
``CPU``
|
|
1596
|
+
|
|
1289
1597
|
Examples:
|
|
1290
1598
|
>>> import numpy as np
|
|
1291
1599
|
>>>
|
|
@@ -1319,6 +1627,9 @@ class MaskAlongAxis(AudioTensorOperation):
|
|
|
1319
1627
|
ValueError: If `mask_width` is invalid (< 1).
|
|
1320
1628
|
ValueError: If `axis` is not of type int or not within [1, 2].
|
|
1321
1629
|
|
|
1630
|
+
Supported Platforms:
|
|
1631
|
+
``CPU``
|
|
1632
|
+
|
|
1322
1633
|
Examples:
|
|
1323
1634
|
>>> import numpy as np
|
|
1324
1635
|
>>>
|
|
@@ -1361,6 +1672,9 @@ class MaskAlongAxisIID(AudioTensorOperation):
|
|
|
1361
1672
|
ValueError: If `axis` is not in range of [1, 2].
|
|
1362
1673
|
RuntimeError: If input tensor is not in shape of <..., freq, time>.
|
|
1363
1674
|
|
|
1675
|
+
Supported Platforms:
|
|
1676
|
+
``CPU``
|
|
1677
|
+
|
|
1364
1678
|
Examples:
|
|
1365
1679
|
>>> import numpy as np
|
|
1366
1680
|
>>>
|
|
@@ -1400,9 +1714,26 @@ class MelScale(AudioTensorOperation):
|
|
|
1400
1714
|
n_stft (int, optional): Number of bins in STFT. Default: 201.
|
|
1401
1715
|
norm (NormType, optional): Type of norm, value should be NormType.SLANEY or NormType.NONE.
|
|
1402
1716
|
If norm is NormType.SLANEY, divide the triangular mel weight by the width of the mel band.
|
|
1403
|
-
Default: NormType.NONE.
|
|
1717
|
+
Default: NormType.NONE, no normalization.
|
|
1404
1718
|
mel_type (MelType, optional): Type to use, value should be MelType.SLANEY or MelType.HTK. Default: MelType.HTK.
|
|
1405
1719
|
|
|
1720
|
+
Raises:
|
|
1721
|
+
TypeError: If `n_mels` is not of type int.
|
|
1722
|
+
ValueError: If `n_mels` is not positive.
|
|
1723
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1724
|
+
ValueError: If `sample_rate` is not positive.
|
|
1725
|
+
TypeError: If `f_min` is not of type float.
|
|
1726
|
+
ValueError: If `f_min` is greater than or equal to `f_max` .
|
|
1727
|
+
TypeError: If `f_max` is not of type float.
|
|
1728
|
+
ValueError: If `f_max` is a negative number.
|
|
1729
|
+
TypeError: If `n_stft` is not of type int.
|
|
1730
|
+
ValueError: If `n_stft` is not positive.
|
|
1731
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
|
|
1732
|
+
TypeError: If `mel_type` is not of type :class:`mindspore.dataset.audio.MelType` .
|
|
1733
|
+
|
|
1734
|
+
Supported Platforms:
|
|
1735
|
+
``CPU``
|
|
1736
|
+
|
|
1406
1737
|
Examples:
|
|
1407
1738
|
>>> import numpy as np
|
|
1408
1739
|
>>>
|
|
@@ -1430,6 +1761,201 @@ class MelScale(AudioTensorOperation):
|
|
|
1430
1761
|
DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_type))
|
|
1431
1762
|
|
|
1432
1763
|
|
|
1764
|
+
class MelSpectrogram(AudioTensorOperation):
|
|
1765
|
+
r"""
|
|
1766
|
+
Create MelSpectrogram for a raw audio signal.
|
|
1767
|
+
|
|
1768
|
+
Args:
|
|
1769
|
+
sample_rate (int, optional): Sampling rate of audio signal (in Hz), which can't be less than 0. Default: 16000.
|
|
1770
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins, which should be greater than 0 and less than
|
|
1771
|
+
twice of the last dimension size of the input. Default: 400.
|
|
1772
|
+
win_length (int, optional): Window size, which should be greater than 0 and no more than `n_fft` . Default:
|
|
1773
|
+
None, will be set to `n_fft` .
|
|
1774
|
+
hop_length (int, optional): Length of hop between STFT windows, which should be greater than 0.
|
|
1775
|
+
Default: None, will be set to `win_length // 2` .
|
|
1776
|
+
f_min (float, optional): Minimum frequency, which can't be greater than `f_max` . Default: 0.0.
|
|
1777
|
+
f_max (float, optional): Maximum frequency, which can't be less than 0. Default: None, will be set
|
|
1778
|
+
to `sample_rate // 2` .
|
|
1779
|
+
pad (int, optional): Two sided padding of signal, which can't be less than 0. Default: 0.
|
|
1780
|
+
n_mels (int, optional): Number of mel filterbanks, which can't be less than 0. Default: 128.
|
|
1781
|
+
window (WindowType, optional): A function to create a window tensor that is applied/multiplied to each
|
|
1782
|
+
frame/window. Default: WindowType.HANN.
|
|
1783
|
+
power (float, optional): Exponent for the magnitude spectrogram, which must be
|
|
1784
|
+
greater than 0, e.g., 1 for energy, 2 for power, etc. Default: 2.0.
|
|
1785
|
+
normalized (bool, optional): Whether to normalize by magnitude after stft. Default: False.
|
|
1786
|
+
center (bool, optional): Whether to pad waveform on both sides. Default: True.
|
|
1787
|
+
pad_mode (BorderType, optional): Controls the padding method used when `center` is True,
|
|
1788
|
+
can be BorderType.REFLECT, BorderType.CONSTANT, BorderType.EDGE or BorderType.SYMMETRIC.
|
|
1789
|
+
Default: BorderType.REFLECT.
|
|
1790
|
+
onesided (bool, optional): Controls whether to return half of results to avoid redundancy. Default: True.
|
|
1791
|
+
norm (NormType, optional): If NormType.SLANEY, divide the triangular mel weights by the width of the mel band
|
|
1792
|
+
(area normalization). Default: NormType.NONE, no normalization.
|
|
1793
|
+
mel_scale (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK. Default: MelType.HTK.
|
|
1794
|
+
|
|
1795
|
+
Raises:
|
|
1796
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1797
|
+
TypeError: If `n_fft` is not of type int.
|
|
1798
|
+
TypeError: If `n_mels` is not of type int.
|
|
1799
|
+
TypeError: If `f_min` is not of type float.
|
|
1800
|
+
TypeError: If `f_max` is not of type float.
|
|
1801
|
+
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
1802
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormType` .
|
|
1803
|
+
TypeError: If `mel_scale` is not of type :class:`mindspore.dataset.audio.MelType` .
|
|
1804
|
+
TypeError: If `power` is not of type float.
|
|
1805
|
+
TypeError: If `normalized` is not of type bool.
|
|
1806
|
+
TypeError: If `center` is not of type bool.
|
|
1807
|
+
TypeError: If `pad_mode` is not of type :class:`mindspore.dataset.audio.BorderType` .
|
|
1808
|
+
TypeError: If `onesided` is not of type bool.
|
|
1809
|
+
TypeError: If `pad` is not of type int.
|
|
1810
|
+
TypeError: If `win_length` is not of type int.
|
|
1811
|
+
TypeError: If `hop_length` is not of type int.
|
|
1812
|
+
ValueError: If `sample_rate` is a negative number.
|
|
1813
|
+
ValueError: If `n_fft` is not positive.
|
|
1814
|
+
ValueError: If `n_mels` is a negative number.
|
|
1815
|
+
ValueError: If `f_min` is greater than `f_max` .
|
|
1816
|
+
ValueError: If `f_max` is a negative number.
|
|
1817
|
+
ValueError: If `f_min` is not less than `sample_rate // 2` when `f_max` is set to None.
|
|
1818
|
+
ValueError: If `power` is not positive.
|
|
1819
|
+
ValueError: If `pad` is a negative number.
|
|
1820
|
+
ValueError: If `win_length` is not positive.
|
|
1821
|
+
ValueError: If `hop_length` is not positive.
|
|
1822
|
+
|
|
1823
|
+
Supported Platforms:
|
|
1824
|
+
``CPU``
|
|
1825
|
+
|
|
1826
|
+
Examples:
|
|
1827
|
+
>>> import numpy as np
|
|
1828
|
+
>>>
|
|
1829
|
+
>>> from mindspore.dataset.audio import WindowType, BorderType, NormType, MelType
|
|
1830
|
+
>>>
|
|
1831
|
+
>>> waveform = np.array([[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4]]])
|
|
1832
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1833
|
+
>>> transforms = [audio.MelSpectrogram(sample_rate=16000, n_fft=16, win_length=16, hop_length=8, f_min=0.0, \
|
|
1834
|
+
... f_max=5000.0, pad=0, n_mels=8, window=WindowType.HANN, power=2.0, \
|
|
1835
|
+
... normalized=False, center=True, pad_mode=BorderType.REFLECT, \
|
|
1836
|
+
... onesided=True, norm=NormType.SLANEY, mel_scale=MelType.HTK)]
|
|
1837
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1838
|
+
"""
|
|
1839
|
+
|
|
1840
|
+
@check_mel_spectrogram
|
|
1841
|
+
def __init__(self, sample_rate=16000, n_fft=400, win_length=None, hop_length=None, f_min=0.0, f_max=None, pad=0,
|
|
1842
|
+
n_mels=128, window=WindowType.HANN, power=2.0, normalized=False, center=True,
|
|
1843
|
+
pad_mode=BorderType.REFLECT, onesided=True, norm=NormType.NONE, mel_scale=MelType.HTK):
|
|
1844
|
+
super().__init__()
|
|
1845
|
+
self.sample_rate = sample_rate
|
|
1846
|
+
self.n_fft = n_fft
|
|
1847
|
+
self.win_length = win_length if win_length is not None else n_fft
|
|
1848
|
+
self.hop_length = hop_length if hop_length is not None else self.win_length // 2
|
|
1849
|
+
self.f_min = f_min
|
|
1850
|
+
self.f_max = f_max if f_max is not None else sample_rate // 2
|
|
1851
|
+
self.pad = pad
|
|
1852
|
+
self.n_mels = n_mels
|
|
1853
|
+
self.window = window
|
|
1854
|
+
self.power = power
|
|
1855
|
+
self.normalized = normalized
|
|
1856
|
+
self.center = center
|
|
1857
|
+
self.pad_mode = pad_mode
|
|
1858
|
+
self.onesided = onesided
|
|
1859
|
+
self.norm = norm
|
|
1860
|
+
self.mel_scale = mel_scale
|
|
1861
|
+
|
|
1862
|
+
def parse(self):
|
|
1863
|
+
return cde.MelSpectrogramOperation(self.sample_rate, self.n_fft, self.win_length, self.hop_length, self.f_min,
|
|
1864
|
+
self.f_max, self.pad, self.n_mels, DE_C_WINDOW_TYPE.get(self.window),
|
|
1865
|
+
self.power, self.normalized, self.center,
|
|
1866
|
+
DE_C_BORDER_TYPE.get(self.pad_mode), self.onesided,
|
|
1867
|
+
DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_scale))
|
|
1868
|
+
|
|
1869
|
+
|
|
1870
|
+
class MFCC(AudioTensorOperation):
|
|
1871
|
+
"""
|
|
1872
|
+
Create MFCC for a raw audio signal.
|
|
1873
|
+
|
|
1874
|
+
Args:
|
|
1875
|
+
sample_rate (int, optional): Sampling rate of audio signal (in Hz), can't be less than 0. Default: 16000.
|
|
1876
|
+
n_mfcc (int, optional): Number of mfc coefficients to retain, can't be less than 0. Default: 40.
|
|
1877
|
+
dct_type (int, optional): Type of DCT (discrete cosine transform) to use, can only be 2. Default: 2.
|
|
1878
|
+
norm (NormMode, optional): Norm to use. Default: NormMode.ORTHO.
|
|
1879
|
+
log_mels (bool, optional): Whether to use log-mel spectrograms instead of db-scaled. Default: False.
|
|
1880
|
+
melkwargs (dict, optional): Arguments for :class:`mindspore.dataset.audio.MelSpectrogram`.
|
|
1881
|
+
Default: None, the default setting is a dict including
|
|
1882
|
+
|
|
1883
|
+
- 'n_fft': 400
|
|
1884
|
+
- 'win_length': n_fft
|
|
1885
|
+
- 'hop_length': win_length // 2
|
|
1886
|
+
- 'f_min': 0.0
|
|
1887
|
+
- 'f_max': sample_rate // 2
|
|
1888
|
+
- 'pad': 0
|
|
1889
|
+
- 'window': WindowType.HANN
|
|
1890
|
+
- 'power': 2.0
|
|
1891
|
+
- 'normalized': False
|
|
1892
|
+
- 'center': True
|
|
1893
|
+
- 'pad_mode': BorderType.REFLECT
|
|
1894
|
+
- 'onesided': True
|
|
1895
|
+
- 'norm': NormType.NONE
|
|
1896
|
+
- 'mel_scale': MelType.HTK
|
|
1897
|
+
|
|
1898
|
+
Raises:
|
|
1899
|
+
TypeError: If `sample_rate` is not of type int.
|
|
1900
|
+
TypeError: If `log_mels` is not of type bool.
|
|
1901
|
+
TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.NormMode` .
|
|
1902
|
+
TypeError: If `n_mfcc` is not of type int.
|
|
1903
|
+
TypeError: If `melkwargs` is not of type dict.
|
|
1904
|
+
ValueError: If `sample_rate` is a negative number.
|
|
1905
|
+
ValueError: If `n_mfcc` is a negative number.
|
|
1906
|
+
ValueError: If `dct_type` is not 2.
|
|
1907
|
+
|
|
1908
|
+
Supported Platforms:
|
|
1909
|
+
``CPU``
|
|
1910
|
+
|
|
1911
|
+
Examples:
|
|
1912
|
+
>>> import numpy as np
|
|
1913
|
+
>>>
|
|
1914
|
+
>>> waveform = np.array([[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
|
|
1915
|
+
... [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]])
|
|
1916
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
1917
|
+
>>> transforms = [audio.MFCC(4000, 1500, 2)]
|
|
1918
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
1919
|
+
"""
|
|
1920
|
+
|
|
1921
|
+
@check_mfcc
|
|
1922
|
+
def __init__(self, sample_rate=16000, n_mfcc=40, dct_type=2, norm=NormMode.ORTHO, log_mels=False, melkwargs=None):
|
|
1923
|
+
super().__init__()
|
|
1924
|
+
self.sample_rate = sample_rate
|
|
1925
|
+
self.n_mfcc = n_mfcc
|
|
1926
|
+
self.dct_type = dct_type
|
|
1927
|
+
self.norm = norm
|
|
1928
|
+
self.log_mels = log_mels
|
|
1929
|
+
self.melkwargs = melkwargs
|
|
1930
|
+
if melkwargs is None:
|
|
1931
|
+
self.melkwargs = {}
|
|
1932
|
+
self.melkwargs.setdefault("n_fft", 400)
|
|
1933
|
+
self.melkwargs.setdefault("win_length", self.melkwargs.get("n_fft"))
|
|
1934
|
+
self.melkwargs.setdefault("hop_length", self.melkwargs.get("win_length") // 2)
|
|
1935
|
+
self.melkwargs.setdefault("f_min", 0.0)
|
|
1936
|
+
self.melkwargs.setdefault("f_max", sample_rate // 2)
|
|
1937
|
+
self.melkwargs.setdefault("pad", 0)
|
|
1938
|
+
self.melkwargs.setdefault("n_mels", 128)
|
|
1939
|
+
self.melkwargs.setdefault("window", WindowType.HANN)
|
|
1940
|
+
self.melkwargs.setdefault("power", 2.0)
|
|
1941
|
+
self.melkwargs.setdefault("normalized", False)
|
|
1942
|
+
self.melkwargs.setdefault("center", True)
|
|
1943
|
+
self.melkwargs.setdefault("pad_mode", BorderType.REFLECT)
|
|
1944
|
+
self.melkwargs.setdefault("onesided", True)
|
|
1945
|
+
self.melkwargs.setdefault("norm", NormType.NONE)
|
|
1946
|
+
self.melkwargs.setdefault("mel_scale", MelType.HTK)
|
|
1947
|
+
self.window = self.melkwargs.get("window")
|
|
1948
|
+
self.pad_mode = self.melkwargs.get("pad_mode")
|
|
1949
|
+
self.norm_mel = self.melkwargs.get("norm")
|
|
1950
|
+
self.mel_scale = self.melkwargs.get("mel_scale")
|
|
1951
|
+
|
|
1952
|
+
def parse(self):
|
|
1953
|
+
return cde.MFCCOperation(self.sample_rate, self.n_mfcc, self.dct_type, DE_C_NORM_MODE.get(self.norm),
|
|
1954
|
+
self.log_mels, self.melkwargs, DE_C_WINDOW_TYPE.get(self.window),
|
|
1955
|
+
DE_C_BORDER_TYPE.get(self.pad_mode), DE_C_NORM_TYPE.get(self.norm_mel),
|
|
1956
|
+
DE_C_MEL_TYPE.get(self.mel_scale))
|
|
1957
|
+
|
|
1958
|
+
|
|
1433
1959
|
class MuLawDecoding(AudioTensorOperation):
|
|
1434
1960
|
"""
|
|
1435
1961
|
Decode mu-law encoded signal, refer to `mu-law algorithm <https://en.wikipedia.org/wiki/M-law_algorithm>`_ .
|
|
@@ -1470,6 +1996,13 @@ class MuLawEncoding(AudioTensorOperation):
|
|
|
1470
1996
|
Args:
|
|
1471
1997
|
quantization_channels (int, optional): Number of channels, which must be positive. Default: 256.
|
|
1472
1998
|
|
|
1999
|
+
Raises:
|
|
2000
|
+
TypeError: If `quantization_channels` is not of type int.
|
|
2001
|
+
ValueError: If `quantization_channels` is not a positive number.
|
|
2002
|
+
|
|
2003
|
+
Supported Platforms:
|
|
2004
|
+
``CPU``
|
|
2005
|
+
|
|
1473
2006
|
Examples:
|
|
1474
2007
|
>>> import numpy as np
|
|
1475
2008
|
>>>
|
|
@@ -1506,6 +2039,9 @@ class Overdrive(AudioTensorOperation):
|
|
|
1506
2039
|
ValueError: If `color` is not in range of [0, 100].
|
|
1507
2040
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
1508
2041
|
|
|
2042
|
+
Supported Platforms:
|
|
2043
|
+
``CPU``
|
|
2044
|
+
|
|
1509
2045
|
Examples:
|
|
1510
2046
|
>>> import numpy as np
|
|
1511
2047
|
>>>
|
|
@@ -1558,6 +2094,9 @@ class Phaser(AudioTensorOperation):
|
|
|
1558
2094
|
TypeError: If `sinusoidal` is not of type bool.
|
|
1559
2095
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
1560
2096
|
|
|
2097
|
+
Supported Platforms:
|
|
2098
|
+
``CPU``
|
|
2099
|
+
|
|
1561
2100
|
Examples:
|
|
1562
2101
|
>>> import numpy as np
|
|
1563
2102
|
>>>
|
|
@@ -1598,6 +2137,9 @@ class PhaseVocoder(AudioTensorOperation):
|
|
|
1598
2137
|
TypeError: If `phase_advance` is not of type :class:`numpy.ndarray` .
|
|
1599
2138
|
RuntimeError: If input tensor is not in shape of <..., freq, num_frame, complex=2>.
|
|
1600
2139
|
|
|
2140
|
+
Supported Platforms:
|
|
2141
|
+
``CPU``
|
|
2142
|
+
|
|
1601
2143
|
Examples:
|
|
1602
2144
|
>>> import numpy as np
|
|
1603
2145
|
>>>
|
|
@@ -1618,6 +2160,68 @@ class PhaseVocoder(AudioTensorOperation):
|
|
|
1618
2160
|
return cde.PhaseVocoderOperation(self.rate, self.phase_advance)
|
|
1619
2161
|
|
|
1620
2162
|
|
|
2163
|
+
class PitchShift(AudioTensorOperation):
|
|
2164
|
+
"""
|
|
2165
|
+
Shift the pitch of a waveform by `n_steps` steps.
|
|
2166
|
+
|
|
2167
|
+
Args:
|
|
2168
|
+
sample_rate (int): Sampling rate of waveform (in Hz).
|
|
2169
|
+
n_steps (int): The steps to shift waveform.
|
|
2170
|
+
bins_per_octave (int, optional): The number of steps per octave. Default: 12.
|
|
2171
|
+
n_fft (int, optional): Size of FFT, creates `n_fft // 2 + 1` bins. Default: 512.
|
|
2172
|
+
win_length (int, optional): Window size. Default: None, will be set to `n_fft` .
|
|
2173
|
+
hop_length (int, optional): Length of hop between STFT windows. Default: None,
|
|
2174
|
+
will be set to `win_length // 4` .
|
|
2175
|
+
window (WindowType, optional): Window tensor that is applied/multiplied to each frame/window.
|
|
2176
|
+
Default: WindowType.HANN.
|
|
2177
|
+
|
|
2178
|
+
Raises:
|
|
2179
|
+
TypeError: If `sample_rate` is not of type int.
|
|
2180
|
+
TypeError: If `n_steps` is not of type int.
|
|
2181
|
+
TypeError: If `bins_per_octave` is not of type int.
|
|
2182
|
+
TypeError: If `n_fft` is not of type int.
|
|
2183
|
+
TypeError: If `win_length` is not of type int.
|
|
2184
|
+
TypeError: If `hop_length` is not of type int.
|
|
2185
|
+
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
2186
|
+
ValueError: If `sample_rate` is a negative number.
|
|
2187
|
+
ValueError: If `bins_per_octave` is 0.
|
|
2188
|
+
ValueError: If `n_fft` is a negative number.
|
|
2189
|
+
ValueError: If `win_length` is not positive.
|
|
2190
|
+
ValueError: If `hop_length` is not positive.
|
|
2191
|
+
|
|
2192
|
+
Supported Platforms:
|
|
2193
|
+
``CPU``
|
|
2194
|
+
|
|
2195
|
+
Examples:
|
|
2196
|
+
>>> import numpy as np
|
|
2197
|
+
>>>
|
|
2198
|
+
>>> import mindspore.dataset as ds
|
|
2199
|
+
>>> import mindspore.dataset.audio as audio
|
|
2200
|
+
>>> from mindspore.dataset.audio import WindowType
|
|
2201
|
+
>>>
|
|
2202
|
+
>>> waveform = np.random.random([1, 1, 300])
|
|
2203
|
+
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
|
2204
|
+
>>> transforms = [audio.PitchShift(sample_rate=16000,n_steps=4)]
|
|
2205
|
+
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
|
2206
|
+
"""
|
|
2207
|
+
|
|
2208
|
+
@check_pitch_shift
|
|
2209
|
+
def __init__(self, sample_rate, n_steps, bins_per_octave=12, n_fft=512, win_length=None,
|
|
2210
|
+
hop_length=None, window=WindowType.HANN):
|
|
2211
|
+
super().__init__()
|
|
2212
|
+
self.sample_rate = sample_rate
|
|
2213
|
+
self.n_steps = n_steps
|
|
2214
|
+
self.bins_per_octave = bins_per_octave
|
|
2215
|
+
self.n_fft = n_fft
|
|
2216
|
+
self.win_length = win_length if win_length is not None else n_fft
|
|
2217
|
+
self.hop_length = hop_length if hop_length is not None else self.win_length // 4
|
|
2218
|
+
self.window = window
|
|
2219
|
+
|
|
2220
|
+
def parse(self):
|
|
2221
|
+
return cde.PitchShiftOperation(self.sample_rate, self.n_steps, self.bins_per_octave, self.n_fft,
|
|
2222
|
+
self.win_length, self.hop_length, DE_C_WINDOW_TYPE.get(self.window))
|
|
2223
|
+
|
|
2224
|
+
|
|
1621
2225
|
DE_C_RESAMPLE_METHOD = {ResampleMethod.SINC_INTERPOLATION: cde.ResampleMethod.DE_RESAMPLE_SINC_INTERPOLATION,
|
|
1622
2226
|
ResampleMethod.KAISER_WINDOW: cde.ResampleMethod.DE_RESAMPLE_KAISER_WINDOW}
|
|
1623
2227
|
|
|
@@ -1649,6 +2253,9 @@ class Resample(AudioTensorOperation):
|
|
|
1649
2253
|
ValueError: If `rolloff` is not in range of (0, 1].
|
|
1650
2254
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
1651
2255
|
|
|
2256
|
+
Supported Platforms:
|
|
2257
|
+
``CPU``
|
|
2258
|
+
|
|
1652
2259
|
Examples:
|
|
1653
2260
|
>>> import numpy as np
|
|
1654
2261
|
>>> from mindspore.dataset.audio import ResampleMethod
|
|
@@ -1688,6 +2295,13 @@ class RiaaBiquad(AudioTensorOperation):
|
|
|
1688
2295
|
sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz),
|
|
1689
2296
|
can only be one of 44100, 48000, 88200, 96000.
|
|
1690
2297
|
|
|
2298
|
+
Raises:
|
|
2299
|
+
TypeError: If `sample_rate` is not of type int.
|
|
2300
|
+
ValueError: If `sample_rate` is not any of [44100, 48000, 88200, 96000].
|
|
2301
|
+
|
|
2302
|
+
Supported Platforms:
|
|
2303
|
+
``CPU``
|
|
2304
|
+
|
|
1691
2305
|
Examples:
|
|
1692
2306
|
>>> import numpy as np
|
|
1693
2307
|
>>>
|
|
@@ -1718,6 +2332,17 @@ class SlidingWindowCmn(AudioTensorOperation):
|
|
|
1718
2332
|
to the left. Default: False.
|
|
1719
2333
|
norm_vars (bool, optional): If True, normalize variance to one. Default: False.
|
|
1720
2334
|
|
|
2335
|
+
Raises:
|
|
2336
|
+
TypeError: If `cmn_window` is not of type int.
|
|
2337
|
+
ValueError: If `cmn_window` is a negative number.
|
|
2338
|
+
TypeError: If `min_cmn_window` is not of type int.
|
|
2339
|
+
ValueError: If `min_cmn_window` is a negative number.
|
|
2340
|
+
TypeError: If `center` is not of type bool.
|
|
2341
|
+
TypeError: If `norm_vars` is not of type bool.
|
|
2342
|
+
|
|
2343
|
+
Supported Platforms:
|
|
2344
|
+
``CPU``
|
|
2345
|
+
|
|
1721
2346
|
Examples:
|
|
1722
2347
|
>>> import numpy as np
|
|
1723
2348
|
>>>
|
|
@@ -1775,6 +2400,9 @@ class SpectralCentroid(TensorOperation):
|
|
|
1775
2400
|
TypeError: If `window` is not of type :class:`mindspore.dataset.audio.WindowType` .
|
|
1776
2401
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
1777
2402
|
|
|
2403
|
+
Supported Platforms:
|
|
2404
|
+
``CPU``
|
|
2405
|
+
|
|
1778
2406
|
Examples:
|
|
1779
2407
|
>>> import numpy as np
|
|
1780
2408
|
>>>
|
|
@@ -1839,6 +2467,9 @@ class Spectrogram(TensorOperation):
|
|
|
1839
2467
|
TypeError: If `onesided` is not of type bool.
|
|
1840
2468
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
1841
2469
|
|
|
2470
|
+
Supported Platforms:
|
|
2471
|
+
``CPU``
|
|
2472
|
+
|
|
1842
2473
|
Examples:
|
|
1843
2474
|
>>> import numpy as np
|
|
1844
2475
|
>>>
|
|
@@ -1874,7 +2505,7 @@ class TimeMasking(AudioTensorOperation):
|
|
|
1874
2505
|
Apply masking to a spectrogram in the time domain.
|
|
1875
2506
|
|
|
1876
2507
|
Note:
|
|
1877
|
-
The
|
|
2508
|
+
The shape of the audio waveform to be processed needs to be <..., freq, time>.
|
|
1878
2509
|
|
|
1879
2510
|
Args:
|
|
1880
2511
|
iid_masks (bool, optional): Whether to apply different masks to each example/channel. Default: False.
|
|
@@ -1930,7 +2561,7 @@ class TimeStretch(AudioTensorOperation):
|
|
|
1930
2561
|
Stretch Short Time Fourier Transform (STFT) in time without modifying pitch for a given rate.
|
|
1931
2562
|
|
|
1932
2563
|
Note:
|
|
1933
|
-
The
|
|
2564
|
+
The shape of the audio waveform to be processed needs to be <..., freq, time, complex=2>.
|
|
1934
2565
|
The first dimension represents the real part while the second represents the imaginary.
|
|
1935
2566
|
|
|
1936
2567
|
Args:
|
|
@@ -2003,6 +2634,9 @@ class TrebleBiquad(AudioTensorOperation):
|
|
|
2003
2634
|
ValueError: If `Q` is not in range of (0, 1].
|
|
2004
2635
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2005
2636
|
|
|
2637
|
+
Supported Platforms:
|
|
2638
|
+
``CPU``
|
|
2639
|
+
|
|
2006
2640
|
Examples:
|
|
2007
2641
|
>>> import numpy as np
|
|
2008
2642
|
>>>
|
|
@@ -2100,6 +2734,9 @@ class Vad(AudioTensorOperation):
|
|
|
2100
2734
|
ValueError: If `lp_lifter_freq` is not a positive number.
|
|
2101
2735
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2102
2736
|
|
|
2737
|
+
Supported Platforms:
|
|
2738
|
+
``CPU``
|
|
2739
|
+
|
|
2103
2740
|
Examples:
|
|
2104
2741
|
>>> import numpy as np
|
|
2105
2742
|
>>>
|
|
@@ -2165,6 +2802,9 @@ class Vol(AudioTensorOperation):
|
|
|
2165
2802
|
ValueError: If `gain` is not a positive number when `gain_type` is GainType.POWER.
|
|
2166
2803
|
RuntimeError: If input tensor is not in shape of <..., time>.
|
|
2167
2804
|
|
|
2805
|
+
Supported Platforms:
|
|
2806
|
+
``CPU``
|
|
2807
|
+
|
|
2168
2808
|
Examples:
|
|
2169
2809
|
>>> import numpy as np
|
|
2170
2810
|
>>> from mindspore.dataset.audio import GainType
|