mindspore-1.10.0-cp38-cp38-win_amd64.whl → mindspore-2.0.0rc1-cp38-cp38-win_amd64.whl
This diff shows the content changes between publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/ConcurrencyCheck.dll +0 -0
- mindspore/CppBuildInsights.dll +0 -0
- mindspore/CppCoreCheck.dll +0 -0
- mindspore/EnumIndex.dll +0 -0
- mindspore/EspXEngine.dll +0 -0
- mindspore/HResultCheck.dll +0 -0
- mindspore/KernelTraceControl.dll +0 -0
- mindspore/LocalESPC.dll +0 -0
- mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
- mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
- mindspore/VariantClear.dll +0 -0
- mindspore/__init__.py +9 -4
- mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/builtin_operations.py +32 -4
- mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +17 -2
- mindspore/_extends/parse/parser.py +193 -34
- mindspore/_extends/parse/resources.py +7 -8
- mindspore/_extends/parse/standard_method.py +1780 -435
- mindspore/_extends/parse/trope.py +3 -1
- mindspore/amp.py +53 -58
- mindspore/atlprov.dll +0 -0
- mindspore/boost/adasum.py +3 -2
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +46 -26
- mindspore/boost/dim_reduce.py +6 -5
- mindspore/boost/grad_accumulation.py +2 -1
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/cfgpersist.dll +0 -0
- mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
- mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
- mindspore/common/__init__.py +11 -10
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +57 -0
- mindspore/common/api.py +582 -297
- mindspore/common/dtype.py +66 -18
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +38 -1
- mindspore/common/jit_config.py +25 -13
- mindspore/common/mutable.py +53 -24
- mindspore/common/parameter.py +60 -37
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +927 -0
- mindspore/common/tensor.py +1627 -3900
- mindspore/communication/__init__.py +10 -5
- mindspore/communication/_comm_helper.py +78 -214
- mindspore/communication/_hccl_management.py +2 -1
- mindspore/communication/management.py +136 -47
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +291 -56
- mindspore/d3dcompiler_47.dll +0 -0
- mindspore/dataset/__init__.py +12 -8
- mindspore/dataset/audio/__init__.py +9 -9
- mindspore/dataset/audio/transforms.py +1090 -228
- mindspore/dataset/audio/utils.py +87 -39
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +17 -15
- mindspore/dataset/core/config.py +246 -17
- mindspore/dataset/core/py_util_helpers.py +4 -3
- mindspore/dataset/core/validator_helpers.py +10 -10
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +9 -9
- mindspore/dataset/engine/datasets.py +648 -477
- mindspore/dataset/engine/datasets_audio.py +165 -167
- mindspore/dataset/engine/datasets_standard_format.py +93 -67
- mindspore/dataset/engine/datasets_text.py +492 -342
- mindspore/dataset/engine/datasets_user_defined.py +85 -50
- mindspore/dataset/engine/datasets_vision.py +1224 -699
- mindspore/dataset/engine/graphdata.py +134 -69
- mindspore/dataset/engine/iterators.py +50 -9
- mindspore/dataset/engine/offload.py +52 -31
- mindspore/dataset/engine/samplers.py +27 -24
- mindspore/dataset/engine/serializer_deserializer.py +14 -15
- mindspore/dataset/engine/validators.py +213 -52
- mindspore/dataset/text/__init__.py +10 -8
- mindspore/dataset/text/transforms.py +152 -57
- mindspore/dataset/text/utils.py +98 -49
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +4 -2
- mindspore/dataset/transforms/c_transforms.py +11 -13
- mindspore/dataset/transforms/py_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms_util.py +10 -0
- mindspore/dataset/transforms/transforms.py +13 -15
- mindspore/dataset/transforms/validators.py +7 -7
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/browse_dataset.py +13 -13
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +8 -7
- mindspore/dataset/vision/c_transforms.py +125 -126
- mindspore/dataset/vision/py_transforms.py +37 -37
- mindspore/dataset/vision/py_transforms_util.py +23 -20
- mindspore/dataset/vision/transforms.py +316 -315
- mindspore/dataset/vision/utils.py +313 -17
- mindspore/dataset/vision/validators.py +6 -6
- mindspore/default_config.py +0 -1
- mindspore/dpcmi.dll +0 -0
- mindspore/{compression → experimental}/__init__.py +6 -5
- mindspore/experimental/map_parameter.py +275 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +70 -9
- mindspore/include/api/delegate.h +8 -1
- mindspore/include/api/dual_abi_helper.h +8 -24
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_group.h +68 -0
- mindspore/include/api/model_parallel_runner.h +17 -17
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +20 -4
- mindspore/include/api/status.h +7 -1
- mindspore/include/api/types.h +25 -21
- mindspore/include/api/visible.h +4 -0
- mindspore/include/c_api/model_c.h +5 -0
- mindspore/include/c_api/status_c.h +1 -1
- mindspore/include/dataset/config.h +1 -1
- mindspore/include/dataset/constants.h +14 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/include/dataset/vision.h +56 -117
- mindspore/include/dataset/vision_lite.h +102 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +28 -28
- mindspore/mindrecord/common/exceptions.py +2 -4
- mindspore/mindrecord/filereader.py +19 -1
- mindspore/mindrecord/filewriter.py +250 -88
- mindspore/mindrecord/mindpage.py +13 -13
- mindspore/mindrecord/shardheader.py +15 -15
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +29 -29
- mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
- mindspore/mindrecord/tools/csv_to_mr.py +4 -4
- mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
- mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +1 -5
- mindspore/nn/cell.py +297 -234
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +17 -42
- mindspore/nn/layer/__init__.py +7 -4
- mindspore/nn/layer/activation.py +131 -88
- mindspore/nn/layer/basic.py +313 -613
- mindspore/nn/layer/channel_shuffle.py +103 -0
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +52 -6
- mindspore/nn/layer/conv.py +112 -43
- mindspore/nn/layer/dense.py +10 -9
- mindspore/nn/layer/embedding.py +36 -34
- mindspore/nn/layer/image.py +123 -27
- mindspore/nn/layer/math.py +108 -107
- mindspore/nn/layer/normalization.py +212 -366
- mindspore/nn/layer/padding.py +370 -42
- mindspore/nn/layer/pooling.py +1443 -219
- mindspore/nn/layer/rnn_cells.py +11 -16
- mindspore/nn/layer/rnns.py +38 -39
- mindspore/nn/layer/thor_layer.py +24 -25
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +9 -6
- mindspore/nn/loss/loss.py +678 -142
- mindspore/nn/metrics.py +53 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
- mindspore/nn/optim/ada_grad.py +8 -8
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +18 -14
- mindspore/nn/optim/adam.py +429 -87
- mindspore/nn/optim/adamax.py +5 -6
- mindspore/nn/optim/adasum.py +10 -8
- mindspore/nn/optim/asgd.py +7 -7
- mindspore/nn/optim/ftrl.py +81 -11
- mindspore/nn/optim/lamb.py +7 -8
- mindspore/nn/optim/lars.py +4 -4
- mindspore/nn/optim/lazyadam.py +82 -7
- mindspore/nn/optim/momentum.py +8 -7
- mindspore/nn/optim/optimizer.py +19 -10
- mindspore/nn/optim/proximal_ada_grad.py +6 -5
- mindspore/nn/optim/rmsprop.py +3 -3
- mindspore/nn/optim/rprop.py +20 -16
- mindspore/nn/optim/sgd.py +21 -15
- mindspore/nn/optim/thor.py +23 -21
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -6
- mindspore/nn/probability/bijector/invert.py +4 -2
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/__init__.py +6 -0
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +11 -17
- mindspore/nn/probability/distribution/bernoulli.py +6 -6
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +9 -9
- mindspore/nn/probability/distribution/cauchy.py +8 -8
- mindspore/nn/probability/distribution/distribution.py +12 -6
- mindspore/nn/probability/distribution/exponential.py +5 -5
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +6 -5
- mindspore/nn/probability/distribution/gumbel.py +5 -5
- mindspore/nn/probability/distribution/half_normal.py +133 -0
- mindspore/nn/probability/distribution/laplace.py +128 -0
- mindspore/nn/probability/distribution/log_normal.py +0 -1
- mindspore/nn/probability/distribution/logistic.py +4 -5
- mindspore/nn/probability/distribution/normal.py +11 -15
- mindspore/nn/probability/distribution/poisson.py +6 -2
- mindspore/nn/probability/distribution/student_t.py +150 -0
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +5 -5
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +8 -1
- mindspore/nn/wrap/cell_wrapper.py +55 -27
- mindspore/nn/wrap/grad_reducer.py +20 -11
- mindspore/nn/wrap/loss_scale.py +47 -30
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +46 -42
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +26 -19
- mindspore/numpy/utils.py +1 -8
- mindspore/numpy/utils_const.py +112 -62
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -3
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +209 -152
- mindspore/ops/_grad/grad_base.py +55 -17
- mindspore/ops/_grad/grad_clip_ops.py +11 -3
- mindspore/ops/_grad/grad_comm_ops.py +58 -47
- mindspore/ops/_grad/grad_implementations.py +21 -61
- mindspore/ops/_grad/grad_inner_ops.py +48 -6
- mindspore/ops/_grad/grad_math_ops.py +306 -161
- mindspore/ops/_grad/grad_nn_ops.py +192 -181
- mindspore/ops/_grad/grad_other_ops.py +1 -1
- mindspore/ops/_grad/grad_quant_ops.py +5 -5
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +15 -9
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
- mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
- mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
- mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
- mindspore/ops/_op_impl/__init__.py +3 -3
- mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
- mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
- mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
- mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
- mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
- mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
- mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
- mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
- mindspore/ops/_op_impl/aicpu/diag.py +36 -0
- mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
- mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
- mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
- mindspore/ops/_op_impl/aicpu/eig.py +35 -0
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/glu.py +33 -0
- mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
- mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
- mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
- mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
- mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
- mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
- mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
- mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
- mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/qr.py +36 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/range.py +36 -0
- mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
- mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
- mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sort.py +39 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
- mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
- mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
- mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
- mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
- mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/__init__.py +1 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
- mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
- mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -608
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/greater.py +2 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
- mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
- mindspore/ops/_op_impl/tbe/slice.py +26 -15
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +3 -2
- mindspore/ops/_register_for_op.py +11 -0
- mindspore/ops/_utils/__init__.py +1 -1
- mindspore/ops/_utils/utils.py +20 -41
- mindspore/ops/_vmap/__init__.py +2 -2
- mindspore/ops/_vmap/vmap_array_ops.py +170 -78
- mindspore/ops/_vmap/vmap_base.py +24 -10
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
- mindspore/ops/_vmap/vmap_image_ops.py +52 -0
- mindspore/ops/_vmap/vmap_math_ops.py +77 -6
- mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
- mindspore/ops/_vmap/vmap_other_ops.py +3 -1
- mindspore/ops/_vmap/vmap_random_ops.py +55 -3
- mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
- mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/__init__.py +1 -4
- mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
- mindspore/ops/composite/__init__.py +12 -13
- mindspore/ops/composite/base.py +261 -254
- mindspore/ops/composite/env_ops.py +41 -0
- mindspore/ops/composite/math_ops.py +197 -156
- mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
- mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
- mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
- mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
- mindspore/ops/function/__init__.py +323 -8
- mindspore/ops/function/array_func.py +3511 -780
- mindspore/ops/function/clip_func.py +329 -0
- mindspore/ops/function/debug_func.py +6 -6
- mindspore/ops/function/grad/__init__.py +5 -1
- mindspore/ops/function/grad/grad_func.py +736 -65
- mindspore/ops/function/image_func.py +270 -0
- mindspore/ops/function/linalg_func.py +268 -8
- mindspore/ops/function/math_func.py +8032 -3164
- mindspore/ops/function/nn_func.py +5619 -1855
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +11 -10
- mindspore/ops/function/random_func.py +939 -77
- mindspore/ops/function/sparse_func.py +249 -84
- mindspore/ops/function/sparse_unary_func.py +2303 -0
- mindspore/ops/function/spectral_func.py +146 -0
- mindspore/ops/function/vmap_func.py +114 -0
- mindspore/ops/functional.py +182 -254
- mindspore/ops/op_info_register.py +79 -34
- mindspore/ops/operations/__init__.py +210 -118
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +25 -15
- mindspore/ops/operations/_grad_ops.py +447 -322
- mindspore/ops/operations/_inner_ops.py +547 -176
- mindspore/ops/operations/_map_tensor_ops.py +112 -0
- mindspore/ops/operations/_ms_kernel.py +29 -27
- mindspore/ops/operations/_ocr_ops.py +11 -11
- mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
- mindspore/ops/operations/_quant_ops.py +186 -101
- mindspore/ops/operations/_rl_inner_ops.py +122 -61
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1047 -0
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +4 -4
- mindspore/ops/operations/array_ops.py +1428 -1226
- mindspore/ops/operations/comm_ops.py +180 -117
- mindspore/ops/operations/control_ops.py +4 -2
- mindspore/ops/operations/custom_ops.py +185 -98
- mindspore/ops/operations/debug_ops.py +92 -54
- mindspore/ops/operations/image_ops.py +406 -211
- mindspore/ops/operations/inner_ops.py +42 -53
- mindspore/ops/operations/linalg_ops.py +32 -29
- mindspore/ops/operations/math_ops.py +2076 -897
- mindspore/ops/operations/nn_ops.py +1282 -1252
- mindspore/ops/operations/other_ops.py +124 -278
- mindspore/ops/operations/random_ops.py +345 -178
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +502 -157
- mindspore/ops/operations/spectral_ops.py +107 -0
- mindspore/ops/primitive.py +192 -15
- mindspore/ops/vm_impl_registry.py +23 -2
- mindspore/parallel/__init__.py +6 -1
- mindspore/parallel/_auto_parallel_context.py +199 -92
- mindspore/parallel/_cell_wrapper.py +4 -2
- mindspore/parallel/_cost_model_context.py +3 -0
- mindspore/parallel/_dp_allreduce_fusion.py +2 -1
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +167 -28
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +9 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
- mindspore/parallel/_utils.py +47 -7
- mindspore/parallel/algo_parameter_config.py +5 -1
- mindspore/parallel/checkpoint_transform.py +329 -0
- mindspore/parallel/shard.py +229 -0
- mindspore/perf_msvcbuildinsights.dll +0 -0
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/util.py +4 -3
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +249 -0
- mindspore/profiler/parser/aicpu_data_parser.py +38 -39
- mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
- mindspore/profiler/parser/base_timeline_generator.py +471 -0
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
- mindspore/profiler/parser/framework_parser.py +42 -16
- mindspore/profiler/parser/hccl_parser.py +158 -158
- mindspore/profiler/parser/hwts_log_parser.py +7 -6
- mindspore/profiler/parser/integrator.py +18 -1579
- mindspore/profiler/parser/minddata_analyzer.py +8 -8
- mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +108 -0
- mindspore/profiler/parser/step_trace_parser.py +1 -1
- mindspore/profiler/profiling.py +396 -194
- mindspore/rewrite/__init__.py +6 -2
- mindspore/rewrite/api/node.py +51 -110
- mindspore/rewrite/api/node_type.py +10 -6
- mindspore/rewrite/api/pattern_engine.py +51 -7
- mindspore/rewrite/api/scoped_value.py +64 -53
- mindspore/rewrite/api/symbol_tree.py +108 -61
- mindspore/rewrite/api/tree_node_helper.py +2 -3
- mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
- mindspore/rewrite/ast_helpers/__init__.py +6 -3
- mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
- mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
- mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
- mindspore/rewrite/ast_transformers/__init__.py +0 -1
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
- mindspore/rewrite/common/__init__.py +2 -0
- mindspore/rewrite/common/event.py +1 -1
- mindspore/rewrite/common/observable.py +1 -1
- mindspore/rewrite/common/observer.py +1 -1
- mindspore/rewrite/common/rewrite_elog.py +35 -0
- mindspore/rewrite/namer.py +2 -2
- mindspore/rewrite/namespace.py +14 -4
- mindspore/rewrite/node.py +161 -13
- mindspore/rewrite/parser.py +0 -1
- mindspore/rewrite/parser_register.py +0 -1
- mindspore/rewrite/parsers/arguments_parser.py +3 -2
- mindspore/rewrite/parsers/assign_parser.py +267 -67
- mindspore/rewrite/parsers/attribute_parser.py +56 -0
- mindspore/rewrite/parsers/class_def_parser.py +191 -108
- mindspore/rewrite/parsers/constant_parser.py +101 -0
- mindspore/rewrite/parsers/container_parser.py +88 -0
- mindspore/rewrite/parsers/for_parser.py +28 -15
- mindspore/rewrite/parsers/function_def_parser.py +21 -5
- mindspore/rewrite/parsers/if_parser.py +11 -28
- mindspore/rewrite/parsers/module_parser.py +9 -6
- mindspore/rewrite/parsers/return_parser.py +3 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +322 -109
- mindspore/rewrite/symbol_tree_builder.py +45 -8
- mindspore/rewrite/symbol_tree_dumper.py +0 -1
- mindspore/rewrite/topological_manager.py +1 -2
- mindspore/run_check/_check_version.py +209 -112
- mindspore/run_check/run_check.py +2 -1
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +6 -4
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +321 -50
- mindspore/train/callback/__init__.py +3 -1
- mindspore/train/callback/_backup_and_restore.py +120 -0
- mindspore/train/callback/_callback.py +8 -8
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_early_stop.py +13 -7
- mindspore/train/callback/_history.py +8 -8
- mindspore/train/callback/_lambda_callback.py +6 -6
- mindspore/train/callback/_landscape.py +36 -38
- mindspore/train/callback/_loss_monitor.py +12 -6
- mindspore/train/callback/_lr_scheduler_callback.py +2 -4
- mindspore/train/callback/_on_request_exit.py +212 -0
- mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
- mindspore/train/callback/_summary_collector.py +27 -19
- mindspore/train/callback/_time_monitor.py +13 -7
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +122 -33
- mindspore/train/dataset_helper.py +28 -87
- mindspore/train/loss_scale_manager.py +4 -7
- mindspore/{nn → train}/metrics/__init__.py +20 -20
- mindspore/{nn → train}/metrics/accuracy.py +12 -10
- mindspore/{nn → train}/metrics/auc.py +4 -4
- mindspore/{nn → train}/metrics/bleu_score.py +4 -4
- mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
- mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
- mindspore/{nn → train}/metrics/dice.py +6 -5
- mindspore/{nn → train}/metrics/error.py +7 -5
- mindspore/{nn → train}/metrics/fbeta.py +9 -7
- mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
- mindspore/{nn → train}/metrics/loss.py +4 -3
- mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/metric.py +6 -5
- mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
- mindspore/{nn → train}/metrics/perplexity.py +5 -4
- mindspore/{nn → train}/metrics/precision.py +5 -4
- mindspore/{nn → train}/metrics/recall.py +5 -4
- mindspore/{nn → train}/metrics/roc.py +7 -6
- mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/topk.py +7 -5
- mindspore/train/mind_ir_pb2.py +339 -32
- mindspore/train/model.py +113 -84
- mindspore/train/serialization.py +547 -167
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -12
- mindspore/train/train_thor/convert_utils.py +7 -1
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/train/train_thor/model_thor.py +0 -4
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -514
- mindspore/compression/quant/qat.py +0 -636
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/libatomic-1.dll +0 -0
- mindspore/libgcc_s_seh-1.dll +0 -0
- mindspore/libgfortran-4.dll +0 -0
- mindspore/libgomp-1.dll +0 -0
- mindspore/libjpeg-62.dll +0 -0
- mindspore/libmindspore.dll +0 -0
- mindspore/libmindspore_common.dll +0 -0
- mindspore/libmindspore_core.dll +0 -0
- mindspore/libmindspore_glog.dll +0 -0
- mindspore/libnnacl.dll +0 -0
- mindspore/libopencv_core452.dll +0 -0
- mindspore/libopencv_imgcodecs452.dll +0 -0
- mindspore/libopencv_imgproc452.dll +0 -0
- mindspore/libquadmath-0.dll +0 -0
- mindspore/libsqlite3.dll +0 -0
- mindspore/libssp-0.dll +0 -0
- mindspore/libstdc++-6.dll +0 -0
- mindspore/libtinyxml2.dll +0 -0
- mindspore/libturbojpeg.dll +0 -0
- mindspore/libwinpthread-1.dll +0 -0
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -138
- mindspore/nn/probability/dpn/vae/vae.py +0 -122
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
- mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
- mindspore/ops/composite/array_ops.py +0 -210
- mindspore/ops/composite/clip_ops.py +0 -238
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/ops/operations/sponge_ops.py +0 -3531
- mindspore/ops/operations/sponge_update_ops.py +0 -2546
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- mindspore/run_check/_check_deps_version.py +0 -84
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
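Several path renames in the index above relocate public Python APIs rather than just files: the metrics package moves from mindspore.nn to mindspore.train (a new 53-line mindspore/nn/metrics.py remains, presumably a compatibility shim), nn.transformer moves under the private parallel._transformer package, and the mindspore.compression quantization package is removed outright. A minimal migration sketch for the metrics move, assuming the metric class names and constructors are unchanged by the relocation:

    # Sketch only: update imports for the nn -> train metrics move listed above.
    # MindSpore 1.10:      from mindspore.nn.metrics import Accuracy
    # MindSpore 2.0.0rc1 (assumed, per the file moves):
    from mindspore.train.metrics import Accuracy

    metric = Accuracy(eval_type="classification")
    metric.clear()                    # reset accumulated state
    # metric.update(logits, labels)  # feed predictions, then metric.eval()

The diff body below shows the first file-level diff in full.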
--- mindspore/dataset/engine/datasets.py (1.10.0)
+++ mindspore/dataset/engine/datasets.py (2.0.0rc1)
@@ -1,4 +1,4 @@
-# Copyright 2022 Huawei Technologies Co., Ltd
+# Copyright 2022-2023 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,6 +31,7 @@ import json
 import os
 import signal
 import stat
+import warnings
 
 import gc
 import time
@@ -45,13 +46,13 @@ import copy
 import weakref
 import platform
 import psutil
-import numpy as np
 
 import mindspore._c_dataengine as cde
 from mindspore._c_expression import typing
 
 from mindspore import log as logger
-from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _get_ps_context
+from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched, _get_ps_context,\
+    _enable_distributed_mindrt
 from mindspore.dataset.engine.offload import GetOffloadModel
 
 import mindspore.dataset.transforms.c_transforms as c_transforms
@@ -59,6 +60,7 @@ import mindspore.dataset.transforms.py_transforms as py_transforms
 import mindspore.dataset.transforms as transforms
 from mindspore.dataset.text.utils import SentencePieceModel, DE_C_INTER_SENTENCEPIECE_MODE
 from mindspore.parallel._utils import _get_device_num
+from mindspore.dataset.debug import DebugHook
 
 from . import samplers
 from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
@@ -67,9 +69,9 @@ from .queue import _SharedQueue, _Queue
 from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
     check_rename, check_device_send, check_take, check_output_shape, check_project, \
     check_sync_wait, check_zip_dataset, check_add_column, check_concat, check_split, check_bucket_batch_by_length, \
-    check_save, check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send
+    check_save, check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send, check_padded_batch
 from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
-    get_enable_watchdog, get_seed, set_seed
+    get_enable_watchdog, get_seed, set_seed, get_debug_mode, get_multiprocessing_timeout_interval, _get_debug_hook_list
 from ..core.datatypes import mstype_to_detype
 from ..core.validator_helpers import replace_none
 from ..core.py_util_helpers import ExceptionHandler
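The new config imports (get_debug_mode, _get_debug_hook_list) and the DebugHook import correspond to the new mindspore/dataset/debug package in the index above. A speculative sketch of how the debug mode is toggled, assuming a set_debug_mode setter paired with the get_debug_mode getter imported here:

    # Speculative: set_debug_mode is assumed as the public setter matching
    # the get_debug_mode getter that this hunk imports.
    import mindspore.dataset as ds

    ds.config.set_debug_mode(True)     # assumed API: run the pipeline in debug mode
    print(ds.config.get_debug_mode())  # getter imported in the hunk above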
@@ -114,16 +116,19 @@ def _get_training_dataset():
     return _train_dataset
 
 
-def _reset_training_dataset(
+def _reset_training_dataset(global_step, dataset_size):
     """
-    Reset the training dataset to the given step
+    Reset the training dataset to the given global step.
 
     Args:
-
+        global_step (int): Number of global steps that have completed training.
+            Dataset will provide data from its next step after reset.
+        dataset_size (int): Number of steps per epoch.
     """
     dataset = _get_training_dataset()
     if dataset is not None:
-
+        epoch = global_step // dataset_size
+        dataset._reset(global_step, epoch)  # pylint: disable=protected-access
     else:
         raise RuntimeError("Training dataset is not set.")
 
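The rewritten reset path derives the epoch from the global step; a worked example of that arithmetic with made-up values:

    # Illustrative values only; mirrors the epoch computation in the hunk above.
    global_step = 2500                    # steps already completed when resuming
    dataset_size = 1000                   # steps per epoch
    epoch = global_step // dataset_size   # -> 2, i.e. resume inside the third epoch
    # dataset._reset(global_step, epoch) then continues the pipeline from
    # step 2500, which is step 500 of epoch 2.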
@@ -227,7 +232,7 @@ def _get_operator_process():
     Inner implemented method, mainly for passing sub-process id in C layer
 
     Returns:
-        dict, mapping dict of
+        dict, mapping dict of operation id and corresponding process id.
     """
     global _OP_PROCESS
     process_info = _OP_PROCESS
@@ -283,19 +288,20 @@ class Dataset:
 
         MappableDataset
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+        DatasetOperation: MapDataset(UnionBaseDataset)
+                          BatchDataset(UnionBaseDataset)
+                          PaddedBatchDataset(UnionBaseDataset)
+                          BucketBatchByLengthDataset(UnionBaseDataset)
+                          ShuffleDataset(UnionBaseDataset)
+                          FilterDataset(UnionBaseDataset)
+                          RepeatDataset(UnionBaseDataset)
+                          SkipDataset(UnionBaseDataset)
+                          TakeDataset(UnionBaseDataset)
+                          ZipDataset(UnionBaseDataset)
+                          ConcatDataset(UnionBaseDataset)
+                          RenameDataset(UnionBaseDataset)
+                          ProjectDataset(UnionBaseDataset)
+                          SyncWaitDataset(UnionBaseDataset)
 
     Impl Dataset - vision: ImageFolderDataset(MappableDataset, VisionBaseDataset)
                            USPSDataset(SourceDataset, VisionBaseDataset)
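The new docstring hierarchy names the concrete operation classes a pipeline produces; a short sketch of how ordinary pipeline calls map onto them (dataset values are made up):

    # Each chained call returns one of the operation datasets listed above.
    import mindspore.dataset as ds

    data = ds.NumpySlicesDataset([1, 2, 3, 4], column_names=["col1"], shuffle=False)
    data = data.map(operations=[lambda x: x * 2], input_columns=["col1"])  # MapDataset
    data = data.batch(2)                                                   # BatchDataset
    data = data.repeat(2)                                                  # RepeatDataset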
@@ -309,8 +315,8 @@ class Dataset:
                            NumpySlicesDataset(GeneratorDataset)
 
     Args:
-        num_parallel_workers (int, optional): Number of workers to process the dataset in parallel
-
+        num_parallel_workers (int, optional): Number of workers to process the dataset in parallel.
+            Default: None.
     """
 
     def __init__(self, children=None, num_parallel_workers=None, cache=None):
@@ -333,9 +339,6 @@ class Dataset:
         self.saved_output_shapes = None
         self.estimated_output_shapes = None
         self.runtime_context = None
-        self.dynamic_setting = [False, None]
-        self.saved_min_shapes = None
-        self.saved_max_shapes = None
         self._col_names = None
         self.dataset_size = None
         self._batch_size = None
@@ -347,7 +350,7 @@ class Dataset:
     @staticmethod
     def _get_operator_id(dataset):
         """
-        Internal method to iterate the tree and obtain op_id of each
+        Internal method to iterate the tree and obtain op_id of each operation.
 
         Returns:
             Dataset, the root dataset of the tree.
@@ -379,18 +382,6 @@ class Dataset:
         _OP_PROCESS.update(generator_process)
         return op_name
 
-    def close_pool(self):
-        """
-        Close multiprocessing pool in dataset. If you are familiar with multiprocessing library, you can regard this
-        as a destructor for a processingPool object.
-
-        Note:
-            This interface will be deleted or invisible in the future. Please don't use it.
-            When you find that there are residual processes that do not exit correctly, you can use `kill -9 PROCESS_ID`
-            to end it, or through www.gitee.com/mindspore/mindspore send us an issue.
-        """
-        logger.warning("This interface will be deleted or invisible in the future. Please don't use it.")
-
     def create_ir_tree(self):
         """
         Internal method to build an IR tree.
@@ -444,7 +435,7 @@ class Dataset:
 
     @staticmethod
     def _noop_mode():
-        if _is_role_sched()
+        if _is_role_sched():
             return True
         return False
 
@@ -459,10 +450,13 @@ class Dataset:
|
|
|
459
450
|
Serialize a pipeline into JSON string and dump into file if filename is provided.
|
|
460
451
|
|
|
461
452
|
Args:
|
|
462
|
-
filename (str): filename of JSON file to be saved as
|
|
453
|
+
filename (str): filename of JSON file to be saved as. Default: ''.
|
|
463
454
|
|
|
464
455
|
Returns:
|
|
465
456
|
str, JSON string of the pipeline.
|
|
457
|
+
|
|
458
|
+
Examples:
|
|
459
|
+
>>> dataset_json = dataset.to_json("/path/to/mnist_dataset_pipeline.json")
|
|
466
460
|
"""
|
|
467
461
|
ir_tree, _ = self.create_ir_tree()
|
|
468
462
|
return json.loads(ir_tree.to_json(filename))
|
|
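For context on the `to_json` example added above: the JSON written here pairs with `mindspore.dataset.serialize` / `mindspore.dataset.deserialize` for rebuilding a pipeline. A minimal round-trip sketch, assuming a source whose operations are all serializable and an existing MNIST directory:

    import mindspore.dataset as ds

    dataset = ds.MnistDataset("/path/to/mnist_dataset_directory")
    dataset = dataset.batch(32)
    # Dump the pipeline definition, then rebuild an equivalent pipeline from it.
    dataset.to_json("pipeline.json")
    restored = ds.deserialize(json_filepath="pipeline.json")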
@@ -495,7 +489,7 @@ class Dataset:
             element_length_function (Callable, optional): A function that takes in
                 M arguments where M = len(column_names) and returns an integer. If no value
                 provided, parameter M the len(column_names) must be 1, and the size of the first
-                dimension of that column will be taken as the length
+                dimension of that column will be taken as the length. Default: None.
             pad_info (dict, optional): The information about how to batch each column. The key
                 corresponds to the column name, and the value must be a tuple of 2 elements.
                 The first element corresponds to the shape to pad to, and the second
@@ -503,17 +497,17 @@ class Dataset:
                 specified, then that column will be padded to the longest in the current
                 batch, and 0 will be used as the padding value. Any None dimensions will
                 be padded to the longest in the current batch, unless if
-                pad_to_bucket_boundary is True. If no padding is wanted, set pad_info
-                to None
+                `pad_to_bucket_boundary` is True. If no padding is wanted, set pad_info
+                to None. Default: None.
             pad_to_bucket_boundary (bool, optional): If True, will pad each None
-                dimension in pad_info to the bucket_boundary minus 1. If there are any
-                elements that fall into the last bucket, an error will occur
-
+                dimension in `pad_info` to the bucket_boundary minus 1. If there are any
+                elements that fall into the last bucket, an error will occur.
+                Default: False.
             drop_remainder (bool, optional): If True, will drop the last batch for each
-                bucket if it is not a full batch
+                bucket if it is not a full batch. Default: False.
 
         Returns:
-            Dataset, dataset
+            Dataset, dataset bucketized and batched by length.
 
         Examples:
             >>> # Create a dataset where certain counts rows are combined into a batch
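A usage sketch for `bucket_batch_by_length` as documented above (the generator source and column name are illustrative): rows whose first dimension is below the boundary land in the first bucket with batch size 2, the rest in the second with batch size 1, and `pad_info` pads "col1" with zeros to the longest row in each batch.

    import numpy as np
    import mindspore.dataset as ds

    def gen():
        for i in range(1, 5):
            yield (np.ones(i, dtype=np.int32),)

    dataset = ds.GeneratorDataset(gen, column_names=["col1"])
    dataset = dataset.bucket_batch_by_length(column_names=["col1"],
                                             bucket_boundaries=[3],
                                             bucket_batch_sizes=[2, 1],
                                             pad_info={"col1": ([None], 0)})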
@@ -541,14 +535,15 @@ class Dataset:
             element_length_function, pad_info, pad_to_bucket_boundary, drop_remainder)
 
     @check_batch
-    def batch(self, batch_size, drop_remainder=False, num_parallel_workers=None,
-              input_columns=None, output_columns=None, column_order=None, pad_info=None,
-              python_multiprocessing=False, max_rowsize=16):
+    def batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, **kwargs):
         """
-        Combine batch_size number of consecutive rows into
+        Combine batch_size number of consecutive rows into batch which apply per_batch_map to the samples first.
 
         For any column, all the elements within that column must have the same shape.
-
+
+        Refer to the following figure for the execution process:
+
+        .. image:: batch_en.png
 
         Note:
             The order of using repeat and batch reflects the number of batches and per_batch_map.
@@ -558,36 +553,39 @@ class Dataset:
             batch_size (Union[int, Callable]): The number of rows each batch is created with. An
                 int or callable object which takes exactly 1 parameter, BatchInfo.
             drop_remainder (bool, optional): Determines whether or not to drop the last block
-                whose data row number is less than batch size
+                whose data row number is less than batch size. Default: False. If True, and if there are less
                 than batch_size rows available to make the last batch, then those rows will
                 be dropped and not propagated to the child node.
-            num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                (
-
-
-
-
+            num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
+                Default: None.
+            **kwargs:
+
+                - per_batch_map (Callable[[List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo], \
+                  (List[numpy.ndarray], ..., List[numpy.ndarray])], optional): Per batch map callable. Default: None.
+                  A callable which takes (List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo) as input parameters.
+                  Each list[numpy.ndarray] represents a batch of numpy.ndarray on a given column. The number of lists
+                  should match with the number of entries in input_columns. The last parameter of the callable should
+                  always be a BatchInfo object. Per_batch_map should return
+                  (list[numpy.ndarray], list[numpy.ndarray], ...). The length of each list in output should be the same
+                  as the input. output_columns is required if the number of output lists is different from input.
+
+                - input_columns (Union[str, list[str]], optional): List of names of the input columns. The size of
+                  the list should match with signature of per_batch_map callable. Default: None.
+
+                - output_columns (Union[str, list[str]], optional): List of names assigned to the columns
+                  outputted by the last operation. This parameter is mandatory if len(input_columns) !=
+                  len(output_columns). The size of this list must match the number of output
+                  columns of the last operation. Default: None, output columns will have the same
+                  name as the input columns, i.e., the columns will be replaced.
+
+                - python_multiprocessing (bool, optional): Parallelize Python function `per_batch_map` with
+                  multi-processing or multi-threading mode, True means multi-processing, False means multi-threading
+                  If `per_batch_map` is a I/O bound task, use multi-threading mode.
+                  If `per_batch_map` is a CPU bound task, it is recommended to use multi-processing mode.
+                  Default: False, use python multi-threading mode.
+
+                - max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to
+                  copy data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
 
         Returns:
             BatchDataset, dataset batched.
@@ -597,7 +595,7 @@ class Dataset:
             >>> # and drops the last incomplete batch if there is one.
             >>> dataset = dataset.batch(100, True)
             >>>
-            >>> # 2
+            >>> # 2) resize image according to its batch number, if it's 5-th batch, resize to (5^2, 5^2) = (25, 25)
             >>> def np_resize(col, BatchInfo):
             ...     output = col.copy()
             ...     s = (BatchInfo.get_batch_num() + 1) ** 2
@@ -610,22 +608,64 @@ class Dataset:
             ...     return (output,)
             >>> dataset = dataset.batch(batch_size=8, input_columns=["image"], per_batch_map=np_resize)
             >>>
-            >>> # 3
+            >>> # 3) Create a dataset where its batch size is dynamic
             >>> # Define a callable batch size function and let batch size increase 1 each time.
             >>> def add_one(BatchInfo):
             ...     return BatchInfo.get_batch_num() + 1
             >>> dataset = dataset.batch(batch_size=add_one, drop_remainder=True)
-            >>>
-            >>> # 4)Create a dataset with batch, then specify the column order.
-            >>> # Assume that the original coulmn order is ["image", "label"] and change to ["label", "image"].
-            >>> dataset = dataset.batch(32, column_order=["label", "image"])
         """
-
-
-
+        return BatchDataset(self, batch_size, drop_remainder, num_parallel_workers, **kwargs)
+
+    @check_padded_batch
+    def padded_batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, pad_info=None):
+        """
+        Combine batch_size number of consecutive rows into batch which apply pad_info to the samples first.
+
+        Refer to the following figure for the execution process:
 
-
-
+        .. image:: padded_batch_en.png
+
+        Note:
+            The order of using repeat and padded_batch reflects the number of batches.
+            It is recommended that the repeat operation applied after the padded_batch operation finished.
+
+        Args:
+            batch_size (Union[int, Callable]): The number of rows each batch is created with. An
+                int or callable object which takes exactly 1 parameter, BatchInfo.
+            drop_remainder (bool, optional): Determines whether or not to drop the last block
+                whose data row number is less than batch size. Default: False. If True, and if there are less
+                than batch_size rows available to make the last batch, then those rows will
+                be dropped and not propagated to the child node.
+            num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
+                Default: None.
+            pad_info (dict, optional): The information about how to batch each column. The key
+                corresponds to the column name, and the value must be a tuple of 2 elements.
+                The first element corresponds to the shape to pad to, and the second
+                element corresponds to the value to pad with. If a column is not
+                specified, then that column will be padded to the longest in the current
+                batch, and 0 will be used as the padding value. Any None dimensions will
+                be padded to the longest in the current batch, unless if
+                pad_to_bucket_boundary is True. If no padding is wanted, set pad_info
+                to None. Default: None.
+
+        Returns:
+            PaddedBatchDataset, dataset batched.
+
+        Examples:
+            >>> # 1) Pad every sample to the largest sample's shape and batch the samples
+            >>> dataset = dataset.padded_batch(100, True, pad_info={})
+            >>>
+            >>> # 2) Create a dataset where every 100 rows are combined into a batch
+            >>> # and drops the last incomplete batch if there is one.
+            >>> dataset = dataset.padded_batch(100, True)
+            >>>
+            >>> # 3) Create a dataset where its batch size is dynamic
+            >>> # Define a callable batch size function and let batch size increase 1 each time.
+            >>> def add_one(BatchInfo):
+            ...     return BatchInfo.get_batch_num() + 1
+            >>> dataset = dataset.padded_batch(batch_size=add_one, drop_remainder=True)
+        """
+        return PaddedBatchDataset(self, batch_size, drop_remainder, num_parallel_workers, pad_info)
 
     @check_sync_wait
     def sync_wait(self, condition_name, num_batch=1, callback=None):
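Since `pad_info` moves off `batch` and onto the new `padded_batch` here, a 1.10-style padded batch migrates mechanically; a sketch with an illustrative single-column source:

    import numpy as np
    import mindspore.dataset as ds

    def gen():
        for i in range(1, 4):
            yield (np.ones(i, dtype=np.float32),)

    # 1.10: dataset.batch(2, drop_remainder=True, pad_info={"data": ([4], 0)})
    # 2.0: padding now lives on padded_batch().
    dataset = ds.GeneratorDataset(gen, column_names=["data"])
    dataset = dataset.padded_batch(2, drop_remainder=True, pad_info={"data": ([4], 0)})

    # batch() keeps per_batch_map, now accepted through **kwargs.
    def negate(cols, batch_info):
        # cols is the list of "data" arrays in this batch; keep the same structure.
        return ([-c for c in cols],)

    dataset2 = ds.GeneratorDataset(gen, column_names=["data"])
    dataset2 = dataset2.batch(batch_size=2, input_columns=["data"], per_batch_map=negate)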
@@ -634,8 +674,8 @@ class Dataset:
 
         Args:
             condition_name (str): The condition name that is used to toggle sending next row.
-            num_batch (int): the number of batches without blocking at the start of each epoch
-            callback (function): The callback function that will be invoked when sync_update is called
+            num_batch (int): the number of batches without blocking at the start of each epoch. Default: 1.
+            callback (function): The callback function that will be invoked when sync_update is called. Default: None.
 
         Returns:
             SyncWaitDataset, dataset added a blocking condition.
@@ -678,27 +718,27 @@ class Dataset:
     @check_shuffle
     def shuffle(self, buffer_size):
         """
-
+        Shuffle the dataset by creating a cache with the size of `buffer_size` .
 
-        1. Make a shuffle buffer that contains the first buffer_size rows.
+        1. Make a shuffle buffer that contains the first `buffer_size` rows.
         2. Randomly select an element from the shuffle buffer to be the next row
            propagated to the child node.
         3. Get the next row (if any) from the parent node and put it in the shuffle buffer.
         4. Repeat steps 2 and 3 until there are no more rows left in the shuffle buffer.
 
-        A random seed can be provided to be used on the first epoch via `dataset.config.set_seed
+        A random seed can be provided to be used on the first epoch via `dataset.config.set_seed` . In every subsequent
         epoch, the seed is changed to a new one, randomly generated value.
 
         Args:
             buffer_size (int): The size of the buffer (must be larger than 1) for
-                shuffling. Setting buffer_size equal to the number of rows in the entire
+                shuffling. Setting `buffer_size` equal to the number of rows in the entire
                 dataset will result in a global shuffle.
 
         Returns:
             Dataset, dataset shuffled.
 
         Raises:
-            RuntimeError: If exist sync
+            RuntimeError: If exist sync operations before shuffle.
 
         Examples:
             >>> # dataset is an instance object of Dataset
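The four numbered steps in the shuffle docstring describe a streaming buffer shuffle; a minimal pure-Python sketch of that algorithm (illustrative only, not the underlying C++ implementation):

    import random

    def buffer_shuffle(rows, buffer_size, seed=None):
        """Yield rows in shuffled order through a fixed-size shuffle buffer."""
        rng = random.Random(seed)
        it = iter(rows)
        buffer = []
        # Step 1: fill the buffer with the first buffer_size rows.
        for row in it:
            buffer.append(row)
            if len(buffer) == buffer_size:
                break
        # Steps 2-3: emit a random buffered row, refill its slot from the parent.
        for row in it:
            idx = rng.randrange(len(buffer))
            yield buffer[idx]
            buffer[idx] = row
        # Step 4: drain whatever is left in the buffer.
        rng.shuffle(buffer)
        yield from buffer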
@@ -715,7 +755,7 @@ class Dataset:
 
         Args:
             func (function): A function that must take one `numpy.ndarray` as an argument and
-                return a `Dataset
+                return a `Dataset` .
 
         Returns:
             Dataset, dataset applied by the function.
@@ -767,57 +807,77 @@ class Dataset:
 
     @check_map
     def map(self, operations, input_columns=None, output_columns=None, column_order=None,
-            num_parallel_workers=None,
-            max_rowsize=16, offload=None):
+            num_parallel_workers=None, **kwargs):
         """
         Apply each operation in operations to this dataset.
 
         Each operation will be passed one or more columns from the dataset as input, and one or
         more columns will be outputted. The first operation will be passed the columns specified
-        in input_columns as input. If there is more than one
+        in input_columns as input. If there is more than one operation in operations, the outputted
         columns of the previous operation are used as the input columns for the next operation.
 
         The columns outputted by the very last operation will be assigned names specified by
-        `output_columns
+        `output_columns` , and if not specified, the column name of output column is same as that of `input_columns` .
+
+        - If you use transformations (
+          `vision transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
+          dataset.transforms.html#module-mindspore.dataset.vision>`_ ,
+          `nlp transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
+          dataset.transforms.html#module-mindspore.dataset.text>`_ ,
+          `audio transform <https://mindspore.cn/docs/en/r2.0/api_python/mindspore.\
+          dataset.transforms.html#module-mindspore.dataset.audio>`_ )
+          provided by mindspore dataset, please use the following parameters:
+
+          .. image:: map_parameter_en.png
+
+        - If you use user-defined transform as PyFunc (Python Func), please use the following parameters:
+
+          .. image:: map_parameter_pyfunc_en.png
 
         Args:
             operations (Union[list[TensorOperation], list[functions]]): List of operations to be
                 applied on the dataset. Operations are applied in the order they appear in this list.
             input_columns (Union[str, list[str]], optional): List of the names of the columns that will be passed to
                 the first operation as input. The size of this list must match the number of
-                input columns expected by the first
+                input columns expected by the first operation. Default: None, the first
                 operation will be passed however many columns that are required, starting from
-                the first column
+                the first column.
             output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
                 the last operation. This parameter is mandatory if len(input_columns) !=
                 len(output_columns). The size of this list must match the number of output
-                columns of the last operation.
-                name as the input columns, i.e., the columns will be replaced
-            column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
-                dataset (default=None). The parameter is required when len(input_column) != len(output_column).
-                Caution: the list here is not just the columns specified in parameter input_columns and output_columns.
+                columns of the last operation. Default: None, output columns will have the same
+                name as the input columns, i.e., the columns will be replaced.
             num_parallel_workers (int, optional): Number of threads used to process the dataset in
-                parallel
-
-
-
-
-
-
-
-
+                parallel. Default: None, the value from the configuration will be used.
+            **kwargs:
+
+                - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes.
+                  This option could be beneficial if the Python operation is computational heavy. Default: False.
+
+                - max_rowsize (int, optional): Maximum size of row in MB that is used for shared memory allocation to
+                  copy data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
+
+                - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
+                  Default: None, which means no cache is used.
+
+                - callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called.
+                  Default: None.
+
+                - offload (bool, optional): Flag to indicate whether offload is used. Default: None.
 
         Note:
             - Input `operations` accepts TensorOperations defined in mindspore.dataset part, plus user-defined
               Python functions (PyFuncs).
             - Do not add network computing operators from mindspore.nn and mindspore.ops or others into this
-              `operations
+              `operations` .
 
         Returns:
             Dataset, dataset after mapping operation.
 
         Examples:
             >>> # dataset is an instance of Dataset which has 2 columns, "image" and "label".
+            >>> # image is of type bytes type which can be decoded to RGB
+            >>> # label is of type int32
             >>>
             >>> # Define two operations, where each operation accepts 1 input column and outputs 1 column.
             >>> decode_op = c_vision.Decode(rgb=True)
@@ -826,30 +886,15 @@ class Dataset:
             >>>
             >>> # 1) Simple map example.
             >>>
-            >>> # Apply decode_op on column "image".
-            >>> # column of decode_op. Since column_order is not provided, both columns "image"
-            >>> # and "label" will be propagated to the child node in their original order.
+            >>> # Apply decode_op on column "image".
             >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"])
             >>>
             >>> # Decode and rename column "image" to "decoded_image".
             >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"], output_columns=["decoded_image"])
             >>>
-            >>> #
-            >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
-            ...                       output_columns=None, column_order=["label", "image"])
-            >>>
-            >>> # Rename column "image" to "decoded_image" and also specify the order of the output columns.
-            >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
-            ...                       output_columns=["decoded_image"], column_order=["label", "decoded_image"])
-            >>>
-            >>> # Rename column "image" to "decoded_image" and keep only this column.
-            >>> dataset = dataset.map(operations=[decode_op], input_columns=["image"],
-            ...                       output_columns=["decoded_image"], column_order=["decoded_image"])
-            >>>
-            >>> # A simple example for mapping pyfunc. Renaming columns and specifying column order
-            >>> # work in the same way as the previous examples.
+            >>> # A simple example for user defined python function transform.
             >>> dataset = ds.NumpySlicesDataset(data=[[0, 1, 2]], column_names=["data"])
-            >>> dataset = dataset.map(operations=[(lambda x: x
+            >>> dataset = dataset.map(operations=[(lambda x: x - 1)], input_columns=["data"])
             >>>
             >>> # 2) Map example with more than one operation.
             >>>
@@ -858,17 +903,14 @@ class Dataset:
             >>> # outputted by decode_op is passed as input to random_jitter_op.
             >>> # random_jitter_op will output one column. Column "image" will be replaced by
             >>> # the column outputted by random_jitter_op (the very last operation). All other
-            >>> # columns are unchanged.
-            >>> # columns will remain the same.
+            >>> # columns are unchanged.
             >>> dataset = dataset.map(operations=[decode_op, random_jitter_op], input_columns=["image"])
             >>>
             >>> # Rename the column outputted by random_jitter_op to "image_mapped".
-            >>> # Specifying column order works in the same way as examples in 1).
             >>> dataset = dataset.map(operations=[decode_op, random_jitter_op], input_columns=["image"],
             ...                       output_columns=["image_mapped"])
             >>>
-            >>> # Map with multiple operations using pyfunc
-            >>> # work in the same way as examples in 1).
+            >>> # Map with multiple operations using pyfunc and rename column's name
             >>> dataset = ds.NumpySlicesDataset(data=[[0, 1, 2]], column_names=["data"])
             >>> dataset = dataset.map(operations=[(lambda x: x * x), (lambda x: x - 1)], input_columns=["data"],
             ...                       output_columns=["data_mapped"])
@@ -885,22 +927,9 @@ class Dataset:
             >>> operations = [(lambda x, y: (x, x + y, x + y + 1)),
             ...               (lambda x, y, z: x * y * z),
             ...               (lambda x: (x % 2, x % 3, x % 5, x % 7))]
-            >>>
-            >>> # Note: Since the number of input columns is not the same as the number of
-            >>> # output columns, the output_columns and column_order parameters must be
-            >>> # specified. Otherwise, this map call will also result in an error.
-            >>>
             >>> dataset = ds.NumpySlicesDataset(data=([[0, 1, 2]], [[3, 4, 5]]), column_names=["x", "y"])
-            >>>
-            >>> # Propagate all columns to the child node in this order:
-            >>> dataset = dataset.map(operations, input_columns=["x", "y"],
-            ...                       output_columns=["mod2", "mod3", "mod5", "mod7"],
-            ...                       column_order=["mod2", "mod3", "mod5", "mod7"])
-            >>>
-            >>> # Propagate some columns to the child node in this order:
             >>> dataset = dataset.map(operations, input_columns=["x", "y"],
-            ...                       output_columns=["mod2", "mod3", "mod5", "mod7"]
-            ...                       column_order=["mod7", "mod3", "col2"])
+            ...                       output_columns=["mod2", "mod3", "mod5", "mod7"])
         """
         if hasattr(self, 'operator_mixed') and getattr(self, 'operator_mixed') is True:
             num_parallel_workers = 1
@@ -909,8 +938,7 @@ class Dataset:
                            "mindspore.numpy module and etc, which do not support multi-thread compiling, recommend to replace it "
                            "with python implemented operator like numpy etc. Here decrease 'num_parallel_workers' into 1.")
 
-        return MapDataset(self, operations, input_columns, output_columns,
-                          python_multiprocessing, cache, callbacks, max_rowsize, offload)
+        return MapDataset(self, operations, input_columns, output_columns, num_parallel_workers, **kwargs)
 
     @check_filter
     def filter(self, predicate, input_columns=None, num_parallel_workers=None):
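A hedged migration sketch for the `column_order` examples deleted above: in 2.0 the reordering that `map(column_order=...)` used to perform is expressed as a separate `project` step (`decode_op` as in the docstring examples):

    # 1.10:
    # dataset = dataset.map(operations=[decode_op], input_columns=["image"],
    #                       column_order=["label", "image"])

    # 2.0: map() no longer reorders columns; chain project() after it.
    dataset = dataset.map(operations=[decode_op], input_columns=["image"])
    dataset = dataset.project(["label", "image"])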
@@ -920,9 +948,9 @@ class Dataset:
         Args:
             predicate (callable): Python callable which returns a boolean value. If False then filter the element.
             input_columns (Union[str, list[str]], optional): List of names of the input columns. If not provided
-                or provided with None, the predicate will be applied on all columns in the dataset
+                or provided with None, the predicate will be applied on all columns in the dataset. Default: None.
             num_parallel_workers (int, optional): Number of workers to process the dataset
-                in parallel
+                in parallel. Default: None.
 
         Returns:
             Dataset, dataset filtered.
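A short usage sketch for `filter` as documented above, assuming a one-column generator source; rows failing the predicate are dropped:

    import mindspore.dataset as ds

    dataset = ds.GeneratorDataset([i for i in range(20)], ["data"])
    # Keep only rows whose value is strictly below 11.
    dataset = dataset.filter(predicate=lambda data: data < 11, input_columns=["data"])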
@@ -944,7 +972,7 @@ class Dataset:
             the repeat operation is used after the batch operation.
 
         Args:
-            count (int): Number of times the dataset is going to be repeated
+            count (int): Number of times the dataset is going to be repeated. Default: None.
 
         Returns:
             Dataset, dataset repeated.
@@ -960,7 +988,7 @@ class Dataset:
             >>> dataset = dataset.repeat(50)
             >>>
             >>> # Create a dataset where the dataset is first repeated for
-            >>> # 50 epochs before shuffling. The shuffle
+            >>> # 50 epochs before shuffling. The shuffle operation will treat
             >>> # the entire 50 epochs as one big dataset.
             >>> dataset = dataset.repeat(50)
             >>> dataset = dataset.shuffle(10)
@@ -997,7 +1025,7 @@ class Dataset:
         then take the given number of rows; otherwise take the given number of batches.
 
         Args:
-            count (int, optional): Number of elements to be taken from the dataset
+            count (int, optional): Number of elements to be taken from the dataset. Default: -1.
 
         Returns:
             Dataset, dataset taken.
@@ -1085,7 +1113,7 @@ class Dataset:
                 - The sum of split sizes > K, the difference of sigma(round(fi * K)) - K will be removed from the first
                   large enough split such that it will have at least 1 row after removing the difference.
 
-            randomize (bool, optional): Determines whether or not to split the data randomly
+            randomize (bool, optional): Determines whether or not to split the data randomly. Default: True.
                 If True, the data will be randomly split. Otherwise, each split will be created with
                 consecutive rows from the dataset.
 
@@ -1147,12 +1175,15 @@ class Dataset:
             name.
 
         Args:
-            datasets (Union[
+            datasets (Union[Dataset, tuple[Dataset]]): A tuple of datasets or a single class Dataset
                 to be zipped together with this dataset.
 
         Returns:
             Dataset, dataset zipped.
 
+        Raises:
+            TypeError: The parameter is not dataset object or tuple of dataset objects.
+
         Examples:
             >>> # Create a dataset which is the combination of dataset and dataset_1
             >>> dataset = dataset.zip(dataset_1)
@@ -1249,7 +1280,7 @@ class Dataset:
 
         Args:
             apply_func (function): A function that must take one `Dataset` as an argument and
-                return a preprocessed `Dataset
+                return a preprocessed `Dataset` .
 
         Returns:
             Dataset, dataset applied by the function.
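A usage sketch for `apply` per the docstring above: bundle pipeline steps into one callable that takes and returns a Dataset.

    def apply_func(data):
        # Any reusable chain of pipeline steps can live here.
        data = data.batch(2)
        return data

    dataset = dataset.apply(apply_func)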
@@ -1284,9 +1315,9 @@ class Dataset:
         Return a transferred Dataset that transfers data through a device.
 
         Args:
-            send_epoch_end (bool, optional): Whether to send end of sequence to device or not
+            send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True.
             create_data_info_queue (bool, optional): Whether to create queue which stores
-                types and shapes of data or not
+                types and shapes of data or not. Default: False.
 
         Note:
             If device is Ascend, features of data will be transferred one by one. The limitation
@@ -1294,34 +1325,17 @@ class Dataset:
 
         Returns:
             Dataset, dataset for transferring.
-        """
-        return TransferDataset(self, send_epoch_end, create_data_info_queue)
-
-    @check_device_send
-    def to_device(self, send_epoch_end=True, create_data_info_queue=False):
-        """
-        Transfer data from CPU to GPU or Ascend or other devices.
-
-        Args:
-            send_epoch_end (bool, optional): Whether to send the end of sequence to device or not (default=True).
-            create_data_info_queue (bool, optional): Whether to create queue which stores
-                types and shapes of data or not(default=False).
-
-        Note:
-            This interface will be deleted or invisible in the future.
-            Please use `device_que` to enable dataset sink mode.
-            If device is Ascend, features of data will be transferred one by one. The limitation
-            of data transmission per second is 256M.
 
-
-
-
-
-
+        Examples:
+            >>> import time
+            >>>
+            >>> data = ds.TFRecordDataset('/path/to/TF_FILES', '/path/to/TF_SCHEMA_FILE', shuffle=ds.Shuffle.FILES)
+            >>>
+            >>> data = data.device_que()
+            >>> data.send()
+            >>> time.sleep(0.1)
+            >>> data.stop_send()
         """
-        logger.warning("This interface will be deleted or invisible in the future. "
-                       "Please use 'device_que' to enable dataset sink mode.")
-
         return TransferDataset(self, send_epoch_end, create_data_info_queue)
 
     @check_save
@@ -1330,7 +1344,8 @@ class Dataset:
         Save the dynamic data processed by the dataset pipeline in common dataset format.
         Supported dataset formats: `mindrecord` only. And you can use `MindDataset` API to read the saved file(s).
 
-        Implicit type casting exists when saving data as `mindrecord
+        Implicit type casting exists when saving data as `mindrecord` . The transform table shows how to do
+        type casting.
 
         .. list-table:: Implicit Type Casting when Saving as `mindrecord`
            :widths: 25 25 50
@@ -1381,8 +1396,8 @@ class Dataset:
 
         Note:
             1. To save the samples in order, set dataset's shuffle to False and num_files to 1.
-            2. Before calling the function, do not use batch
-               with random attribute in map
+            2. Before calling the function, do not use batch operation, repeat operation or data augmentation operations
+               with random attribute in map operation.
            3. When array dimension is variable, one-dimensional arrays or
               multi-dimensional arrays with variable dimension 0 are supported.
            4. Mindrecord does not support uint64, multi-dimensional uint8(drop dimension) nor
@@ -1390,9 +1405,20 @@ class Dataset:
 
         Args:
             file_name (str): Path to dataset file.
-            num_files (int, optional): Number of dataset files
-            file_type (str, optional): Dataset format
+            num_files (int, optional): Number of dataset files. Default: 1.
+            file_type (str, optional): Dataset format. Default: 'mindrecord'.
 
+        Examples:
+            >>> import numpy as np
+            >>>
+            >>> def generator_1d():
+            ...     for i in range(10):
+            ...         yield (np.array([i]),)
+            >>>
+            >>>
+            >>> # apply dataset operations
+            >>> d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
+            >>> d1.save('/path/to/save_file')
         """
         ir_tree, api_tree = self.create_ir_tree()
 
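Completing the loop on the new `save` example: as the docstring notes, the written file reads back through `MindDataset`; a sketch reusing the example's path:

    d2 = ds.MindDataset(dataset_files='/path/to/save_file')
    for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(item["data"])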
@@ -1409,20 +1435,20 @@ class Dataset:
     @check_tuple_iterator
     def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False, do_copy=True):
         """
-        Create an iterator over the dataset. The datatype retrieved back will be a list of `numpy.ndarray
+        Create an iterator over the dataset. The datatype retrieved back will be a list of `numpy.ndarray` .
 
         To specify which columns to list and the order needed, use columns_list. If columns_list
         is not provided, the order of the columns will remain unchanged.
 
         Args:
-            columns (list[str], optional): List of columns to be used to specify the order of columns
-
+            columns (list[str], optional): List of columns to be used to specify the order of columns.
+                Default: None, means all columns.
             num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
-
+                Default: -1, iterator can be iterated infinite number of epochs.
             output_numpy (bool, optional): Whether or not to output NumPy datatype.
-                If output_numpy=False, iterator will output MSTensor
-            do_copy (bool, optional):
-                use this param to select the conversion method, only take False for better performance
+                If output_numpy=False, iterator will output MSTensor. Default: False.
+            do_copy (bool, optional): When output data type is mindspore.Tensor,
+                use this param to select the conversion method, only take False for better performance. Default: True.
 
         Returns:
             Iterator, tuple iterator over the dataset.
@@ -1444,15 +1470,17 @@ class Dataset:
         return TupleIterator(self, columns, num_epochs, output_numpy, do_copy)
 
     @check_dict_iterator
-    def create_dict_iterator(self, num_epochs=-1, output_numpy=False):
+    def create_dict_iterator(self, num_epochs=-1, output_numpy=False, do_copy=True):
         """
         Create an iterator over the dataset. The data retrieved will be a dictionary datatype.
 
         Args:
-            num_epochs (int, optional): Maximum number of epochs that iterator can be iterated
-
+            num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
+                Default: -1, iterator can be iterated infinite number of epochs.
             output_numpy (bool, optional): Whether or not to output NumPy datatype,
-                if output_numpy=False, iterator will output MSTensor
+                if output_numpy=False, iterator will output MSTensor. Default: False.
+            do_copy (bool, optional): When output data type is mindspore.Tensor,
+                use this param to select the conversion method, only take False for better performance. Default: True.
 
         Returns:
             Iterator, dictionary iterator over the dataset.
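A usage note on the new `do_copy` knob: it only takes effect when the iterator yields `mindspore.Tensor` (i.e. `output_numpy=False`), where `False` skips a defensive copy in exchange for speed; a sketch:

    for item in dataset.create_dict_iterator(num_epochs=1, do_copy=False):
        # item maps column names to mindspore.Tensor values.
        print(list(item.keys()))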
@@ -1471,7 +1499,7 @@ class Dataset:
 
         if Dataset._noop_mode():
             return DummyIterator(self, 'dict', output_numpy)
-        return DictIterator(self, num_epochs, output_numpy)
+        return DictIterator(self, num_epochs, output_numpy, do_copy)
 
     def __iter__(self):
         """Create an iterator over the dataset."""
@@ -1587,11 +1615,6 @@ class Dataset:
         if estimate and self.estimated_output_shapes is not None:
             return self.estimated_output_shapes
 
-        # if use set_dynamic_column, the `estimate` does not work, but they get the same result
-        if self.dynamic_setting[0]:
-            self.saved_output_shapes, self.saved_min_shapes, self.saved_max_shapes = self._dynamic_output_shapes()
-            return self.saved_output_shapes
-
         # We have a hang problem when two-level pipeline with multiprocessing, we need to extend the life cycle
         # of runtime_context. We found this hang problem only occur on output_types and output_shapes.
         runtime_getter = self._init_tree_getters()
@@ -1599,6 +1622,9 @@ class Dataset:
         api_tree = runtime_getter[2]
         output_shapes = runtime_getter[0].GetOutputShapes(estimate)
         del api_tree
+        # Need to terminate the runtime context to avoid the occasional hang problem for
+        # Python (with multiprocessing enabled) in sink mode.
+        self.runtime_context.Terminate()
         del self.runtime_context
 
         if estimate:
@@ -1626,6 +1652,9 @@ class Dataset:
         api_tree = runtime_getter[2]
         self.saved_output_types = runtime_getter[0].GetOutputTypes()
         del api_tree
+        # Need to terminate the runtime context to avoid the occasional hang problem for
+        # Python (with multiprocessing enabled) in sink mode.
+        self.runtime_context.Terminate()
         del self.runtime_context
         return self.saved_output_types
 
@@ -1648,136 +1677,6 @@ class Dataset:
 
         return self.dataset_size
 
-    @deprecated("1.5")
-    def set_dynamic_columns(self, columns=None):
-        """
-        Set dynamic shape information of source data, it should be set after the pipeline is defined.
-
-        Args:
-            columns (dict): A dict contains shape information of each column in dataset.
-                The value of shape[i] is :py:obj:`None` indicates that the data length of shape[i] is dynamic.
-
-        Examples:
-            >>> import numpy as np
-            >>>
-            >>> def generator1():
-            ...     for i in range(1, 100):
-            ...         yield np.ones((16, i, 83)), np.array(i)
-            >>>
-            >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
-            >>> dataset.set_dynamic_columns(columns={"data1": [16, None, 83], "data2": []})
-        """
-        if not isinstance(columns, dict):
-            raise TypeError("Pass a dict to set dynamic shape, example: {\"data1\": [16, None, 256]}")
-        self.dynamic_setting[0] = True
-        self.dynamic_setting[1] = columns
-
-    def dynamic_min_max_shapes(self):
-        """
-        Get minimum and maximum data length of dynamic source data, for dynamic graph compilation.
-
-        Returns:
-            lists, min_shapes, max_shapes of source data.
-
-        Examples:
-            >>> import numpy as np
-            >>>
-            >>> def generator1():
-            ...     for i in range(1, 100):
-            ...         yield np.ones((16, i, 83)), np.array(i)
-            >>>
-            >>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
-            >>> dataset.set_dynamic_columns(columns={"data1": [16, None, 83], "data2": []})
-            >>> min_shapes, max_shapes = dataset.dynamic_min_max_shapes()
-        """
-        if self.saved_min_shapes is None or self.saved_max_shapes is None:
-            self.saved_output_shapes, self.saved_min_shapes, self.saved_max_shapes = self._dynamic_output_shapes()
-        return self.saved_min_shapes, self.saved_max_shapes
-
-    @staticmethod
-    def __check_dynamic_column_name(dynamic_columns, dataset_columns):
-        for column in dynamic_columns:
-            if column not in dataset_columns:
-                raise RuntimeError("dynamic column [" + column + "] does not match any column in dataset: " +
-                                   str(dataset_columns))
-
-    @staticmethod
-    def __check_dynamic_column_shape(data, col, dynamic_columns):
-        shape_mismatch = "dynamic column [" + col + "] with shape " + str(dynamic_columns[col]) + \
-                         " does not match dataset column [" + col + "] with shape " + str(list(data[col].shape))
-        if data[col].ndim != len(dynamic_columns[col]):
-            raise RuntimeError(shape_mismatch)
-        for dim in range(len(dynamic_columns[col])):
-            if dynamic_columns[col][dim] is not None and dynamic_columns[col][dim] != data[col].shape[dim]:
-                raise RuntimeError(shape_mismatch)
-
-    def _dynamic_output_shapes(self):
-        """
-        Get dynamic information of source data.
-
-        Returns:
-            lists, dynamic_shapes, min_shapes, max_shapes of source data.
-        """
-        if not self.dynamic_setting[1]:
-            raise RuntimeError("dynamic_columns is not set, call set_dynamic_columns() by final Dataset Op.")
-
-        if self.saved_output_shapes is not None and self.saved_min_shapes is not None and \
-                self.saved_max_shapes is not None:
-            return self.saved_output_shapes, self.saved_min_shapes, self.saved_max_shapes
-
-        logger.warning("Calculating dynamic shape of input data, this will take a few minutes...")
-        # Assume data1 shape is dynamic, data2 shape is fix
-        dynamic_columns = self.dynamic_setting[1]
-        # ["data1", "data2"]
-        dataset_columns = self.get_col_names()
-        Dataset.__check_dynamic_column_name(dynamic_columns, dataset_columns)
-
-        # Shape[1] of data1 is variable
-        # {"data1": {(batch_size, 100, feat_len), (16, 200, 83)}, "data2": {(batch_size, feat_len)}}
-        column_shape_set = {col: set() for col in dataset_columns}
-        dataset_size_counter = 0
-        for data in self.create_dict_iterator(num_epochs=1, output_numpy=True):
-            dataset_size_counter += 1
-            for col in data.keys():
-                if col in dynamic_columns:
-                    Dataset.__check_dynamic_column_shape(data, col, dynamic_columns)
-                column_shape_set[col].add(tuple(data[col].shape))
-
-        # we get dataset_size after dryrun
-        self.dataset_size = dataset_size_counter
-
-        min_shapes, max_shapes, dynamic_shapes = list(), list(), list()
-        for col, shape_set in column_shape_set.items():
-            if len(shape_set) > 1:
-                if col not in dynamic_columns:
-                    raise RuntimeError("column [" + col + "] has dynamic shape but not set by set_dynamic_columns()" +
-                                       ", shapes of [" + col + "]: " + str(list(shape_set)))
-                shape_npy = np.array(list(shape_set))
-                max_shape = shape_npy.max(axis=0)
-                min_shape = shape_npy.min(axis=0)
-
-                # Set min shape to 1 due to unknown shuffle
-                min_shape = np.where(np.equal(dynamic_columns[col], None), 1, min_shape)
-                # Set dynamic dim to -1 for ME
-                dynamic_shape = np.where(np.equal(dynamic_columns[col], None), -1, dynamic_columns[col])
-
-                max_shapes.append(max_shape.tolist())
-                min_shapes.append(min_shape.tolist())
-                dynamic_shapes.append(dynamic_shape.tolist())
-            else:
-                # Also append fix shape to keep order of column shape
-                fix_shape = list(list(shape_set)[0])
-                max_shapes.append(fix_shape)
-                min_shapes.append(fix_shape)
-                dynamic_shapes.append(fix_shape)
-                if col in dynamic_columns:
-                    logger.warning("column [" + col + "] has no dynamic shape but set by set_dynamic_columns()")
-                    # Set min shape to 1 due to unknown shuffle
-                    min_shapes[-1] = np.where(np.equal(dynamic_columns[col], None), 1, fix_shape).tolist()
-                    # Set dynamic dim to -1 for ME
-                    dynamic_shapes[-1] = np.where(np.equal(dynamic_columns[col], None), -1, fix_shape).tolist()
-        return dynamic_shapes, min_shapes, max_shapes
-
     def num_classes(self):
         """
         Get the number of classes in a dataset.
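With `set_dynamic_columns` and its helpers removed above, the 2.0-era way to declare a dynamic dimension is typically on the network rather than the dataset; a hedged sketch using `mindspore.nn.Cell.set_inputs` (the trivial `Net` is illustrative, not part of this diff):

    import numpy as np
    import mindspore as ms
    from mindspore import nn

    class Net(nn.Cell):
        def construct(self, x):
            return x * 2

    net = Net()
    # None marks the dynamic dimension on an abstract input Tensor; this plays the
    # role that dataset.set_dynamic_columns({"data1": [16, None, 83]}) did in 1.x.
    net.set_inputs(ms.Tensor(shape=[16, None, 83], dtype=ms.float32))
    out = net(ms.Tensor(np.ones((16, 7, 83), np.float32)))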
@@ -1820,8 +1719,41 @@ class Dataset:
             condition_name (str): The condition name that is used to toggle sending next row.
             num_batch (Union[int, None]): The number of batches (rows) that are released.
                 When num_batch is None, it will default to the number specified by the
-                sync_wait
-            data (Any): The data passed to the callback, user defined
+                sync_wait operation. Default: None.
+            data (Any): The data passed to the callback, user defined. Default: None.
+
+        Examples:
+            >>> import numpy as np
+            >>>
+            >>>
+            >>> def gen():
+            ...     for i in range(100):
+            ...         yield (np.array(i),)
+            >>>
+            >>>
+            >>> class Augment:
+            ...     def __init__(self, loss):
+            ...         self.loss = loss
+            ...
+            ...     def preprocess(self, input_):
+            ...         return input_
+            ...
+            ...     def update(self, data):
+            ...         self.loss = data["loss"]
+            >>>
+            >>>
+            >>> batch_size = 10
+            >>> dataset = ds.GeneratorDataset(gen, column_names=["input"])
+            >>> aug = Augment(0)
+            >>> dataset = dataset.sync_wait(condition_name='', num_batch=1)
+            >>> dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess])
+            >>> dataset = dataset.batch(batch_size)
+            >>>
+            >>> count = 0
+            >>> for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
+            ...     count += 1
+            ...     data = {"loss": count}
+            ...     dataset.sync_update(condition_name="", data=data)
         """
         if (not isinstance(num_batch, int) and num_batch is not None) or \
                 (isinstance(num_batch, int) and num_batch <= 0):
@@ -1845,7 +1777,7 @@ class Dataset:
         Return the size of batch.
 
         Returns:
-            int, the
+            int, the batch size of data.
 
         Examples:
             >>> # dataset is an instance object of Dataset
@@ -1860,7 +1792,7 @@ class Dataset:
 
     def get_repeat_count(self):
         """
-        Get the replication times in RepeatDataset
+        Get the replication times in RepeatDataset. Default: 1.
 
         Returns:
             int, the count of repeat.
@@ -1894,7 +1826,18 @@ class Dataset:
         return {}
 
     def reset(self):
-        """
+        """
+        Reset the dataset for next epoch.
+
+        Examples:
+            >>> mind_dataset_dir = ["/path/to/mind_dataset_file"]
+            >>> dataset = ds.MindDataset(dataset_files=mind_dataset_dir)
+            >>> for _ in range(5):
+            ...     num_iter = 0
+            ...     for data in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
+            ...         num_iter += 1
+            ...     dataset.reset()
+        """
 
     def is_shuffled(self):
         """Returns True if the dataset or its children is shuffled."""
@@ -1915,6 +1858,15 @@ class Dataset:
     def parse(self, children=None):
         raise NotImplementedError("Dataset has to implement parse method.")
 
+    def __len__(self):
+        """
+        Get the length of dataset.
+
+        Returns:
+            int, the length of dataset.
+        """
+        return self.get_dataset_size()
+
     @staticmethod
     def _update_data_shard(num_shards, shard_id):
         """
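A quick note on the new `__len__`: plain `len()` on a pipeline now answers the same question as `get_dataset_size()`; a sketch:

    dataset = ds.NumpySlicesDataset(data=[1, 2, 3], column_names=["data"])
    assert len(dataset) == dataset.get_dataset_size() == 3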
@@ -1970,6 +1922,10 @@ class TextBaseDataset(Dataset):
|
|
|
1970
1922
|
Build a vocab from a dataset. This would collect all the unique words in a dataset and return a vocab
|
|
1971
1923
|
which contains top_k most frequent words (if top_k is specified).
|
|
1972
1924
|
|
|
1925
|
+
Note:
|
|
1926
|
+
+            mindspore.dataset.Dataset.build_vocab is deprecated from version 2.0
+            and will be removed in a future version. Use mindspore.dataset.text.Vocab.from_dataset instead.
+
         Args:
             columns(Union[str, list[str]]): Column names to get words from.
             freq_range(tuple[int]): A tuple of integers (min_frequency, max_frequency). Words within the frequency
@@ -1984,22 +1940,60 @@ class TextBaseDataset(Dataset):
 
         Returns:
             Vocab, vocab built from the dataset.
+        """
+        warnings.warn("mindspore.dataset.Dataset.build_vocab is deprecated from version 2.0 "
+                      "and will be removed in a future version. "
+                      "Use mindspore.dataset.text.Vocab.from_dataset instead.", DeprecationWarning)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def build_sentencepiece_vocab(self, columns, vocab_size, character_coverage, model_type, params):
+        """
+        Function to create a SentencePieceVocab from source dataset.
+        Desired source dataset is a text type dataset.
+
+        Note:
+            mindspore.dataset.Dataset.build_sentencepiece_vocab is deprecated from version 2.0
+            and will be removed in a future version. Use mindspore.dataset.text.SentencePieceVocab.from_dataset instead.
+
+        Args:
+            columns(list[str]): Column names to get words from.
+            vocab_size(int): Vocabulary size.
+            character_coverage(float): Percentage of characters covered by the model, must be between
+                0.98 and 1.0 Good defaults are: 0.9995 for languages with rich character sets like
+                Japanese or Chinese character sets, and 1.0 for other languages with small character sets
+                like English or Latin.
+            model_type(SentencePieceModel): Model type. Choose from unigram (default), bpe, char, or word.
+                The input sentence must be pretokenized when using word type.
+            params(dict): Any extra optional parameters of sentencepiece library according to your raw data
+
+        Returns:
+            SentencePieceVocab, vocab built from the dataset.
+        """
+        warnings.warn("mindspore.dataset.Dataset.build_vocab is deprecated from version 2.0 "
+                      "and will be removed in a future version. "
+                      "Use mindspore.dataset.text.Vocab.from_dataset instead.", DeprecationWarning)
 
+    def _build_vocab(self, columns, freq_range, top_k, special_tokens, special_first):
+        """
+        Function to create a Vocab from source dataset.
+        Desired source dataset is a text type dataset.
+
+        Build a vocab from a dataset. This would collect all the unique words in a dataset and return a vocab
+        which contains top_k most frequent words (if top_k is specified).
+
+        Args:
+            columns(Union[str, list[str]]): Column names to get words from.
+            freq_range(tuple[int]): A tuple of integers (min_frequency, max_frequency). Words within the frequency
+                range will be stored.
+                Naturally 0 <= min_frequency <= max_frequency <= total_words. min_frequency/max_frequency
+                can be set to default, which corresponds to 0/total_words separately.
+            top_k(int): Number of words to be built into vocab. top_k most frequent words are
+                taken. The top_k is taken after freq_range. If not enough top_k, all words will be taken
+            special_tokens(list[str]): A list of strings, each one is a special token.
+            special_first(bool): Whether special_tokens will be prepended/appended to vocab, If special_tokens
+                is specified and special_first is set to default, special_tokens will be prepended.
+
+        Returns:
+            Vocab, vocab built from the dataset.
         """
         vocab = cde.Vocab()
         columns = replace_none(columns, [])
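Both replacement APIs named in the deprecation messages above are importable from mindspore.dataset.text. A minimal migration sketch, assuming a text-type source dataset; the file path and the "text" column name are placeholders:

```python
import mindspore.dataset as ds
import mindspore.dataset.text as text

# Placeholder corpus; any text-type dataset works as the source.
dataset = ds.TextFileDataset("/path/to/corpus.txt", shuffle=False)

# Replaces the deprecated dataset.build_vocab(...)
vocab = text.Vocab.from_dataset(dataset, columns=["text"], freq_range=None, top_k=None,
                                special_tokens=["<pad>", "<unk>"], special_first=True)

# Replaces the deprecated dataset.build_sentencepiece_vocab(...)
spm_vocab = text.SentencePieceVocab.from_dataset(
    dataset, ["text"], 5000, 0.9995, text.SentencePieceModel.UNIGRAM, {})
```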
@@ -2032,7 +2026,7 @@ class TextBaseDataset(Dataset):
 
         return vocab
 
-    def
+    def _build_sentencepiece_vocab(self, columns, vocab_size, character_coverage, model_type, params):
         """
         Function to create a SentencePieceVocab from source dataset.
         Desired source dataset is a text type dataset.
@@ -2050,13 +2044,6 @@ class TextBaseDataset(Dataset):
 
         Returns:
             SentencePieceVocab, vocab built from the dataset.
-
-        Examples:
-            >>> from mindspore.dataset.text import SentencePieceModel
-            >>>
-            >>> # You can construct any text dataset as source, take TextFileDataset as example.
-            >>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
-            >>> dataset = dataset.build_sentencepiece_vocab(["text"], 5000, 0.9995, SentencePieceModel.UNIGRAM, {})
         """
         if not isinstance(model_type, SentencePieceModel):
             raise TypeError("Argument model_type with value {0} is not of type SentencePieceModel, but got {1}." \
@@ -2264,7 +2251,7 @@ class MappableDataset(SourceDataset):
             - The sum of split sizes > K, the difference will be removed from the first large
               enough split such that it will have at least 1 row after removing the difference.
 
-            randomize (bool, optional): Determines whether or not to split the data randomly
+            randomize (bool, optional): Determines whether or not to split the data randomly. Default: True.
                 If True, the data will be randomly split. Otherwise, each split will be created with
                 consecutive rows from the dataset.
 
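For context, a sketch of how the now-documented default behaves; the dataset path and split sizes are illustrative:

```python
import mindspore.dataset as ds

# Placeholder source; split() is available on mappable datasets.
dataset = ds.ImageFolderDataset("/path/to/images")

# randomize defaults to True: rows are shuffled before the 80/20 split.
train_set, test_set = dataset.split([0.8, 0.2])

# randomize=False keeps consecutive rows together (useful for time series).
train_set, test_set = dataset.split([0.8, 0.2], randomize=False)
```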
@@ -2340,7 +2327,7 @@ class MappableDataset(SourceDataset):
 
 class BucketBatchByLengthDataset(UnionBaseDataset):
     """
-    The result of applying BucketBatchByLength
+    The result of applying BucketBatchByLength operation to the input dataset.
     """
 
     def __init__(self, input_dataset, column_names, bucket_boundaries, bucket_batch_sizes, element_length_function,
@@ -2391,17 +2378,17 @@ def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
 
 class BatchDataset(UnionBaseDataset):
     """
-    The result of applying Batch
+    The result of applying Batch operation to the input dataset.
 
     Args:
         input_dataset (Dataset): Input Dataset to be batched.
         batch_size (Union[int, function]): The number of rows each batch is created with. An
             int or callable which takes exactly 1 parameter, BatchInfo.
         drop_remainder (bool, optional): Determines whether or not to drop the last
-            possibly incomplete batch
+            possibly incomplete batch. Default: False. If True, and if there are less
             than batch_size rows available to make the last batch, then those rows will
             be dropped and not propagated to the child node.
-        num_parallel_workers (int, optional): Number of workers to process the dataset in parallel
+        num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None.
         per_batch_map (callable, optional): Per batch map callable. A callable which takes
             (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch of
             Tensors on a given column. The number of lists should match with number of entries in input_columns. The
@@ -2411,21 +2398,15 @@ class BatchDataset(UnionBaseDataset):
         output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
             the last operation. This parameter is mandatory if len(input_columns) !=
             len(output_columns). The size of this list must match the number of output
-            columns of the last operation.
-            name as the input columns, i.e., the columns will be replaced
-        column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
-            dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
-            is not just the columns specified in parameter input_columns and output_columns.
-        pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
-            will pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
+            columns of the last operation. Default: None, output columns will have the same
+            name as the input columns, i.e., the columns will be replaced.
         max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
-            data between processes. This is only used if python_multiprocessing is set to True
+            data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
 
     """
 
     def __init__(self, input_dataset, batch_size, drop_remainder=False, num_parallel_workers=None, per_batch_map=None,
-                 input_columns=None, output_columns=None,
-                 python_multiprocessing=False, max_rowsize=16):
+                 input_columns=None, output_columns=None, python_multiprocessing=False, max_rowsize=16):
         super().__init__(children=input_dataset, num_parallel_workers=num_parallel_workers)
 
         if BatchDataset._is_ancestor_of_repeat(input_dataset):
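Since batch_size may be a callable of BatchInfo and per_batch_map also receives the BatchInfo, a minimal sketch of both hooks; the column name and the transformations are illustrative:

```python
import numpy as np
import mindspore.dataset as ds

data = ds.NumpySlicesDataset({"col1": np.arange(32, dtype=np.int32)}, shuffle=False)

def batch_size_fn(info):
    # Grow the batch size by one on every epoch.
    return 4 + info.get_epoch_num()

def per_batch_map(col1, info):
    # col1 is the list of values batched for that column; return a tuple of lists.
    return ([x * 2 for x in col1],)

data = data.batch(batch_size=batch_size_fn, input_columns=["col1"], per_batch_map=per_batch_map)
```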
@@ -2443,10 +2424,6 @@ class BatchDataset(UnionBaseDataset):
 
         self.input_columns = to_list(input_columns)
         self.output_columns = to_list(output_columns)
-        self.column_order = to_list(column_order)
-
-        self.pad = bool(pad_info is not None)
-        self.pad_info = replace_none(pad_info, dict())
 
         self.python_multiprocessing = python_multiprocessing
         self.process_pool = None
@@ -2458,9 +2435,9 @@ class BatchDataset(UnionBaseDataset):
         del self.process_pool
 
     def parse(self, children=None):
-        return cde.BatchNode(children[0], self.batch_size, self.drop_remainder,
-                             self.output_columns, self.
-                             self.
+        return cde.BatchNode(children[0], self.batch_size, self.drop_remainder, False, self.input_columns,
+                             self.output_columns, self.batch_size_func, self.per_batch_map, {},
+                             self.process_pool)
 
     @staticmethod
     def _is_ancestor_of_repeat(dataset):
@@ -2506,6 +2483,10 @@ class BatchDataset(UnionBaseDataset):
         """
         if self.python_multiprocessing and platform.system().lower() == 'windows':
             logger.warning("Python multiprocessing is not supported on Windows platform.")
+        if self.python_multiprocessing and get_debug_mode():
+            logger.warning("Python multiprocessing is not supported in debug mode."
+                           " Ignoring Python multiprocessing for batch operation.")
+            self.python_multiprocessing = False
         if self.python_multiprocessing and platform.system().lower() != 'windows':
             if self.per_batch_map is None:
                 logger.warning("per_batch_map is None so python_multiprocessing is ignored for batch.")
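get_debug_mode() reads the new dataset debug-mode switch; assuming it is toggled through ds.config.set_debug_mode (the 2.0 dataset config entry point), a sketch:

```python
import mindspore.dataset as ds

# In debug mode the pipeline runs single-process, so breakpoints inside
# Python callables are reachable; multiprocessing is ignored as above.
ds.config.set_debug_mode(True)
print(ds.config.get_debug_mode())  # True
```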
@@ -2526,7 +2507,7 @@ class BatchDataset(UnionBaseDataset):
 
 class BatchInfo(cde.CBatchInfo):
     """
-    Only the batch size function and per_batch_map of the batch
+    Only the batch size function and per_batch_map of the batch operation can dynamically adjust parameters
     based on the number of batches and epochs during training.
     """
@@ -2549,7 +2530,7 @@ class BlockReleasePair:
 
     Args:
         init_release_rows (int): Number of lines to allow through the pipeline.
-        callback (function): The callback function that will be called when release is called
+        callback (function): The callback function that will be called when release is called. Default: None.
     """
 
     def __init__(self, init_release_rows, callback=None):
@@ -2612,6 +2593,80 @@ class BlockReleasePair:
             self.cv.notify_all()
 
 
+class PaddedBatchDataset(UnionBaseDataset):
+    """
+    The result of applying Batch operation to the input dataset.
+
+    Args:
+        input_dataset (Dataset): Input Dataset to be batched.
+        batch_size (Union[int, function]): The number of rows each batch is created with. An
+            int or callable which takes exactly 1 parameter, BatchInfo.
+        drop_remainder (bool, optional): Determines whether or not to drop the last
+            possibly incomplete batch. Default: False. If True, and if there are less
+            than batch_size rows available to make the last batch, then those rows will
+            be dropped and not propagated to the child node.
+        num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None.
+        pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
+            will pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
+    """
+
+    def __init__(self, input_dataset, batch_size, drop_remainder=False, num_parallel_workers=None, pad_info=None):
+        super().__init__(children=input_dataset, num_parallel_workers=num_parallel_workers)
+
+        if PaddedBatchDataset._is_ancestor_of_repeat(input_dataset):
+            logger.warning("Repeat is located before padded_batch, data from two epochs can be batched together.")
+
+        PaddedBatchDataset._update_batch_size_for_syncwait(input_dataset, batch_size)
+
+        # if batch_size is callable, set batch_size to 1 and batch_size_func to that callable function
+        self.batch_size = batch_size if not callable(batch_size) else 1
+        self.batch_size_func = None if not callable(batch_size) else batch_size
+
+        self.drop_remainder = replace_none(drop_remainder, False)
+
+        self.pad = bool(pad_info is not None)
+        self.pad_info = replace_none(pad_info, dict())
+
+    def parse(self, children=None):
+        return cde.BatchNode(children[0], self.batch_size, self.drop_remainder, self.pad, [],
+                             [], self.batch_size_func, None, self.pad_info, None)
+
+    @staticmethod
+    def _is_ancestor_of_repeat(dataset):
+        """
+        Utility function to find the case where repeat is used before batch.
+
+        Args:
+            dataset (Dataset): Dataset to be checked.
+
+        Returns:
+            bool, whether repeat is used before batch.
+        """
+        if isinstance(dataset, RepeatDataset):
+            return True
+        flag = False
+        for input_dataset in dataset.children:
+            flag = flag | PaddedBatchDataset._is_ancestor_of_repeat(input_dataset)
+        return flag
+
+    @staticmethod
+    def _update_batch_size_for_syncwait(dataset, batch_size):
+        """
+        Utility function to notify batch size to sync_wait.
+
+        Args:
+            dataset (Dataset): Dataset to be checked.
+            batch_size (int): batch size to notify.
+        """
+        if isinstance(dataset, SyncWaitDataset):
+            dataset.update_sync_batch_size(batch_size)
+        for input_dataset in dataset.children:
+            PaddedBatchDataset._update_batch_size_for_syncwait(input_dataset, batch_size)
+
+    def __deepcopy__(self, memodict):
+        return self.__safe_deepcopy__(memodict, exclude=("batch_size_func", "__transfer_dataset__"))
+
+
 class SyncWaitDataset(UnionBaseDataset):
     """
     The result of adding a blocking condition to the input Dataset.
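pad_info moves off batch() onto this dedicated node; assuming the public entry point is Dataset.padded_batch (the repeat warning above refers to padded_batch), a migration sketch with an illustrative variable-length column:

```python
import numpy as np
import mindspore.dataset as ds

def gen():
    for n in (1, 2, 3, 4):
        yield (np.arange(n, dtype=np.int32),)

data = ds.GeneratorDataset(gen, column_names=["col1"], shuffle=False)

# 1.10 style was data.batch(2, pad_info={"col1": ([4], 0)}); in 2.0 the padding
# arguments live on padded_batch, which maps onto PaddedBatchDataset above.
data = data.padded_batch(2, pad_info={"col1": ([4], 0)})

for (col1,) in data.create_tuple_iterator(output_numpy=True):
    print(col1.shape)  # (2, 4): each batch padded with 0 to length 4
```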
@@ -2620,7 +2675,7 @@ class SyncWaitDataset(UnionBaseDataset):
         input_dataset (Dataset): Input dataset to apply flow control.
         num_batch (int): Number of batches without blocking at the start of each epoch.
         condition_name (str): Condition name that is used to toggle sending next row.
-        callback (function): Callback function that will be invoked when sync_update is called
+        callback (function): Callback function that will be invoked when sync_update is called. Default: None.
 
     Raises:
         RuntimeError: If condition name already exists.
@@ -2670,7 +2725,7 @@ class SyncWaitDataset(UnionBaseDataset):
         Returns:
             bool, whether sync_wait is used before batch.
         """
-        if isinstance(dataset, BatchDataset):
+        if isinstance(dataset, (BatchDataset, PaddedBatchDataset)):
             return True
         flag = False
         for input_dataset in dataset.children:
@@ -2683,14 +2738,14 @@ class SyncWaitDataset(UnionBaseDataset):
 
 class ShuffleDataset(UnionBaseDataset):
     """
-    The result of applying Shuffle
+    The result of applying Shuffle operation to the input Dataset.
 
     Args:
         input_dataset (Dataset): Input Dataset to be shuffled.
         buffer_size (int): Size of the buffer.
 
     Raises:
-        RuntimeError: If exist sync
+        RuntimeError: If exist sync operations before shuffle.
     """
 
     def __init__(self, input_dataset, buffer_size):
@@ -2727,17 +2782,38 @@ class _PythonCallable:
         self.pool = pool
         # Python callable index
         self.idx = idx
+        self.check_interval = get_multiprocessing_timeout_interval()
 
     def __call__(self, *args):
         result = None
-
-
-
-
-
-
-
-
+        start_time = time.time()
+        count = 1
+        get_data_from_worker_process = False
+        while get_data_from_worker_process is False:
+            cost_time = time.time() - start_time
+            if cost_time > (self.check_interval * count):
+                logger.warning("It has been waiting for " + str(cost_time) + "s because the multi "
+                               "workers of map operation cost long time to process next data. "
+                               "Worker process list are: " + str(self.pool.get_pids()) + ", you can use "
+                               "\"py-spy dump -p {PID} -l -s \""
+                               "to dump the worker process stack. You can also set the timeout interval by "
+                               "ds.config.set_multiprocessing_interval to adjust the output frequency of this "
+                               "log.")
+                count += 1
+            if self.pool.is_running() and check_iterator_cleanup() is False:
+                try:
+                    result = self.pool.execute(self.idx, *args)
+                except multiprocessing.TimeoutError:
+                    continue
+                get_data_from_worker_process = True
+            else:
+                # worker process is stopped
+                logger.info("The worker process of map operation is stopped. "
+                            "So return None to main thread and break the main thread.")
+                return None
+        # got value from worker process
+        if not isinstance(result, tuple) and get_data_from_worker_process is True:
+            result = (result,)
         return result
 
     def to_json(self):
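The check interval comes from the dataset config. Assuming the matching setter is ds.config.set_multiprocessing_timeout_interval (the getter used above is get_multiprocessing_timeout_interval), a sketch:

```python
import mindspore.dataset as ds

# Report a stalled map/batch worker every 600 s instead of the default,
# reducing how often the "It has been waiting for ..." warning fires.
ds.config.set_multiprocessing_timeout_interval(600)
print(ds.config.get_multiprocessing_timeout_interval())  # 600
```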
@@ -2833,7 +2909,7 @@ def _worker_loop(operations, pipe, seed=get_seed()):
             pipe.worker_send(output_tensors)
         except Exception:
             pipe.worker_send(ExceptionHandler(where="in map(or batch) worker and execute Python function"))
-            return
+            # Do not return
 
 
 def worker_target(operations, seed=get_seed()):
@@ -2922,7 +2998,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         self.ppid = os.getpid()
         self.hook = None
         self.warning_ctl = None
-
+        # cache thread (get_ident()) to worker_id mapping in Python layer
+        self.python_threads_to_workers = {}
 
     def __del__(self):
         try:
@@ -3067,13 +3144,24 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         os.kill(os.getpid(), signal.SIGTERM)
 
     def launch(self, op_id=-1):
+        """
+        Launch Python multiprocessing pool.
+
+        Args:
+            pop_id: ID for operation to have Python multiprocessing pool launched
+
+        Returns:
+            Python multiprocssing pool is launched.
+        """
+        self.python_threads_to_workers = {}
         self.op_id = op_id
         logger.info("Launching new Python Multiprocessing pool for Op:" + str(self.op_id))
         if self.is_mp_enabled():
-
-            The existing pool will be terminated first.
+            message = "Launching a new Python multiprocessing pool while a pool already exists!" + \
+                      " The existing pool will be terminated first."
+            logger.warning(message)
             self.terminate()
-
+            self.reset()
         self.create_pool()
 
     def create_pool(self):
@@ -3109,7 +3197,6 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         atexit.register(self.terminate)
 
     def terminate(self):
-        logger.info("Terminating Python Multiprocessing for Op:" + str(self.op_id))
         self.close_all_workers()
         self.abort_watchdog()
 
@@ -3166,7 +3253,10 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         Execute
         """
         t_id = threading.get_ident()
-        worker_id
+        # get the worker_id from Python layer cache first, get from Cpp layer if not found.
+        worker_id = self.python_threads_to_workers.setdefault(t_id, self.get_thread_to_worker())
+        if worker_id >= len(self.workers):
+            raise RuntimeError("[Internal] worker_id value is greater than number of available workers!")
 
         # todo check_iterator_cleanup
         if self.is_running() and check_iterator_cleanup() is False:
@@ -3220,38 +3310,32 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
 
 class MapDataset(UnionBaseDataset):
     """
-    The result of applying the Map
+    The result of applying the Map operation to the input Dataset.
 
     Args:
         input_dataset (Dataset): Input Dataset to be mapped.
         operations (Union[list[TensorOperation], list[functions]]): A function mapping a nested structure of tensors
-            to another nested structure of tensor
-        input_columns (Union[str, list[str]]): List of names of the input columns
-
-            The size of the list should match the number of inputs of the first
+            to another nested structure of tensor. Default: None.
+        input_columns (Union[str, list[str]]): List of names of the input columns.
+            Default: None, the operations will be applied on the first columns in the dataset.
+            The size of the list should match the number of inputs of the first operation.
         output_columns (Union[str, list[str]], optional): List of names of the output columns.
-            The size of the list should match the number of outputs of the last
-
-            be replaced
-        column_order (list[str], optional): Specifies the list of all the columns you need in the whole
-            dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
-            is not just the columns specified in parameter input_columns and output_columns.
+            The size of the list should match the number of outputs of the last operation.
+            Default: None, output columns will be the input columns, i.e., the columns will
+            be replaced.
         num_parallel_workers (int, optional): Number of workers to process the dataset
-            in parallel
+            in parallel. Default: None.
         python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
-            option could be beneficial if the Python operation is computational heavy
+            option could be beneficial if the Python operation is computational heavy. Default: False.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
-
-        callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called
+            Default: None, which means no cache is used.
+        callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: None.
         max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy
-            data between processes.
-        offload (bool, optional): Flag to indicate whether offload is used
-
-    Raises:
-        ValueError: If len(input_columns) != len(output_columns) and column_order is not specified.
+            data between processes. This is only used if python_multiprocessing is set to True. Default: 16.
+        offload (bool, optional): Flag to indicate whether offload is used. Default: None.
     """
 
-    def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None,
+    def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None,
                  num_parallel_workers=None, python_multiprocessing=False, cache=None, callbacks=None, max_rowsize=16,
                  offload=None):
         super().__init__(children=input_dataset, num_parallel_workers=num_parallel_workers, cache=cache)
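column_order is gone from map() along with the old ValueError; the 2.0 idiom is to reorder or drop columns with a separate project() step. A sketch with illustrative column names:

```python
import numpy as np
import mindspore.dataset as ds

data = ds.NumpySlicesDataset({"image": np.zeros((4, 2), np.float32),
                              "label": np.arange(4, dtype=np.int32)}, shuffle=False)

def normalize(image):
    return image / 255.0

# 1.10 style: data.map(normalize, input_columns=["image"], column_order=["image", "label"])
# 2.0 style: map no longer orders columns; chain project() when order matters.
data = data.map(normalize, input_columns=["image"])
data = data.project(["image", "label"])
```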
@@ -3270,17 +3354,10 @@ class MapDataset(UnionBaseDataset):
 
         self.input_columns = to_list(input_columns)
         self.output_columns = to_list(output_columns)
-        self.column_order = replace_none(column_order, [])
 
         # If output_columns were not provided then use input_columns
         self.output_columns = self.input_columns if not self.output_columns else self.output_columns
 
-        if self.input_columns and self.output_columns \
-                and len(self.input_columns) != len(self.output_columns) \
-                and not self.column_order:
-            raise ValueError("When length of input_columns and output_columns are not equal,"
-                             " column_order must be specified.")
-
         self.python_multiprocessing = python_multiprocessing
         self.process_pool = None
 
@@ -3297,14 +3374,23 @@ class MapDataset(UnionBaseDataset):
         if count_new_transforms + count_pyfunc == len(operations):
             prev_op = None
             for op in operations:
+                # skip user added DebugHook to avoid changing to Py-implementation.
+                if self.__is_debug_hook_op(op):
+                    if prev_op:
+                        # manually set previous_op_name
+                        prev_op_name = self.__parse_op_name(prev_op)
+                        op.set_previous_op_name(prev_op_name)
+                    continue
                 if op.implementation is None:
                     if prev_op and prev_op.implementation == Implementation.PY:
                         op.implementation = Implementation.PY
                     else:
                         op.implementation = Implementation.C
                 prev_op = op
+            operations = self.__insert_debug_wrapper(operations)
             operations = transforms.transforms.Compose.reduce(operations)
         elif count_old_transforms + count_pyfunc + count_non_data_vision_transforms == len(operations):
+            operations = self.__insert_debug_wrapper(operations)
             operations = transforms.py_transforms.Compose.reduce(operations)
         else:
             raise RuntimeError("Mixing old legacy c/py_transforms and new unified transforms is not allowed.")
@@ -3313,7 +3399,7 @@ class MapDataset(UnionBaseDataset):
         self.prepare_multiprocessing()
 
         callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
-        return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
+        return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
                            callbacks, self.max_rowsize, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
 
     def __deepcopy__(self, memodict):
@@ -3324,6 +3410,49 @@ class MapDataset(UnionBaseDataset):
             self.process_pool.terminate()
             del self.process_pool
 
+    @staticmethod
+    def __parse_op_name(op):
+        """
+        Utility method to get operation name.
+        """
+        op_name = ""
+        if isinstance(op, transforms.py_transforms_util.FuncWrapper):
+            try:
+                op_name = op.transform.__name__
+            except (AttributeError,):
+                op_name = op.transform.__class__.__name__
+        else:
+            op_name = op.__class__.__name__
+        return op_name
+
+    @staticmethod
+    def __construct_debug_hook(previous_op_name=None):
+        """
+        Wrap debug hook into FuncWrapper.
+        """
+        inserted_functions = []
+        debug_hook_list = _get_debug_hook_list()
+        if debug_hook_list:
+            for fn in debug_hook_list:
+                # making deep copy to allow each debug hook instance hold unique variables
+                new_fn = copy.deepcopy(fn)
+                new_fn.set_previous_op_name(previous_op_name)
+                inserted_func = transforms.py_transforms_util.FuncWrapper(new_fn)
+                inserted_func.implementation = Implementation.PY
+                inserted_functions.append(inserted_func)
+        return inserted_functions
+
+    @staticmethod
+    def __is_debug_hook_op(op):
+        """
+        Check if the op is user added DebugHook and skip it to avoid changing transforms implementation.
+        """
+        if isinstance(op, DebugHook):
+            if not get_debug_mode():
+                raise ValueError("It is not allowed to inject DebugHook object in non-debug mode.")
+            return True
+        return False
+
     @staticmethod
     def __count_pyfuncs(operations):
         """
@@ -3389,6 +3518,10 @@ class MapDataset(UnionBaseDataset):
         if self.python_multiprocessing and platform.system().lower() == 'windows':
             logger.warning("Python multiprocessing is not supported on Windows platform.")
             return
+        if self.python_multiprocessing and get_debug_mode():
+            logger.warning("Python multiprocessing is not supported in debug mode."
+                           " Ignoring Python multiprocessing for map operation.")
+            return
         if self.python_multiprocessing:
             iter_specific_operations = []
             callable_list = []
@@ -3419,6 +3552,19 @@ class MapDataset(UnionBaseDataset):
                 iter_specific_operations.append(op)
             self.operations = iter_specific_operations
 
+    def __insert_debug_wrapper(self, operations):
+        """
+        Insert DebuggerWrapper before and after each op if debug mode is on.
+        """
+        if not get_debug_mode():
+            return operations
+        inserted_operations = self.__construct_debug_hook()
+        for op in operations:
+            inserted_operations.append(op)
+            op_name = self.__parse_op_name(op)
+            inserted_operations.extend(self.__construct_debug_hook(op_name))
+        return inserted_operations
+
     def __decompose_callable_operations(self):
         """
         Decompose operations and build list of old legacy ops which are callable
@@ -3441,10 +3587,10 @@ class FilterDataset(UnionBaseDataset):
     Args:
         input_dataset (Dataset): Input Dataset to be mapped.
         predicate (callable): Python callable which returns a boolean value. If False then filter the element.
-        input_columns (Union[str, list[str]], optional): List of names of the input columns
-
+        input_columns (Union[str, list[str]], optional): List of names of the input columns.
+            Default: None, the predicate will be applied to all columns in the dataset.
         num_parallel_workers (int, optional): Number of workers to process the dataset
-            in parallel
+            in parallel. Default: None.
     """
 
     def __init__(self, input_dataset, predicate, input_columns=None, num_parallel_workers=None):
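A sketch of the corresponding user-facing call, with an illustrative column and predicate:

```python
import numpy as np
import mindspore.dataset as ds

data = ds.NumpySlicesDataset({"label": np.arange(10, dtype=np.int32)}, shuffle=False)

# Keep only rows whose label is even; with input_columns set, the predicate
# sees just that column instead of every column in the row.
data = data.filter(predicate=lambda label: label % 2 == 0, input_columns=["label"])
```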
@@ -3458,11 +3604,11 @@ class FilterDataset(UnionBaseDataset):
 
 class RepeatDataset(UnionBaseDataset):
     """
-    The result of applying Repeat
+    The result of applying Repeat operation to the input Dataset.
 
     Args:
         input_dataset (Dataset): Input Dataset to be repeated.
-        count (int): Number of times the dataset will be repeated
+        count (int): Number of times the dataset will be repeated. Default: -1, repeat indefinitely.
     """
 
     def __init__(self, input_dataset, count):
@@ -3475,7 +3621,7 @@ class RepeatDataset(UnionBaseDataset):
 
 class SkipDataset(UnionBaseDataset):
     """
-    The result of applying Skip
+    The result of applying Skip operation to the input Dataset.
 
     Args:
         input_dataset (Dataset): Input dataset to have elements skipped.
@@ -3492,7 +3638,7 @@ class SkipDataset(UnionBaseDataset):
 
 class TakeDataset(UnionBaseDataset):
     """
-    The result of applying Take
+    The result of applying Take operation to the input Dataset.
 
     Args:
         input_dataset (Dataset): Input Dataset to have elements taken from.
@@ -3509,7 +3655,7 @@ class TakeDataset(UnionBaseDataset):
 
 class ZipDataset(UnionBaseDataset):
     """
-    The result of applying Zip
+    The result of applying Zip operation to the input Dataset.
 
     Args:
         datasets (tuple): A tuple of datasets to be zipped together.
@@ -3530,7 +3676,7 @@ class ZipDataset(UnionBaseDataset):
 
 class ConcatDataset(UnionBaseDataset):
     """
-    The result of applying
+    The result of applying Concat operation to the input Dataset.
 
     Args:
         datasets (list): A list of datasets to be concatenated together.
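These wrapper nodes are produced by the chainable calls of the same names; a quick sketch (values illustrative):

```python
import numpy as np
import mindspore.dataset as ds

data = ds.NumpySlicesDataset({"col1": np.arange(10, dtype=np.int32)}, shuffle=False)

# Each call below yields one of the *Dataset wrapper nodes documented here.
data = data.skip(2)    # SkipDataset: drop the first 2 rows
data = data.take(6)    # TakeDataset: keep at most 6 rows
data = data.repeat(3)  # RepeatDataset: 3 epochs; repeat(-1) repeats indefinitely
```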
@@ -3615,8 +3761,8 @@ class ConcatDataset(UnionBaseDataset):
             if hasattr(child, 'sampler') and child.sampler.get_num_samples() is not None:
                 raise ValueError("The parameter NumSamples of %s is not support to be set!" % child)
 
-            if isinstance(child, BatchDataset):
-                raise TypeError("The parameter %s of concat must not be BatchDataset!" % child)
+            if isinstance(child, (BatchDataset, PaddedBatchDataset)):
+                raise TypeError("The parameter %s of concat must not be BatchDataset or PaddedBatchDataset!" % child)
 
             # if child is mappable and the length is greater than 0
             if not self._children_flag_and_nums[index][0] and self._children_flag_and_nums[index][1]:
@@ -3641,7 +3787,7 @@ class ConcatDataset(UnionBaseDataset):
 
 class RenameDataset(UnionBaseDataset):
     """
-    The result of applying Rename
+    The result of applying Rename operation to the input Dataset.
 
     Args:
         input_dataset (Dataset): Input Dataset to be Renamed.
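The widened check keeps enforcing the usual ordering: concatenate unbatched pipelines first, batch last. Illustrative sketch:

```python
import numpy as np
import mindspore.dataset as ds

a = ds.NumpySlicesDataset({"col1": np.arange(4, dtype=np.int32)}, shuffle=False)
b = ds.NumpySlicesDataset({"col1": np.arange(4, 8, dtype=np.int32)}, shuffle=False)

# Valid: concat the unbatched pipelines (the + operator builds a ConcatDataset),
# then batch the combined result.
combined = (a + b).batch(2)

# Invalid in both 1.10 and 2.0: a.batch(2) + b raises the TypeError above.
```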
@@ -3670,7 +3816,7 @@ def to_list(items):
 
 class ProjectDataset(UnionBaseDataset):
     """
-    The result of applying Project
+    The result of applying Project operation to the input Dataset.
 
     Args:
         input_dataset (Dataset): Input Dataset to be Projected.
@@ -3691,6 +3837,9 @@ class _ToDevice:
     """
 
     def __init__(self, dataset, num_epochs):
+        if get_debug_mode():
+            logger.error("MindData debugger cannot be used in dataset sink mode. Please manually turn off "
+                         "sink mode and try debugger again.")
         ir_tree, self.api_tree = dataset.create_ir_tree()
 
         self._runtime_context = cde.PythonRuntimeContext()
@@ -3705,9 +3854,6 @@ class _ToDevice:
     def send(self):
         self._to_device.Send()
 
-    def _reset(self, step):
-        self._to_device.Reset(step)
-
     def stop_send(self):
         """
         send stop send signal to pipeline, it is used when end of sequence is sent at the epoch end.
@@ -3746,16 +3892,19 @@ class _ToDevice:
         offload_model = GetOffloadModel(self._to_device, col_names)
         return offload_model
 
+    def _reset(self, step, epoch):
+        self._to_device.Reset(step, epoch)
+
 
 class TransferDataset(Dataset):
     """
-    The result of applying TDT
+    The result of applying TDT operation to the input Dataset.
 
     Args:
         input_dataset (Dataset): Input Dataset to be transferred.
-        send_epoch_end (bool, optional): Whether to send end of sequence to device or not
+        send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True.
         create_data_info_queue (bool, optional): Whether to create queue which stores
-            types and shapes of data or not
+            types and shapes of data or not. Default: False.
 
     Raises:
         TypeError: If device_type is empty.
@@ -3816,11 +3965,6 @@ class TransferDataset(Dataset):
         if self._to_device is not None:
             self._to_device.continue_send()
 
-    def _reset(self, step):
-        if self._to_device is not None:
-            logger.info("Reset the dataset pipeline to step " + str(step))
-            self._to_device._reset(step)  # pylint: disable=W0212
-
     def get_data_info(self):
         """
         Get type and shape of current batch
@@ -3842,13 +3986,18 @@ class TransferDataset(Dataset):
         if self._to_device is not None:
             self._to_device.release()
 
+    def _reset(self, step, epoch):
+        if self._to_device is not None:
+            logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(epoch))
+            self._to_device._reset(step, epoch)  # pylint: disable=protected-access
+
 
 class Schema:
     """
     Class to represent a schema of a dataset.
 
     Args:
-        schema_file(str): Path of the schema file
+        schema_file(str): Path of the schema file. Default: None.
 
     Returns:
         Schema object, schema info about dataset.
@@ -3877,11 +4026,17 @@ class Schema:
         Args:
             name (str): The new name of the column.
             de_type (str): Data type of the column.
-            shape (list[int], optional): Shape of the column
-
+            shape (list[int], optional): Shape of the column.
+                Default: None, [-1] which is an unknown shape of rank 1.
 
         Raises:
             ValueError: If column type is unknown.
+
+        Examples:
+            >>> from mindspore import dtype as mstype
+            >>>
+            >>> schema = ds.Schema()
+            >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
         """
         if isinstance(de_type, typing.Type):
             de_type = mstype_to_detype(de_type)
@@ -3926,6 +4081,12 @@ class Schema:
 
         Returns:
             str, JSON string of the schema.
+
+        Examples:
+            >>> from mindspore.dataset import Schema
+            >>>
+            >>> schema1 = ds.Schema()
+            >>> schema2 = schema1.to_json()
         """
         return self.cpp_schema.to_json()
 
@@ -3940,6 +4101,16 @@ class Schema:
             RuntimeError: if there is unknown item in the object.
             RuntimeError: if dataset type is missing in the object.
             RuntimeError: if columns are missing in the object.
+
+        Examples:
+            >>> import json
+            >>>
+            >>> from mindspore.dataset import Schema
+            >>>
+            >>> with open("/path/to/schema_file") as file:
+            ...     json_obj = json.load(file)
+            ...     schema = ds.Schema()
+            ...     schema.from_json(json_obj)
         """
         self.cpp_schema.from_string(json.dumps(json_obj, indent=2))
 
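Putting the newly added Schema examples together; the TFRecord path is a placeholder and the commented consumer line is only indicative:

```python
import mindspore.dataset as ds
from mindspore import dtype as mstype

schema = ds.Schema()
schema.add_column("image", de_type=mstype.uint8, shape=[-1])
schema.add_column("label", de_type=mstype.int32)

# Readers that accept a schema can consume it directly, e.g.:
# data = ds.TFRecordDataset("/path/to/file.tfrecord", schema=schema)
print(schema.to_json())
```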