mindspore 2.0.0rc1__cp38-none-any.whl → 2.2.0__cp38-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore has been flagged as potentially problematic by the registry.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +633 -804
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -29,8 +29,10 @@ import atexit
|
|
|
29
29
|
import glob
|
|
30
30
|
import json
|
|
31
31
|
import os
|
|
32
|
+
import queue
|
|
32
33
|
import signal
|
|
33
34
|
import stat
|
|
35
|
+
import subprocess
|
|
34
36
|
import warnings
|
|
35
37
|
|
|
36
38
|
import gc
|
|
@@ -62,7 +64,7 @@ from mindspore.dataset.text.utils import SentencePieceModel, DE_C_INTER_SENTENCE
|
|
|
62
64
|
from mindspore.parallel._utils import _get_device_num
|
|
63
65
|
from mindspore.dataset.debug import DebugHook
|
|
64
66
|
|
|
65
|
-
from . import samplers
|
|
67
|
+
from mindspore.dataset.engine import samplers
|
|
66
68
|
from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
|
|
67
69
|
ITERATORS_LIST, _unset_iterator_cleanup
|
|
68
70
|
from .queue import _SharedQueue, _Queue
|
|
@@ -127,8 +129,7 @@ def _reset_training_dataset(global_step, dataset_size):
|
|
|
127
129
|
"""
|
|
128
130
|
dataset = _get_training_dataset()
|
|
129
131
|
if dataset is not None:
|
|
130
|
-
|
|
131
|
-
dataset._reset(global_step, epoch) # pylint: disable=protected-access
|
|
132
|
+
dataset._reset(global_step, dataset_size) # pylint: disable=protected-access
|
|
132
133
|
else:
|
|
133
134
|
raise RuntimeError("Training dataset is not set.")
|
|
134
135
|
|
|
@@ -136,9 +137,9 @@ def _reset_training_dataset(global_step, dataset_size):
|
|
|
136
137
|
class Shuffle(str, Enum):
|
|
137
138
|
"""Specify the shuffle mode.
|
|
138
139
|
|
|
139
|
-
- Shuffle.GLOBAL: Shuffle both the files and samples.
|
|
140
|
-
- Shuffle.FILES: Shuffle files only.
|
|
141
|
-
- Shuffle.INFILE: Shuffle data within each file.
|
|
140
|
+
- ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
|
|
141
|
+
- ``Shuffle.FILES`` : Shuffle files only.
|
|
142
|
+
- ``Shuffle.INFILE`` : Shuffle data within each file.
|
|
142
143
|
"""
|
|
143
144
|
GLOBAL: str = "global"
|
|
144
145
|
FILES: str = "files"
|
|
@@ -208,7 +209,7 @@ def zip(datasets):
|
|
|
208
209
|
The number of datasets must be more than 1.
|
|
209
210
|
|
|
210
211
|
Returns:
|
|
211
|
-
Dataset, dataset
|
|
212
|
+
Dataset, a new dataset with the above operation applied.
|
|
212
213
|
|
|
213
214
|
Raises:
|
|
214
215
|
ValueError: If the number of datasets is 1.
|
|
@@ -216,6 +217,10 @@ def zip(datasets):
|
|
|
216
217
|
|
|
217
218
|
Examples:
|
|
218
219
|
>>> # Create a dataset which is the combination of dataset_1 and dataset_2
|
|
220
|
+
>>> import mindspore.dataset as ds
|
|
221
|
+
>>>
|
|
222
|
+
>>> dataset_1 = ds.GeneratorDataset([1], "column1")
|
|
223
|
+
>>> dataset_2 = ds.GeneratorDataset([2], "column2")
|
|
219
224
|
>>> dataset = ds.zip((dataset_1, dataset_2))
|
|
220
225
|
"""
|
|
221
226
|
if len(datasets) <= 1:
|
|
@@ -316,7 +321,7 @@ class Dataset:
|
|
|
316
321
|
|
|
317
322
|
Args:
|
|
318
323
|
num_parallel_workers (int, optional): Number of workers to process the dataset in parallel.
|
|
319
|
-
Default: None
|
|
324
|
+
Default: ``None``.
|
|
320
325
|
"""
|
|
321
326
|
|
|
322
327
|
def __init__(self, children=None, num_parallel_workers=None, cache=None):
|
|
@@ -346,6 +351,7 @@ class Dataset:
|
|
|
346
351
|
self._repeat_count = None
|
|
347
352
|
self._class_indexing = None
|
|
348
353
|
self._sync = False
|
|
354
|
+
self._global_step = None
|
|
349
355
|
|
|
350
356
|
@staticmethod
|
|
351
357
|
def _get_operator_id(dataset):
|
|
@@ -382,36 +388,42 @@ class Dataset:
|
|
|
382
388
|
_OP_PROCESS.update(generator_process)
|
|
383
389
|
return op_name
|
|
384
390
|
|
|
385
|
-
def create_ir_tree(self):
|
|
391
|
+
def create_ir_tree(self, getter_mode=False):
|
|
386
392
|
"""
|
|
387
393
|
Internal method to build an IR tree.
|
|
388
394
|
|
|
395
|
+
Args:
|
|
396
|
+
getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``False``.
|
|
397
|
+
|
|
389
398
|
Returns:
|
|
390
|
-
DatasetNode, the root node of the IR tree.
|
|
391
|
-
Dataset, the root dataset of the IR tree.
|
|
399
|
+
Union[DatasetNode, Dataset], the root node of the IR tree and the root dataset of the IR tree.
|
|
392
400
|
"""
|
|
393
401
|
parent = self.parent
|
|
394
402
|
self.parent = []
|
|
395
403
|
dataset = copy.deepcopy(self)
|
|
396
404
|
global _OP_NAME
|
|
397
405
|
_OP_NAME = Dataset._get_operator_id(dataset)
|
|
398
|
-
ir_tree = dataset.parse_tree()
|
|
406
|
+
ir_tree = dataset.parse_tree(getter_mode)
|
|
399
407
|
self.parent = parent
|
|
400
408
|
_init_device_info()
|
|
401
409
|
return ir_tree, dataset
|
|
402
410
|
|
|
403
|
-
def parse_tree(self):
|
|
411
|
+
def parse_tree(self, getter_mode=False):
|
|
404
412
|
"""
|
|
405
413
|
Internal method to parse the API tree into an IR tree.
|
|
406
414
|
|
|
415
|
+
Args:
|
|
416
|
+
getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``False``.
|
|
417
|
+
|
|
407
418
|
Returns:
|
|
408
419
|
DatasetNode, the root node of the IR tree.
|
|
409
420
|
"""
|
|
410
421
|
if len(self.parent) > 1:
|
|
411
422
|
raise ValueError("The data pipeline is not a tree (i.e., one node has 2 consumers)")
|
|
412
|
-
ir_children = [d.parse_tree() for d in self.children]
|
|
423
|
+
ir_children = [d.parse_tree(getter_mode) for d in self.children]
|
|
413
424
|
# Bootstrap can only be performed on a copy of the original dataset node.
|
|
414
425
|
# Bootstrap on original dataset node will make all iterators share the same process pool
|
|
426
|
+
self.pre_parse(getter_mode)
|
|
415
427
|
self.iterator_bootstrap()
|
|
416
428
|
ir_node = self.parse(ir_children)
|
|
417
429
|
ir_node = self.post_parse(ir_node)
|
|
@@ -450,12 +462,15 @@ class Dataset:
|
|
|
450
462
|
Serialize a pipeline into JSON string and dump into file if filename is provided.
|
|
451
463
|
|
|
452
464
|
Args:
|
|
453
|
-
filename (str): filename of JSON file to be saved as. Default:
|
|
465
|
+
filename (str): filename of JSON file to be saved as. Default: ``""``.
|
|
454
466
|
|
|
455
467
|
Returns:
|
|
456
468
|
str, JSON string of the pipeline.
|
|
457
469
|
|
|
458
470
|
Examples:
|
|
471
|
+
>>> import mindspore.dataset as ds
|
|
472
|
+
>>> mnist_dataset_dir = "/path/to/mnist_dataset_directory"
|
|
473
|
+
>>> dataset = ds.MnistDataset(dataset_dir=mnist_dataset_dir)
|
|
459
474
|
>>> dataset_json = dataset.to_json("/path/to/mnist_dataset_pipeline.json")
|
|
460
475
|
"""
|
|
461
476
|
ir_tree, _ = self.create_ir_tree()
|
|
@@ -489,7 +504,7 @@ class Dataset:
|
|
|
489
504
|
element_length_function (Callable, optional): A function that takes in
|
|
490
505
|
M arguments where M = len(column_names) and returns an integer. If no value
|
|
491
506
|
provided, parameter M the len(column_names) must be 1, and the size of the first
|
|
492
|
-
dimension of that column will be taken as the length. Default: None
|
|
507
|
+
dimension of that column will be taken as the length. Default: ``None``.
|
|
493
508
|
pad_info (dict, optional): The information about how to batch each column. The key
|
|
494
509
|
corresponds to the column name, and the value must be a tuple of 2 elements.
|
|
495
510
|
The first element corresponds to the shape to pad to, and the second
|
|
@@ -497,21 +512,22 @@ class Dataset:
|
|
|
497
512
|
specified, then that column will be padded to the longest in the current
|
|
498
513
|
batch, and 0 will be used as the padding value. Any None dimensions will
|
|
499
514
|
be padded to the longest in the current batch, unless if
|
|
500
|
-
`pad_to_bucket_boundary` is True
|
|
501
|
-
to None
|
|
502
|
-
pad_to_bucket_boundary (bool, optional): If True
|
|
515
|
+
`pad_to_bucket_boundary` is ``True``. If no padding is wanted, set `pad_info`
|
|
516
|
+
to ``None``. Default: ``None``.
|
|
517
|
+
pad_to_bucket_boundary (bool, optional): If ``True``, will pad each None
|
|
503
518
|
dimension in `pad_info` to the bucket_boundary minus 1. If there are any
|
|
504
519
|
elements that fall into the last bucket, an error will occur.
|
|
505
|
-
Default: False
|
|
506
|
-
drop_remainder (bool, optional): If True
|
|
507
|
-
bucket if it is not a full batch. Default: False
|
|
520
|
+
Default: ``False``.
|
|
521
|
+
drop_remainder (bool, optional): If ``True``, will drop the last batch for each
|
|
522
|
+
bucket if it is not a full batch. Default: ``False``.
|
|
508
523
|
|
|
509
524
|
Returns:
|
|
510
|
-
Dataset, dataset
|
|
525
|
+
Dataset, a new dataset with the above operation applied.
|
|
511
526
|
|
|
512
527
|
Examples:
|
|
513
528
|
>>> # Create a dataset where certain counts rows are combined into a batch
|
|
514
529
|
>>> # and drops the last incomplete batch if there is one.
|
|
530
|
+
>>> import mindspore.dataset as ds
|
|
515
531
|
>>> import numpy as np
|
|
516
532
|
>>> def generate_2_columns(n):
|
|
517
533
|
... for i in range(n):
|
|
@@ -553,15 +569,16 @@ class Dataset:
|
|
|
553
569
|
batch_size (Union[int, Callable]): The number of rows each batch is created with. An
|
|
554
570
|
int or callable object which takes exactly 1 parameter, BatchInfo.
|
|
555
571
|
drop_remainder (bool, optional): Determines whether or not to drop the last block
|
|
556
|
-
whose data row number is less than batch size. Default: False. If True,
|
|
557
|
-
than batch_size rows available to make the last batch,
|
|
558
|
-
be dropped and not propagated to the child node.
|
|
572
|
+
whose data row number is less than batch size. Default: ``False`` . If ``True`` ,
|
|
573
|
+
and if there are less than `batch_size` rows available to make the last batch,
|
|
574
|
+
then those rows will be dropped and not propagated to the child node.
|
|
559
575
|
num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
|
|
560
|
-
Default: None.
|
|
576
|
+
Default: ``None`` .
|
|
561
577
|
**kwargs:
|
|
562
578
|
|
|
563
579
|
- per_batch_map (Callable[[List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo], \
|
|
564
|
-
(List[numpy.ndarray], ..., List[numpy.ndarray])], optional): Per batch map callable.
|
|
580
|
+
(List[numpy.ndarray], ..., List[numpy.ndarray])], optional): Per batch map callable.
|
|
581
|
+
Default: ``None``.
|
|
565
582
|
A callable which takes (List[numpy.ndarray], ..., List[numpy.ndarray], BatchInfo) as input parameters.
|
|
566
583
|
Each list[numpy.ndarray] represents a batch of numpy.ndarray on a given column. The number of lists
|
|
567
584
|
should match with the number of entries in input_columns. The last parameter of the callable should
|
|
@@ -570,30 +587,41 @@ class Dataset:
|
|
|
570
587
|
as the input. output_columns is required if the number of output lists is different from input.
|
|
571
588
|
|
|
572
589
|
- input_columns (Union[str, list[str]], optional): List of names of the input columns. The size of
|
|
573
|
-
the list should match with signature of per_batch_map callable. Default: None.
|
|
590
|
+
the list should match with signature of `per_batch_map` callable. Default: ``None`` .
|
|
574
591
|
|
|
575
592
|
- output_columns (Union[str, list[str]], optional): List of names assigned to the columns
|
|
576
593
|
outputted by the last operation. This parameter is mandatory if len(input_columns) !=
|
|
577
594
|
len(output_columns). The size of this list must match the number of output
|
|
578
|
-
columns of the last operation. Default: None, output columns will have the same
|
|
595
|
+
columns of the last operation. Default: ``None`` , output columns will have the same
|
|
579
596
|
name as the input columns, i.e., the columns will be replaced.
|
|
580
597
|
|
|
581
598
|
- python_multiprocessing (bool, optional): Parallelize Python function `per_batch_map` with
|
|
582
|
-
multi-processing or multi-threading mode, True means multi-processing,
|
|
583
|
-
If `per_batch_map` is a I/O bound task, use
|
|
584
|
-
If `per_batch_map` is a CPU bound task, it is recommended to use
|
|
585
|
-
Default: False, use python multi-threading mode.
|
|
586
|
-
|
|
587
|
-
- max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory
|
|
588
|
-
copy data between processes
|
|
599
|
+
multi-processing or multi-threading mode, ``True`` means multi-processing,
|
|
600
|
+
``False`` means multi-threading. If `per_batch_map` is an I/O bound task, use
|
|
601
|
+
multi-threading mode. If `per_batch_map` is a CPU bound task, it is recommended to use
|
|
602
|
+
multi-processing mode. Default: ``False`` , use python multi-threading mode.
|
|
603
|
+
|
|
604
|
+
- max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
|
|
605
|
+
allocation to copy data between processes, the total occupied shared memory will increase as
|
|
606
|
+
``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
|
|
607
|
+
used if `python_multiprocessing` is set to ``True``. If it is an int value, it represents
|
|
608
|
+
``input_columns`` and ``output_columns`` use this value as the unit to create shared memory.
|
|
609
|
+
If it is a list, the first element represents the ``input_columns`` use this value as the unit to
|
|
610
|
+
create shared memory, and the second element represents ``output_columns`` use this value as the unit
|
|
611
|
+
to create shared memory. Default: 16.
|
|
589
612
|
|
|
590
613
|
Returns:
|
|
591
|
-
|
|
614
|
+
Dataset, a new dataset with the above operation applied.
|
|
592
615
|
|
|
593
616
|
Examples:
|
|
594
|
-
>>> # 1) Create a dataset where every
|
|
617
|
+
>>> # 1) Create a dataset where every 5 rows are combined into a batch
|
|
595
618
|
>>> # and drops the last incomplete batch if there is one.
|
|
596
|
-
>>> dataset
|
|
619
|
+
>>> import mindspore.dataset as ds
|
|
620
|
+
>>> from PIL import Image
|
|
621
|
+
>>>
|
|
622
|
+
>>> cifar10_dataset_dir = "/path/to/cifar10_dataset_directory"
|
|
623
|
+
>>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, num_samples=10)
|
|
624
|
+
>>> dataset = dataset.batch(5, True)
|
|
597
625
|
>>>
|
|
598
626
|
>>> # 2) resize image according to its batch number, if it's 5-th batch, resize to (5^2, 5^2) = (25, 25)
|
|
599
627
|
>>> def np_resize(col, BatchInfo):
|
|
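A minimal sketch of `batch` with the `per_batch_map` and list-form `max_rowsize` keywords described above; the generator, column name and sizes are illustrative:

    import numpy as np
    import mindspore.dataset as ds

    def rows():
        for i in range(20):
            yield (np.full((8, 8), i, dtype=np.float32),)

    def scale(col, batch_info):
        # col is the list of numpy arrays forming one batch of the "data" column.
        return ([2 * x for x in col],)

    dataset = ds.GeneratorDataset(rows, ["data"])
    # max_rowsize as a list: 4 MB of shared memory per row for the input columns
    # and 4 MB for the output columns; only used with python_multiprocessing=True.
    dataset = dataset.batch(batch_size=4, per_batch_map=scale,
                            input_columns=["data"],
                            python_multiprocessing=True, max_rowsize=[4, 4])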
@@ -633,11 +661,11 @@ class Dataset:
|
|
|
633
661
|
batch_size (Union[int, Callable]): The number of rows each batch is created with. An
|
|
634
662
|
int or callable object which takes exactly 1 parameter, BatchInfo.
|
|
635
663
|
drop_remainder (bool, optional): Determines whether or not to drop the last block
|
|
636
|
-
whose data row number is less than batch size. Default: False
|
|
637
|
-
than batch_size rows available to make the last batch, then those rows will
|
|
664
|
+
whose data row number is less than batch size. Default: ``False``. If ``True``, and if there
|
|
665
|
+
are less than batch_size rows available to make the last batch, then those rows will
|
|
638
666
|
be dropped and not propagated to the child node.
|
|
639
667
|
num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel.
|
|
640
|
-
Default: None
|
|
668
|
+
Default: ``None``.
|
|
641
669
|
pad_info (dict, optional): The information about how to batch each column. The key
|
|
642
670
|
corresponds to the column name, and the value must be a tuple of 2 elements.
|
|
643
671
|
The first element corresponds to the shape to pad to, and the second
|
|
@@ -645,19 +673,22 @@ class Dataset:
|
|
|
645
673
|
specified, then that column will be padded to the longest in the current
|
|
646
674
|
batch, and 0 will be used as the padding value. Any None dimensions will
|
|
647
675
|
be padded to the longest in the current batch, unless
|
|
648
|
-
pad_to_bucket_boundary is True. If no padding is wanted, set pad_info
|
|
649
|
-
to None
|
|
676
|
+
`pad_to_bucket_boundary` is ``True``. If no padding is wanted, set `pad_info`
|
|
677
|
+
to ``None``. Default: ``None``.
|
|
650
678
|
|
|
651
679
|
Returns:
|
|
652
|
-
|
|
680
|
+
Dataset, a new dataset with the above operation applied.
|
|
653
681
|
|
|
654
682
|
Examples:
|
|
655
683
|
>>> # 1) Pad every sample to the largest sample's shape and batch the samples
|
|
656
|
-
>>>
|
|
684
|
+
>>> import mindspore.dataset as ds
|
|
685
|
+
>>> dataset = ds.NumpySlicesDataset([[1], [1, 2], [1, 2, 3], [1, 2, 3, 4]], "column1")
|
|
686
|
+
>>> dataset = dataset.padded_batch(2, True, pad_info={})
|
|
657
687
|
>>>
|
|
658
|
-
>>> # 2) Create a dataset where every
|
|
688
|
+
>>> # 2) Create a dataset where every 3 rows are combined into a batch
|
|
659
689
|
>>> # and drops the last incomplete batch if there is one.
|
|
660
|
-
>>> dataset =
|
|
690
|
+
>>> dataset = ds.NumpySlicesDataset([i for i in range(10)], "column1")
|
|
691
|
+
>>> dataset = dataset.padded_batch(3, True)
|
|
661
692
|
>>>
|
|
662
693
|
>>> # 3) Create a dataset where its batch size is dynamic
|
|
663
694
|
>>> # Define a callable batch size function and let batch size increase 1 each time.
|
|
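A small hedged variant of the `pad_info` usage above, padding to an explicit shape and fill value instead of to the longest row; the values are illustrative:

    import mindspore.dataset as ds

    dataset = ds.NumpySlicesDataset([[1], [1, 2], [1, 2, 3]], "column1")
    # Pad "column1" to a fixed length of 4 with value -1 before forming batches of 2.
    dataset = dataset.padded_batch(2, drop_remainder=False,
                                   pad_info={"column1": ([4], -1)})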
@@ -674,16 +705,19 @@ class Dataset:
|
|
|
674
705
|
|
|
675
706
|
Args:
|
|
676
707
|
condition_name (str): The condition name that is used to toggle sending next row.
|
|
677
|
-
num_batch (int): the number of batches without blocking at the start of each epoch.
|
|
678
|
-
|
|
708
|
+
num_batch (int): the number of batches without blocking at the start of each epoch.
|
|
709
|
+
Default: ``1``.
|
|
710
|
+
callback (function): The callback function that will be invoked when sync_update is called.
|
|
711
|
+
Default: ``None``.
|
|
679
712
|
|
|
680
713
|
Returns:
|
|
681
|
-
|
|
714
|
+
Dataset, a new dataset with the above operation applied.
|
|
682
715
|
|
|
683
716
|
Raises:
|
|
684
717
|
RuntimeError: If condition name already exists.
|
|
685
718
|
|
|
686
719
|
Examples:
|
|
720
|
+
>>> import mindspore.dataset as ds
|
|
687
721
|
>>> import numpy as np
|
|
688
722
|
>>> def gen():
|
|
689
723
|
... for i in range(100):
|
|
@@ -735,15 +769,18 @@ class Dataset:
|
|
|
735
769
|
dataset will result in a global shuffle.
|
|
736
770
|
|
|
737
771
|
Returns:
|
|
738
|
-
Dataset, dataset
|
|
772
|
+
Dataset, a new dataset with the above operation applied.
|
|
739
773
|
|
|
740
774
|
Raises:
|
|
741
775
|
RuntimeError: If exist sync operations before shuffle.
|
|
742
776
|
|
|
743
777
|
Examples:
|
|
744
|
-
>>>
|
|
745
|
-
>>>
|
|
778
|
+
>>> import mindspore.dataset as ds
|
|
779
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
780
|
+
>>>
|
|
781
|
+
>>> # Optionally set the seed for fixed randomness
|
|
746
782
|
>>> ds.config.set_seed(58)
|
|
783
|
+
>>>
|
|
747
784
|
>>> # Create a shuffled dataset using a shuffle buffer of size 4
|
|
748
785
|
>>> dataset = dataset.shuffle(4)
|
|
749
786
|
"""
|
|
@@ -758,9 +795,10 @@ class Dataset:
|
|
|
758
795
|
return a `Dataset` .
|
|
759
796
|
|
|
760
797
|
Returns:
|
|
761
|
-
Dataset, dataset
|
|
798
|
+
Dataset, a new dataset with the above operation applied.
|
|
762
799
|
|
|
763
800
|
Examples:
|
|
801
|
+
>>> import mindspore.dataset as ds
|
|
764
802
|
>>> # 1) flat_map on one column dataset
|
|
765
803
|
>>> dataset = ds.NumpySlicesDataset([[0, 1], [2, 3]], shuffle=False)
|
|
766
804
|
>>>
|
|
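A minimal sketch of a complete `flat_map` call built on the one-column example above; it assumes the callable receives one numpy array per input row and must return a `Dataset`, as the docstring states:

    import mindspore.dataset as ds

    def duplicate_row(array):
        # Build a per-row Dataset; flat_map flattens these back into one dataset.
        return ds.NumpySlicesDataset([array, array], shuffle=False)

    dataset = ds.NumpySlicesDataset([[0, 1], [2, 3]], shuffle=False)
    dataset = dataset.flat_map(duplicate_row)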
@@ -820,11 +858,11 @@ class Dataset:
|
|
|
820
858
|
`output_columns` , and if not specified, the column name of output column is same as that of `input_columns` .
|
|
821
859
|
|
|
822
860
|
- If you use transformations (
|
|
823
|
-
`vision transform <https://mindspore.cn/docs/en/r2.
|
|
861
|
+
`vision transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
|
|
824
862
|
dataset.transforms.html#module-mindspore.dataset.vision>`_ ,
|
|
825
|
-
`nlp transform <https://mindspore.cn/docs/en/r2.
|
|
863
|
+
`nlp transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
|
|
826
864
|
dataset.transforms.html#module-mindspore.dataset.text>`_ ,
|
|
827
|
-
`audio transform <https://mindspore.cn/docs/en/r2.
|
|
865
|
+
`audio transform <https://mindspore.cn/docs/en/r2.2/api_python/mindspore.\
|
|
828
866
|
dataset.transforms.html#module-mindspore.dataset.audio>`_ )
|
|
829
867
|
provided by mindspore dataset, please use the following parameters:
|
|
830
868
|
|
|
@@ -839,31 +877,37 @@ class Dataset:
|
|
|
839
877
|
applied on the dataset. Operations are applied in the order they appear in this list.
|
|
840
878
|
input_columns (Union[str, list[str]], optional): List of the names of the columns that will be passed to
|
|
841
879
|
the first operation as input. The size of this list must match the number of
|
|
842
|
-
input columns expected by the first operation. Default: None
|
|
880
|
+
input columns expected by the first operation. Default: ``None``, the first
|
|
843
881
|
operation will be passed however many columns that are required, starting from
|
|
844
882
|
the first column.
|
|
845
883
|
output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
|
|
846
884
|
the last operation. This parameter is mandatory if len(input_columns) !=
|
|
847
885
|
len(output_columns). The size of this list must match the number of output
|
|
848
|
-
columns of the last operation. Default: None
|
|
886
|
+
columns of the last operation. Default: ``None``, output columns will have the same
|
|
849
887
|
name as the input columns, i.e., the columns will be replaced.
|
|
850
888
|
num_parallel_workers (int, optional): Number of threads used to process the dataset in
|
|
851
|
-
parallel. Default: None
|
|
889
|
+
parallel. Default: ``None``, the value from the configuration will be used.
|
|
852
890
|
**kwargs:
|
|
853
891
|
|
|
854
892
|
- python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes.
|
|
855
|
-
This option could be beneficial if the Python operation is computational heavy. Default: False
|
|
893
|
+
This option could be beneficial if the Python operation is computationally heavy. Default: ``False``.
|
|
856
894
|
|
|
857
|
-
- max_rowsize (int, optional): Maximum size of row in MB that is used for shared
|
|
858
|
-
copy data between processes
|
|
895
|
+
- max_rowsize (Union[int, list[int]], optional): Maximum size of row in MB that is used for shared
|
|
896
|
+
memory allocation to copy data between processes, the total occupied shared memory will increase as
|
|
897
|
+
``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
|
|
898
|
+
used if `python_multiprocessing` is set to ``True``. If it is an int value, it represents
|
|
899
|
+
``input_columns`` and ``output_columns`` use this value as the unit to create shared memory.
|
|
900
|
+
If it is a list, the first element represents the ``input_columns`` use this value as the unit to
|
|
901
|
+
create shared memory, and the second element represents ``output_columns`` use this value as the unit
|
|
902
|
+
to create shared memory. Default: 16.
|
|
859
903
|
|
|
860
904
|
- cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
|
|
861
|
-
Default: None
|
|
905
|
+
Default: ``None``, which means no cache is used.
|
|
862
906
|
|
|
863
907
|
- callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called.
|
|
864
|
-
Default: None
|
|
908
|
+
Default: ``None``.
|
|
865
909
|
|
|
866
|
-
- offload (bool, optional): Flag to indicate whether offload is used. Default: None
|
|
910
|
+
- offload (bool, optional): Flag to indicate whether offload is used. Default: ``None``.
|
|
867
911
|
|
|
868
912
|
Note:
|
|
869
913
|
- Input `operations` accepts TensorOperations defined in mindspore.dataset part, plus user-defined
|
|
@@ -872,17 +916,21 @@ class Dataset:
|
|
|
872
916
|
`operations` .
|
|
873
917
|
|
|
874
918
|
Returns:
|
|
875
|
-
Dataset, dataset
|
|
919
|
+
Dataset, a new dataset with the above operation applied.
|
|
876
920
|
|
|
877
921
|
Examples:
|
|
922
|
+
>>> import mindspore.dataset as ds
|
|
923
|
+
>>> import mindspore.dataset.vision as vision
|
|
878
924
|
>>> # dataset is an instance of Dataset which has 2 columns, "image" and "label".
|
|
879
925
|
>>> # image is of type bytes type which can be decoded to RGB
|
|
880
926
|
>>> # label is of type int32
|
|
927
|
+
>>> cifar10_dataset_dir = "/path/to/cifar10_dataset_directory"
|
|
928
|
+
>>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir)
|
|
881
929
|
>>>
|
|
882
930
|
>>> # Define two operations, where each operation accepts 1 input column and outputs 1 column.
|
|
883
|
-
>>> decode_op =
|
|
884
|
-
>>> random_jitter_op =
|
|
885
|
-
...
|
|
931
|
+
>>> decode_op = vision.Decode(to_pil=False)
|
|
932
|
+
>>> random_jitter_op = vision.RandomColorAdjust(brightness=(0.8, 0.8), contrast=(1, 1),
|
|
933
|
+
... saturation=(1, 1), hue=(0, 0))
|
|
886
934
|
>>>
|
|
887
935
|
>>> # 1) Simple map example.
|
|
888
936
|
>>>
|
|
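A minimal sketch of `map` with a user-defined Python callable, complementing the vision-transform example above; names and worker count are illustrative:

    import numpy as np
    import mindspore.dataset as ds

    def rows():
        for i in range(6):
            yield (np.array(i, dtype=np.float32),)

    dataset = ds.GeneratorDataset(rows, ["data"])
    # A plain Python function is applied row by row; num_parallel_workers sets the
    # thread count, and python_multiprocessing=True would switch to processes.
    dataset = dataset.map(operations=[lambda x: x * 2 + 1],
                          input_columns=["data"], num_parallel_workers=2)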
@@ -948,16 +996,19 @@ class Dataset:
|
|
|
948
996
|
Args:
|
|
949
997
|
predicate (callable): Python callable which returns a boolean value. Rows for which it returns False are filtered out.
|
|
950
998
|
input_columns (Union[str, list[str]], optional): List of names of the input columns. If not provided
|
|
951
|
-
or provided with None
|
|
999
|
+
or provided with ``None``, the predicate will be applied on all columns in the dataset.
|
|
1000
|
+
Default: ``None``.
|
|
952
1001
|
num_parallel_workers (int, optional): Number of workers to process the dataset
|
|
953
|
-
in parallel. Default: None
|
|
1002
|
+
in parallel. Default: ``None``.
|
|
954
1003
|
|
|
955
1004
|
Returns:
|
|
956
|
-
Dataset, dataset
|
|
1005
|
+
Dataset, a new dataset with the above operation applied.
|
|
957
1006
|
|
|
958
1007
|
Examples:
|
|
959
|
-
>>> # generator data(0 ~
|
|
1008
|
+
>>> # generator data(0 ~ 19)
|
|
960
1009
|
>>> # filter the data that greater than or equal to 11
|
|
1010
|
+
>>> import mindspore.dataset as ds
|
|
1011
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(20)], "data")
|
|
961
1012
|
>>> dataset = dataset.filter(predicate=lambda data: data < 11, input_columns = ["data"])
|
|
962
1013
|
"""
|
|
963
1014
|
return FilterDataset(self, predicate, input_columns, num_parallel_workers)
|
|
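A minimal sketch of `filter` over two columns, extending the single-column example above; the predicate takes one argument per column in `input_columns`, and the column names are illustrative:

    import numpy as np
    import mindspore.dataset as ds

    def two_columns():
        for i in range(4):
            yield (np.array(i), np.array(3 - i))

    dataset = ds.GeneratorDataset(two_columns, ["x", "y"])
    # Keep only the rows where the predicate returns True.
    dataset = dataset.filter(predicate=lambda x, y: x < y, input_columns=["x", "y"])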
@@ -965,20 +1016,21 @@ class Dataset:
|
|
|
965
1016
|
@check_repeat
|
|
966
1017
|
def repeat(self, count=None):
|
|
967
1018
|
"""
|
|
968
|
-
Repeat this dataset `count` times. Repeat infinitely if the count is None or
|
|
1019
|
+
Repeat this dataset `count` times. Repeat infinitely if the `count` is ``None`` or ``-1``.
|
|
969
1020
|
|
|
970
1021
|
Note:
|
|
971
1022
|
The order of using repeat and batch reflects the number of batches. It is recommended that
|
|
972
1023
|
the repeat operation is used after the batch operation.
|
|
973
1024
|
|
|
974
1025
|
Args:
|
|
975
|
-
count (int): Number of times the dataset is going to be repeated. Default: None
|
|
1026
|
+
count (int): Number of times the dataset is going to be repeated. Default: ``None``.
|
|
976
1027
|
|
|
977
1028
|
Returns:
|
|
978
|
-
Dataset, dataset
|
|
1029
|
+
Dataset, a new dataset with the above operation applied.
|
|
979
1030
|
|
|
980
1031
|
Examples:
|
|
981
|
-
>>>
|
|
1032
|
+
>>> import mindspore.dataset as ds
|
|
1033
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
982
1034
|
>>>
|
|
983
1035
|
>>> # Create a dataset where the dataset is repeated for 50 epochs
|
|
984
1036
|
>>> dataset = dataset.repeat(50)
|
|
@@ -1004,11 +1056,12 @@ class Dataset:
|
|
|
1004
1056
|
count (int): Number of elements in the dataset to be skipped.
|
|
1005
1057
|
|
|
1006
1058
|
Returns:
|
|
1007
|
-
Dataset,
|
|
1059
|
+
Dataset, a new dataset with the above operation applied.
|
|
1008
1060
|
|
|
1009
1061
|
Examples:
|
|
1010
|
-
>>>
|
|
1011
|
-
>>>
|
|
1062
|
+
>>> import mindspore.dataset as ds
|
|
1063
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1064
|
+
>>> # Skip first 3 elements of dataset and retain 7 elements.
|
|
1012
1065
|
>>> dataset = dataset.skip(3)
|
|
1013
1066
|
"""
|
|
1014
1067
|
return SkipDataset(self, count)
|
|
@@ -1016,23 +1069,28 @@ class Dataset:
|
|
|
1016
1069
|
@check_take
|
|
1017
1070
|
def take(self, count=-1):
|
|
1018
1071
|
"""
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
Note:
|
|
1022
|
-
1. If count is greater than the number of elements in the dataset or equal to -1,
|
|
1023
|
-
all the elements in dataset will be taken.
|
|
1024
|
-
2. The order of using take and batch matters. If take is before batch operation,
|
|
1025
|
-
then take the given number of rows; otherwise take the given number of batches.
|
|
1072
|
+
Take the first specified number of samples from the dataset.
|
|
1026
1073
|
|
|
1027
1074
|
Args:
|
|
1028
|
-
count (int, optional):
|
|
1075
|
+
count (int, optional): The desired number of samples to take. If the value exceeds
|
|
1076
|
+
the total number of samples in the dataset, all data will be returned.
|
|
1077
|
+
Default: ``-1`` , will return all data.
|
|
1078
|
+
|
|
1079
|
+
Note:
|
|
1080
|
+
When there are operations that will change the number of samples of the dataset in
|
|
1081
|
+
the data pipeline, the location of the `take` operation can change its effect.
|
|
1082
|
+
For example, `batch` operation will combine the successive samples of the specified
|
|
1083
|
+
`batch_size` into 1 sample, so `.batch(batch_size).take(1)` will be equivalent to
|
|
1084
|
+
`.take(batch_size).batch(batch_size)`.
|
|
1029
1085
|
|
|
1030
1086
|
Returns:
|
|
1031
|
-
Dataset, dataset
|
|
1087
|
+
Dataset, a new dataset with the above operation applied.
|
|
1032
1088
|
|
|
1033
1089
|
Examples:
|
|
1034
|
-
>>>
|
|
1035
|
-
>>>
|
|
1090
|
+
>>> import mindspore.dataset as ds
|
|
1091
|
+
>>> mnist_dataset_dir = "/path/to/mnist_dataset_directory"
|
|
1092
|
+
>>> dataset = ds.MnistDataset(dataset_dir=mnist_dataset_dir)
|
|
1093
|
+
>>> # Take 50 samples from MNIST dataset.
|
|
1036
1094
|
>>> dataset = dataset.take(50)
|
|
1037
1095
|
"""
|
|
1038
1096
|
return TakeDataset(self, count)
|
|
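A minimal sketch of the note above on where `take` sits relative to `batch`: with a non-shuffled source, the two pipelines below are expected to yield the same single batch:

    import mindspore.dataset as ds

    # Take 4 rows, then form one batch of 4 ...
    first = ds.GeneratorDataset([i for i in range(10)], "column1",
                                shuffle=False).take(4).batch(4)
    # ... which holds the same rows as batching first and taking one batch.
    second = ds.GeneratorDataset([i for i in range(10)], "column1",
                                 shuffle=False).batch(4).take(1)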
@@ -1113,7 +1171,7 @@ class Dataset:
|
|
|
1113
1171
|
- The sum of split sizes > K, the difference of sigma(round(fi * K)) - K will be removed from the first
|
|
1114
1172
|
large enough split such that it will have at least 1 row after removing the difference.
|
|
1115
1173
|
|
|
1116
|
-
randomize (bool, optional): Determines whether or not to split the data randomly. Default: True
|
|
1174
|
+
randomize (bool, optional): Determines whether or not to split the data randomly. Default: ``True``.
|
|
1117
1175
|
If True, the data will be randomly split. Otherwise, each split will be created with
|
|
1118
1176
|
consecutive rows from the dataset.
|
|
1119
1177
|
|
|
@@ -1124,7 +1182,7 @@ class Dataset:
|
|
|
1124
1182
|
will be different in each epoch.
|
|
1125
1183
|
|
|
1126
1184
|
Returns:
|
|
1127
|
-
|
|
1185
|
+
Tuple[Dataset], a tuple of new datasets split from the original one.
|
|
1128
1186
|
|
|
1129
1187
|
Raises:
|
|
1130
1188
|
RuntimeError: If get_dataset_size returns None or is not supported for this dataset.
|
|
@@ -1136,9 +1194,9 @@ class Dataset:
|
|
|
1136
1194
|
floats don't sum to 1.
|
|
1137
1195
|
|
|
1138
1196
|
Examples:
|
|
1139
|
-
>>> #
|
|
1140
|
-
>>>
|
|
1141
|
-
>>> dataset = ds.
|
|
1197
|
+
>>> # Split the data into train part and test part.
|
|
1198
|
+
>>> import mindspore.dataset as ds
|
|
1199
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1142
1200
|
>>> train_dataset, test_dataset = dataset.split([0.9, 0.1])
|
|
1143
1201
|
"""
|
|
1144
1202
|
if self.is_shuffled():
|
|
@@ -1179,14 +1237,17 @@ class Dataset:
|
|
|
1179
1237
|
to be zipped together with this dataset.
|
|
1180
1238
|
|
|
1181
1239
|
Returns:
|
|
1182
|
-
Dataset, dataset
|
|
1240
|
+
Dataset, a new dataset with the above operation applied.
|
|
1183
1241
|
|
|
1184
1242
|
Raises:
|
|
1185
1243
|
TypeError: The parameter is not dataset object or tuple of dataset objects.
|
|
1186
1244
|
|
|
1187
1245
|
Examples:
|
|
1188
|
-
>>> # Create a dataset which is the combination of
|
|
1189
|
-
>>> dataset
|
|
1246
|
+
>>> # Create a dataset which is the combination of dataset_1 and dataset_2
|
|
1247
|
+
>>> import mindspore.dataset as ds
|
|
1248
|
+
>>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1")
|
|
1249
|
+
>>> dataset_2 = ds.GeneratorDataset([1, 2, 3], "column2")
|
|
1250
|
+
>>> dataset = dataset_1.zip(dataset_2)
|
|
1190
1251
|
"""
|
|
1191
1252
|
if isinstance(datasets, tuple):
|
|
1192
1253
|
datasets = (self, *datasets)
|
|
@@ -1202,6 +1263,12 @@ class Dataset:
|
|
|
1202
1263
|
Concatenate the dataset objects in the input list.
|
|
1203
1264
|
Performing "+" operation on dataset objects can achieve the same effect.
|
|
1204
1265
|
|
|
1266
|
+
For a dataset concatenated by many other dataset objects, it returns the data in the order of
|
|
1267
|
+
datasets passed in. If you want to change the data order (such as random selection from each dataset
|
|
1268
|
+
instead of in sequence), apply the `use_sampler` method on the concatenated dataset object.
|
|
1269
|
+
Currently `use_sampler` supports `dataset.DistributedSampler` for sharding selection from each dataset
|
|
1270
|
+
or `dataset.RandomSampler` for random selection from each dataset, see examples below.
|
|
1271
|
+
|
|
1205
1272
|
Note:
|
|
1206
1273
|
The column name, and rank and type of the column data must be the same in the input datasets.
|
|
1207
1274
|
|
|
@@ -1210,13 +1277,45 @@ class Dataset:
|
|
|
1210
1277
|
to be concatenated together with this dataset.
|
|
1211
1278
|
|
|
1212
1279
|
Returns:
|
|
1213
|
-
Dataset, dataset
|
|
1280
|
+
Dataset, a new dataset with the above operation applied.
|
|
1214
1281
|
|
|
1215
1282
|
Examples:
|
|
1283
|
+
>>> import mindspore.dataset as ds
|
|
1284
|
+
>>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
|
|
1285
|
+
>>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
|
|
1286
|
+
>>>
|
|
1216
1287
|
>>> # Create a dataset by concatenating dataset_1 and dataset_2 with "+" operator
|
|
1217
1288
|
>>> dataset = dataset_1 + dataset_2
|
|
1218
1289
|
>>> # Create a dataset by concatenating dataset_1 and dataset_2 with concat operation
|
|
1219
1290
|
>>> dataset = dataset_1.concat(dataset_2)
|
|
1291
|
+
>>>
|
|
1292
|
+
>>> # Check the data order of dataset
|
|
1293
|
+
>>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
|
|
1294
|
+
>>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
|
|
1295
|
+
>>> dataset = dataset_1 + dataset_2
|
|
1296
|
+
>>> result = list(dataset)
|
|
1297
|
+
>>> # [[Tensor(shape=[], dtype=Int64, value= 1)], [Tensor(shape=[], dtype=Int64, value= 2)],
|
|
1298
|
+
>>> # [Tensor(shape=[], dtype=Int64, value= 3)], [Tensor(shape=[], dtype=Int64, value= 4)],
|
|
1299
|
+
>>> # [Tensor(shape=[], dtype=Int64, value= 5)], [Tensor(shape=[], dtype=Int64, value= 6)]]
|
|
1300
|
+
>>>
|
|
1301
|
+
>>> # Change the data order of concatenated dataset with sharding selection
|
|
1302
|
+
>>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
|
|
1303
|
+
>>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
|
|
1304
|
+
>>> dataset = dataset_1.concat(dataset_2)
|
|
1305
|
+
>>> dataset.use_sampler(ds.DistributedSampler(num_shards=2, shard_id=1, shuffle=False))
|
|
1306
|
+
>>> result = list(dataset)
|
|
1307
|
+
>>> # [[Tensor(shape=[], dtype=Int64, value= 2)], [Tensor(shape=[], dtype=Int64, value= 4)],
|
|
1308
|
+
>>> # [Tensor(shape=[], dtype=Int64, value= 6)]]
|
|
1309
|
+
>>>
|
|
1310
|
+
>>> # Change the data order of concatenated dataset with random selection
|
|
1311
|
+
>>> dataset_1 = ds.GeneratorDataset([1, 2, 3], "column1", shuffle=False)
|
|
1312
|
+
>>> dataset_2 = ds.GeneratorDataset([4, 5, 6], "column1", shuffle=False)
|
|
1313
|
+
>>> dataset = dataset_1.concat(dataset_2)
|
|
1314
|
+
>>> dataset.use_sampler(ds.RandomSampler())
|
|
1315
|
+
>>> result = list(dataset)
|
|
1316
|
+
>>> # [[Tensor(shape=[], dtype=Int64, value= 1)], [Tensor(shape=[], dtype=Int64, value= 4)],
|
|
1317
|
+
>>> # [Tensor(shape=[], dtype=Int64, value= 2)], [Tensor(shape=[], dtype=Int64, value= 5)],
|
|
1318
|
+
>>> # [Tensor(shape=[], dtype=Int64, value= 6)], [Tensor(shape=[], dtype=Int64, value= 3)]]
|
|
1220
1319
|
"""
|
|
1221
1320
|
if isinstance(datasets, Dataset):
|
|
1222
1321
|
datasets = [self] + [datasets]
|
|
@@ -1236,16 +1335,17 @@ class Dataset:
|
|
|
1236
1335
|
output_columns (Union[str, list[str]]): List of names of the output columns.
|
|
1237
1336
|
|
|
1238
1337
|
Returns:
|
|
1239
|
-
Dataset, dataset
|
|
1338
|
+
Dataset, a new dataset with the above operation applied.
|
|
1240
1339
|
|
|
1241
1340
|
Examples:
|
|
1242
|
-
>>>
|
|
1341
|
+
>>> import mindspore.dataset as ds
|
|
1243
1342
|
>>> input_columns = ["input_col1", "input_col2", "input_col3"]
|
|
1244
1343
|
>>> output_columns = ["output_col1", "output_col2", "output_col3"]
|
|
1245
1344
|
>>>
|
|
1246
|
-
>>> # Create a dataset
|
|
1247
|
-
>>>
|
|
1248
|
-
>>>
|
|
1345
|
+
>>> # Create a dataset with 3 columns
|
|
1346
|
+
>>> dataset = ds.GeneratorDataset([(1, 2, 3), (3, 4, 5), (5, 6, 7)], column_names=input_columns)
|
|
1347
|
+
>>>
|
|
1348
|
+
>>> # Rename "input_col1" to "output_col1", "input_col2" to "output_col2", "input_col3" to "output_col3"
|
|
1249
1349
|
>>> dataset = dataset.rename(input_columns=input_columns, output_columns=output_columns)
|
|
1250
1350
|
"""
|
|
1251
1351
|
|
|
@@ -1261,13 +1361,15 @@ class Dataset:
|
|
|
1261
1361
|
columns(Union[str, list[str]]): List of names of the columns to project.
|
|
1262
1362
|
|
|
1263
1363
|
Returns:
|
|
1264
|
-
Dataset, dataset
|
|
1364
|
+
Dataset, a new dataset with the above operation applied.
|
|
1265
1365
|
|
|
1266
1366
|
Examples:
|
|
1267
|
-
>>>
|
|
1268
|
-
>>>
|
|
1367
|
+
>>> import mindspore.dataset as ds
|
|
1368
|
+
>>> # Create a dataset with 3 columns
|
|
1369
|
+
>>> input_columns = ["column1", "column2", "column3"]
|
|
1370
|
+
>>> dataset = ds.GeneratorDataset([(1, 2, 3), (3, 4, 5), (5, 6, 7)], column_names=input_columns)
|
|
1269
1371
|
>>>
|
|
1270
|
-
>>>
|
|
1372
|
+
>>> columns_to_project = ["column3", "column1", "column2"]
|
|
1271
1373
|
>>> # in that order, regardless of the original order of columns.
|
|
1272
1374
|
>>> dataset = dataset.project(columns=columns_to_project)
|
|
1273
1375
|
"""
|
|
@@ -1283,10 +1385,11 @@ class Dataset:
|
|
|
1283
1385
|
return a preprocessed `Dataset` .
|
|
1284
1386
|
|
|
1285
1387
|
Returns:
|
|
1286
|
-
Dataset, dataset
|
|
1388
|
+
Dataset, a new dataset with the above operation applied.
|
|
1287
1389
|
|
|
1288
1390
|
Examples:
|
|
1289
|
-
>>>
|
|
1391
|
+
>>> import mindspore.dataset as ds
|
|
1392
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1290
1393
|
>>>
|
|
1291
1394
|
>>> # Declare an apply_func function which returns a Dataset object
|
|
1292
1395
|
>>> def apply_func(data):
|
|
@@ -1310,41 +1413,45 @@ class Dataset:
|
|
|
1310
1413
|
return dataset
|
|
1311
1414
|
|
|
1312
1415
|
@check_device_send
|
|
1313
|
-
def device_que(self, send_epoch_end=True, create_data_info_queue=False):
|
|
1416
|
+
def device_que(self, send_epoch_end=True, create_data_info_queue=False, queue_name=""):
|
|
1314
1417
|
"""
|
|
1315
1418
|
Return a transferred Dataset that transfers data through a device.
|
|
1316
1419
|
|
|
1317
1420
|
Args:
|
|
1318
|
-
send_epoch_end (bool, optional): Whether to send end of sequence to device or not.
|
|
1421
|
+
send_epoch_end (bool, optional): Whether to send end of sequence to device or not.
|
|
1422
|
+
Default: ``True``.
|
|
1319
1423
|
create_data_info_queue (bool, optional): Whether to create queue which stores
|
|
1320
|
-
types and shapes of data or not. Default: False
|
|
1424
|
+
types and shapes of data or not. Default: ``False``.
|
|
1425
|
+
queue_name (str, optional): Name of queue which connects dataset processing and model
|
|
1426
|
+
computing. Default: ``""``.
|
|
1321
1427
|
|
|
1322
1428
|
Note:
|
|
1323
1429
|
If device is Ascend, features of data will be transferred one by one. The limitation
|
|
1324
1430
|
of data transmission per time is 256M.
|
|
1325
1431
|
|
|
1326
1432
|
Returns:
|
|
1327
|
-
Dataset, dataset
|
|
1433
|
+
Dataset, a new dataset with the above operation applied.
|
|
1328
1434
|
|
|
1329
1435
|
Examples:
|
|
1436
|
+
>>> import mindspore.dataset as ds
|
|
1330
1437
|
>>> import time
|
|
1331
1438
|
>>>
|
|
1332
1439
|
>>> data = ds.TFRecordDataset('/path/to/TF_FILES', '/path/to/TF_SCHEMA_FILE', shuffle=ds.Shuffle.FILES)
|
|
1333
|
-
>>>
|
|
1334
1440
|
>>> data = data.device_que()
|
|
1335
1441
|
>>> data.send()
|
|
1336
1442
|
>>> time.sleep(0.1)
|
|
1337
1443
|
>>> data.stop_send()
|
|
1338
1444
|
"""
|
|
1339
|
-
return TransferDataset(self, send_epoch_end, create_data_info_queue)
|
|
1445
|
+
return TransferDataset(self, send_epoch_end, create_data_info_queue, queue_name)
|
|
1340
1446
|
|
|
1341
1447
|
@check_save
|
|
1342
1448
|
def save(self, file_name, num_files=1, file_type='mindrecord'):
|
|
1343
1449
|
"""
|
|
1344
1450
|
Save the dynamic data processed by the dataset pipeline in common dataset format.
|
|
1345
|
-
Supported dataset formats:
|
|
1451
|
+
Supported dataset formats: ``'mindrecord'`` only. You can use
|
|
1452
|
+
:class:`mindspore.dataset.MindDataset` API to read the saved file(s).
|
|
1346
1453
|
|
|
1347
|
-
Implicit type casting exists when saving data as
|
|
1454
|
+
Implicit type casting exists when saving data as ``'mindrecord'`` . The transform table shows how to do
|
|
1348
1455
|
type casting.
|
|
1349
1456
|
|
|
1350
1457
|
.. list-table:: Implicit Type Casting when Saving as `mindrecord`
|
|
@@ -1395,27 +1502,27 @@ class Dataset:
|
|
|
1395
1502
|
- Multi-dimensional string not supported
|
|
1396
1503
|
|
|
1397
1504
|
Note:
|
|
1398
|
-
1. To save the samples in order, set dataset's shuffle to False and num_files to 1
|
|
1505
|
+
1. To save the samples in order, set dataset's `shuffle` to ``False`` and `num_files` to ``1``.
|
|
1399
1506
|
2. Before calling the function, do not use batch operation, repeat operation or data augmentation operations
|
|
1400
1507
|
with random attribute in map operation.
|
|
1401
1508
|
3. When array dimension is variable, one-dimensional arrays or
|
|
1402
1509
|
multi-dimensional arrays with variable dimension 0 are supported.
|
|
1403
|
-
4.
|
|
1510
|
+
4. MindRecord does not support uint64, multi-dimensional uint8 (drop dimension) nor
|
|
1404
1511
|
multi-dimensional string.
|
|
1405
1512
|
|
|
1406
1513
|
Args:
|
|
1407
1514
|
file_name (str): Path to dataset file.
|
|
1408
|
-
num_files (int, optional): Number of dataset files. Default: 1.
|
|
1409
|
-
file_type (str, optional): Dataset format. Default: 'mindrecord'.
|
|
1515
|
+
num_files (int, optional): Number of dataset files. Default: ``1`` .
|
|
1516
|
+
file_type (str, optional): Dataset format. Default: ``'mindrecord'`` .
|
|
1410
1517
|
|
|
1411
1518
|
Examples:
|
|
1519
|
+
>>> import mindspore.dataset as ds
|
|
1412
1520
|
>>> import numpy as np
|
|
1413
1521
|
>>>
|
|
1414
1522
|
>>> def generator_1d():
|
|
1415
1523
|
... for i in range(10):
|
|
1416
1524
|
... yield (np.array([i]),)
|
|
1417
1525
|
>>>
|
|
1418
|
-
>>>
|
|
1419
1526
|
>>> # apply dataset operations
|
|
1420
1527
|
>>> d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
|
|
1421
1528
|
>>> d1.save('/path/to/save_file')
|
|
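A minimal round-trip sketch: the updated docstring points to `mindspore.dataset.MindDataset` for reading the saved file back; the path below reuses the illustrative placeholder from the example above:

    import mindspore.dataset as ds

    # Read back the file written by d1.save('/path/to/save_file') above.
    reloaded = ds.MindDataset(dataset_files=['/path/to/save_file'])
    for item in reloaded.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(item["data"])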
@@ -1442,19 +1549,21 @@ class Dataset:
|
|
|
1442
1549
|
|
|
1443
1550
|
Args:
|
|
1444
1551
|
columns (list[str], optional): List of columns to be used to specify the order of columns.
|
|
1445
|
-
Default: None
|
|
1552
|
+
Default: ``None``, means all columns.
|
|
1446
1553
|
num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
|
|
1447
|
-
Default:
|
|
1554
|
+
Default: ``-1``, iterator can be iterated infinite number of epochs.
|
|
1448
1555
|
output_numpy (bool, optional): Whether or not to output NumPy datatype.
|
|
1449
|
-
If output_numpy
|
|
1450
|
-
do_copy (bool, optional): When output data type is mindspore.Tensor
|
|
1451
|
-
use this param to select the conversion method, only take False for better performance.
|
|
1556
|
+
If `output_numpy` is ``False``, iterator will output MSTensor. Default: ``False``.
|
|
1557
|
+
do_copy (bool, optional): When output data type is :class:`mindspore.Tensor`,
|
|
1558
|
+
use this parameter to select the conversion method; setting it to False gives better performance.
|
|
1559
|
+
Default: ``True``.
|
|
1452
1560
|
|
|
1453
1561
|
Returns:
|
|
1454
|
-
Iterator,
|
|
1562
|
+
Iterator, a dataset iterator that returns data of type Tuple.
|
|
1455
1563
|
|
|
1456
1564
|
Examples:
|
|
1457
|
-
>>>
|
|
1565
|
+
>>> import mindspore.dataset as ds
|
|
1566
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1458
1567
|
>>> iterator = dataset.create_tuple_iterator()
|
|
1459
1568
|
>>> for item in iterator:
|
|
1460
1569
|
... # item is a list
|
|
@@ -1476,17 +1585,19 @@ class Dataset:
|
|
|
1476
1585
|
|
|
1477
1586
|
Args:
|
|
1478
1587
|
num_epochs (int, optional): Maximum number of epochs that iterator can be iterated.
|
|
1479
|
-
Default:
|
|
1588
|
+
Default: ``-1`` , iterator can be iterated infinite number of epochs.
|
|
1480
1589
|
output_numpy (bool, optional): Whether or not to output NumPy datatype,
|
|
1481
|
-
if output_numpy
|
|
1482
|
-
do_copy (bool, optional): When output data type is mindspore.Tensor
|
|
1483
|
-
use this param to select the conversion method, only take False for better performance.
|
|
1590
|
+
if `output_numpy` is ``False``, iterator will output MSTensor. Default: ``False`` .
|
|
1591
|
+
do_copy (bool, optional): When output data type is :class:`mindspore.Tensor`,
|
|
1592
|
+
use this parameter to select the conversion method; setting it to False gives better performance.
|
|
1593
|
+
Default: ``True`` .
|
|
1484
1594
|
|
|
1485
1595
|
Returns:
|
|
1486
|
-
Iterator,
|
|
1596
|
+
Iterator, a dataset iterator that returns data of type Dict.
|
|
1487
1597
|
|
|
1488
1598
|
Examples:
|
|
1489
|
-
>>>
|
|
1599
|
+
>>> import mindspore.dataset as ds
|
|
1600
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1490
1601
|
>>> iterator = dataset.create_dict_iterator()
|
|
1491
1602
|
>>> for item in iterator:
|
|
1492
1603
|
... # item is a dict
|
|
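A minimal sketch of the `do_copy` option documented above; per the docstring, setting it to ``False`` selects the faster Tensor conversion path (the data is illustrative):

    import mindspore.dataset as ds

    dataset = ds.GeneratorDataset([i for i in range(4)], "column1", shuffle=False)
    # Output stays mindspore.Tensor because output_numpy defaults to False.
    for item in dataset.create_dict_iterator(num_epochs=1, do_copy=False):
        print(item["column1"])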
@@ -1515,7 +1626,8 @@ class Dataset:
|
|
|
1515
1626
|
int, tuple of the input index information.
|
|
1516
1627
|
|
|
1517
1628
|
Examples:
|
|
1518
|
-
>>>
|
|
1629
|
+
>>> import mindspore.dataset as ds
|
|
1630
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1519
1631
|
>>> # set input_indexs
|
|
1520
1632
|
>>> dataset.input_indexs = 10
|
|
1521
1633
|
>>> print(dataset.input_indexs)
|
|
@@ -1542,11 +1654,14 @@ class Dataset:
|
|
|
1542
1654
|
def copy_batch_size(self, value):
|
|
1543
1655
|
self._batch_size = value
|
|
1544
1656
|
|
|
1545
|
-
def _init_tree_getters(self):
|
|
1657
|
+
def _init_tree_getters(self, getter_mode=True):
|
|
1546
1658
|
"""
|
|
1547
1659
|
Get pipeline information.
|
|
1660
|
+
|
|
1661
|
+
Args:
|
|
1662
|
+
getter_mode (bool, optional): Whether to build IR tree in pull mode. Default: ``True``.
|
|
1548
1663
|
"""
|
|
1549
|
-
ir_tree, api_tree = self.create_ir_tree()
|
|
1664
|
+
ir_tree, api_tree = self.create_ir_tree(getter_mode)
|
|
1550
1665
|
|
|
1551
1666
|
runtime_context = cde.PythonRuntimeContext()
|
|
1552
1667
|
runtime_context.Init()
|
|
@@ -1576,8 +1691,12 @@ class Dataset:
|
|
|
1576
1691
|
list, list of column names in the dataset.
|
|
1577
1692
|
|
|
1578
1693
|
Examples:
|
|
1579
|
-
>>>
|
|
1694
|
+
>>> import mindspore.dataset as ds
|
|
1695
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1580
1696
|
>>> col_names = dataset.get_col_names()
|
|
1697
|
+
>>> print(col_names)
|
|
1698
|
+
['column1']
|
|
1699
|
+
|
|
1581
1700
|
"""
|
|
1582
1701
|
if self._col_names is None:
|
|
1583
1702
|
runtime_getter = self._init_tree_getters()
|
|
@@ -1591,22 +1710,26 @@ class Dataset:
|
|
|
1591
1710
|
Get the shapes of output data.
|
|
1592
1711
|
|
|
1593
1712
|
Args:
|
|
1594
|
-
estimate (bool): If `estimate` is False, will return the shapes of first data row.
|
|
1713
|
+
estimate (bool): If `estimate` is ``False`` , will return the shapes of first data row.
|
|
1595
1714
|
Otherwise, will iterate the whole dataset and return the estimated shapes of data row,
|
|
1596
|
-
where dynamic shape is marked as None (used in dynamic data shapes scenario).
|
|
1715
|
+
where dynamic shape is marked as None (used in dynamic data shapes scenario).
|
|
1716
|
+
Default: ``False`` .
|
|
1597
1717
|
|
|
1598
1718
|
Returns:
|
|
1599
1719
|
list, list of shapes of each column.
|
|
1600
1720
|
|
|
1601
1721
|
Examples:
|
|
1722
|
+
>>> import mindspore.dataset as ds
|
|
1602
1723
|
>>> import numpy as np
|
|
1603
1724
|
>>>
|
|
1604
1725
|
>>> def generator1():
|
|
1605
1726
|
... for i in range(1, 100):
|
|
1606
|
-
... yield np.ones((16,
|
|
1727
|
+
... yield np.ones((16, 83, 83)), np.array([i])
|
|
1607
1728
|
>>>
|
|
1608
1729
|
>>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
|
|
1609
1730
|
>>> output_shapes = dataset.output_shapes()
|
|
1731
|
+
>>> print(output_shapes)
|
|
1732
|
+
[[16, 83, 83], [1]]
|
|
1610
1733
|
"""
|
|
1611
1734
|
# cache single shape
|
|
1612
1735
|
if not estimate and self.saved_output_shapes is not None:
|
|
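A minimal sketch of `output_shapes(estimate=True)` with a dynamically shaped column, which the docstring above says is reported with ``None`` in the varying dimension:

    import numpy as np
    import mindspore.dataset as ds

    def dynamic_rows():
        for i in range(1, 4):
            yield (np.ones((i, 2), dtype=np.float32),)

    dataset = ds.GeneratorDataset(dynamic_rows, ["data"])
    # Expected to report the varying first dimension as None, e.g. [[None, 2]].
    print(dataset.output_shapes(estimate=True))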
@@ -1641,8 +1764,17 @@ class Dataset:
|
|
|
1641
1764
|
list, list of data types.
|
|
1642
1765
|
|
|
1643
1766
|
Examples:
|
|
1644
|
-
>>>
|
|
1767
|
+
>>> import mindspore.dataset as ds
|
|
1768
|
+
>>> import numpy as np
|
|
1769
|
+
>>>
|
|
1770
|
+
>>> def generator1():
|
|
1771
|
+
... for i in range(1, 100):
|
|
1772
|
+
... yield np.ones((16, 83, 83)).astype(np.float32), np.array([i]).astype(np.int32)
|
|
1773
|
+
>>>
|
|
1774
|
+
>>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
|
|
1645
1775
|
>>> output_types = dataset.output_types()
|
|
1776
|
+
>>> print(output_types)
|
|
1777
|
+
[dtype('float32'), dtype('int32')]
|
|
1646
1778
|
"""
|
|
1647
1779
|
if self.saved_output_types is None:
|
|
1648
1780
|
runtime_getter = self._init_tree_getters()
|
|
@@ -1666,8 +1798,18 @@ class Dataset:
|
|
|
1666
1798
|
int, number of batches.
|
|
1667
1799
|
|
|
1668
1800
|
Examples:
|
|
1669
|
-
>>>
|
|
1801
|
+
>>> import mindspore.dataset as ds
|
|
1802
|
+
>>> import numpy as np
|
|
1803
|
+
>>>
|
|
1804
|
+
>>> # A generator that returns 66 samples
|
|
1805
|
+
>>> def generator1():
|
|
1806
|
+
... for i in range(66):
|
|
1807
|
+
... yield np.ones((16, 83, 83)), np.array([i])
|
|
1808
|
+
>>>
|
|
1809
|
+
>>> dataset = ds.GeneratorDataset(generator1, ["data1", "data2"])
|
|
1670
1810
|
>>> dataset_size = dataset.get_dataset_size()
|
|
1811
|
+
>>> print(dataset_size)
|
|
1812
|
+
66
|
|
1671
1813
|
"""
|
|
1672
1814
|
if self.dataset_size is None:
|
|
1673
1815
|
runtime_getter = self.__init_size_getter()
|
|
@@ -1685,7 +1827,11 @@ class Dataset:
|
|
|
1685
1827
|
int, number of classes.
|
|
1686
1828
|
|
|
1687
1829
|
Examples:
|
|
1688
|
-
>>>
|
|
1830
|
+
>>> import mindspore.dataset as ds
|
|
1831
|
+
>>> # Read image files
|
|
1832
|
+
>>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
|
|
1833
|
+
>>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
|
|
1834
|
+
>>> # Check how many classes exist in image folder
|
|
1689
1835
|
>>> num_classes = dataset.num_classes()
|
|
1690
1836
|
"""
|
|
1691
1837
|
if self._num_classes is None:
|
|
@@ -1718,19 +1864,18 @@ class Dataset:
|
|
|
1718
1864
|
Args:
|
|
1719
1865
|
condition_name (str): The condition name that is used to toggle sending next row.
|
|
1720
1866
|
num_batch (Union[int, None]): The number of batches (rows) that are released.
|
|
1721
|
-
When num_batch is None
|
|
1722
|
-
sync_wait operation. Default: None
|
|
1723
|
-
data (Any): The data passed to the callback, user defined. Default: None
|
|
1867
|
+
When `num_batch` is ``None``, it will default to the number specified by the
|
|
1868
|
+
`sync_wait` operation. Default: ``None``.
|
|
1869
|
+
data (Any): The data passed to the callback, user defined. Default: ``None``.
|
|
1724
1870
|
|
|
1725
1871
|
Examples:
|
|
1726
1872
|
>>> import numpy as np
|
|
1727
|
-
>>>
|
|
1873
|
+
>>> import mindspore.dataset as ds
|
|
1728
1874
|
>>>
|
|
1729
1875
|
>>> def gen():
|
|
1730
1876
|
... for i in range(100):
|
|
1731
1877
|
... yield (np.array(i),)
|
|
1732
1878
|
>>>
|
|
1733
|
-
>>>
|
|
1734
1879
|
>>> class Augment:
|
|
1735
1880
|
... def __init__(self, loss):
|
|
1736
1881
|
... self.loss = loss
|
|
@@ -1741,7 +1886,6 @@ class Dataset:
|
|
|
1741
1886
|
... def update(self, data):
|
|
1742
1887
|
... self.loss = data["loss"]
|
|
1743
1888
|
>>>
|
|
1744
|
-
>>>
|
|
1745
1889
|
>>> batch_size = 10
|
|
1746
1890
|
>>> dataset = ds.GeneratorDataset(gen, column_names=["input"])
|
|
1747
1891
|
>>> aug = Augment(0)
|
|
@@ -1780,8 +1924,12 @@ class Dataset:
|
|
|
1780
1924
|
int, the batch size of data.
|
|
1781
1925
|
|
|
1782
1926
|
Examples:
|
|
1783
|
-
>>>
|
|
1927
|
+
>>> import mindspore.dataset as ds
|
|
1928
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1929
|
+
>>> dataset = dataset.batch(2)
|
|
1784
1930
|
>>> batch_size = dataset.get_batch_size()
|
|
1931
|
+
>>> print(batch_size)
|
|
1932
|
+
2
|
|
1785
1933
|
"""
|
|
1786
1934
|
if self._batch_size is None:
|
|
1787
1935
|
runtime_getter = self._init_tree_getters()
|
|
@@ -1792,14 +1940,18 @@ class Dataset:
|
|
|
1792
1940
|
|
|
1793
1941
|
def get_repeat_count(self):
|
|
1794
1942
|
"""
|
|
1795
|
-
Get the replication times in RepeatDataset. Default: 1.
|
|
1943
|
+
Get the replication times in RepeatDataset. Default: ``1`` .
|
|
1796
1944
|
|
|
1797
1945
|
Returns:
|
|
1798
1946
|
int, the count of repeat.
|
|
1799
1947
|
|
|
1800
1948
|
Examples:
|
|
1801
|
-
>>>
|
|
1949
|
+
>>> import mindspore.dataset as ds
|
|
1950
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
1951
|
+
>>> dataset = dataset.repeat(5)
|
|
1802
1952
|
>>> repeat_count = dataset.get_repeat_count()
|
|
1953
|
+
>>> print(repeat_count)
|
|
1954
|
+
5
|
|
1803
1955
|
"""
|
|
1804
1956
|
if self._repeat_count is None:
|
|
1805
1957
|
runtime_getter = self._init_tree_getters()
|
|
@@ -1810,15 +1962,19 @@ class Dataset:
|
|
|
1810
1962
|
|
|
1811
1963
|
def get_class_indexing(self):
|
|
1812
1964
|
"""
|
|
1813
|
-
|
|
1965
|
+
Get the mapping dictionary from category names to category indexes.
|
|
1966
|
+
|
|
1967
|
+
This dictionary can be used to look up the category index that corresponds to a particular category name.
|
|
1814
1968
|
|
|
1815
1969
|
Returns:
|
|
1816
|
-
|
|
1817
|
-
dict, a str-to-list<int> mapping from label name to index for Coco ONLY. The second number
|
|
1818
|
-
in the list is used to indicate the super category.
|
|
1970
|
+
Dict[str, int], the mappings from category names to category indexes.
|
|
1819
1971
|
|
|
1820
1972
|
Examples:
|
|
1821
|
-
>>>
|
|
1973
|
+
>>> import mindspore.dataset as ds
|
|
1974
|
+
>>> # Read image files
|
|
1975
|
+
>>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
|
|
1976
|
+
>>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
|
|
1977
|
+
>>> # Check how many classes exist in image folder
|
|
1822
1978
|
>>> class_indexing = dataset.get_class_indexing()
|
|
1823
1979
|
"""
|
|
1824
1980
|
if self.children:
|
|
@@ -1830,6 +1986,7 @@ class Dataset:
|
|
|
1830
1986
|
Reset the dataset for next epoch.
|
|
1831
1987
|
|
|
1832
1988
|
Examples:
|
|
1989
|
+
>>> import mindspore.dataset as ds
|
|
1833
1990
|
>>> mind_dataset_dir = ["/path/to/mind_dataset_file"]
|
|
1834
1991
|
>>> dataset = ds.MindDataset(dataset_files=mind_dataset_dir)
|
|
1835
1992
|
>>> for _ in range(5):
|
|
@@ -1882,6 +2039,13 @@ class Dataset:
|
|
|
1882
2039
|
shard_id = 0
|
|
1883
2040
|
return num_shards, shard_id
|
|
1884
2041
|
|
|
2042
|
+
def pre_parse(self, getter_mode):
|
|
2043
|
+
if getter_mode:
|
|
2044
|
+
if hasattr(self, "python_multiprocessing"):
|
|
2045
|
+
self.python_multiprocessing = False
|
|
2046
|
+
if hasattr(self, "num_parallel_workers"):
|
|
2047
|
+
self.num_parallel_workers = 1
|
|
2048
|
+
|
|
1885
2049
|
def post_parse(self, ir_node):
|
|
1886
2050
|
if self.cache:
|
|
1887
2051
|
ir_node = ir_node.set_cache_client(self.cache.cache_client)
|
|
@@ -1890,6 +2054,18 @@ class Dataset:
|
|
|
1890
2054
|
|
|
1891
2055
|
return ir_node
|
|
1892
2056
|
|
|
2057
|
+
def set_init_step(self, init_step):
|
|
2058
|
+
self._global_step = init_step
|
|
2059
|
+
|
|
2060
|
+
def get_init_step(self):
|
|
2061
|
+
if self._global_step is not None:
|
|
2062
|
+
return self._global_step
|
|
2063
|
+
if len(self.children) == 1:
|
|
2064
|
+
return self.children[0].get_init_step()
|
|
2065
|
+
# When there are multiple children, we cannot tell from which child to get the initial step,
|
|
2066
|
+
# so we initialize from the beginning
|
|
2067
|
+
return 0
|
|
2068
|
+
|
|
1893
2069
|
|
|
1894
2070
|
class VisionBaseDataset(Dataset):
|
|
1895
2071
|
"""
|
|
@@ -1968,9 +2144,9 @@ class TextBaseDataset(Dataset):
|
|
|
1968
2144
|
Returns:
|
|
1969
2145
|
SentencePieceVocab, vocab built from the dataset.
|
|
1970
2146
|
"""
|
|
1971
|
-
warnings.warn("mindspore.dataset.Dataset.
|
|
2147
|
+
warnings.warn("mindspore.dataset.Dataset.build_sentencepiece_vocab is deprecated from version 2.0 "
|
|
1972
2148
|
"and will be removed in a future version. "
|
|
1973
|
-
"Use mindspore.dataset.text.
|
|
2149
|
+
"Use mindspore.dataset.text.SentencePieceVocab.from_dataset instead.", DeprecationWarning)
|
|
1974
2150
|
|
|
1975
2151
|
def _build_vocab(self, columns, freq_range, top_k, special_tokens, special_first):
|
|
1976
2152
|
"""
|
|
@@ -2193,8 +2369,11 @@ class MappableDataset(SourceDataset):
|
|
|
2193
2369
|
new_sampler (Sampler): The child sampler to be added.
|
|
2194
2370
|
|
|
2195
2371
|
Examples:
|
|
2372
|
+
>>> import mindspore.dataset as ds
|
|
2373
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
2374
|
+
>>>
|
|
2196
2375
|
>>> new_sampler = ds.DistributedSampler(10, 2)
|
|
2197
|
-
>>> dataset.add_sampler(new_sampler)
|
|
2376
|
+
>>> dataset.add_sampler(new_sampler)
|
|
2198
2377
|
"""
|
|
2199
2378
|
# Note: By adding a sampler, the sampled IDs will flow to the new_sampler
|
|
2200
2379
|
# after first passing through the current samplers attached to this dataset.
|
|
@@ -2210,7 +2389,9 @@ class MappableDataset(SourceDataset):
|
|
|
2210
2389
|
new_sampler (Sampler): The new sampler to replace with.
|
|
2211
2390
|
|
|
2212
2391
|
Examples:
|
|
2213
|
-
>>>
|
|
2392
|
+
>>> import mindspore.dataset as ds
|
|
2393
|
+
>>> dataset = ds.GeneratorDataset([i for i in range(10)], "column1")
|
|
2394
|
+
>>>
|
|
2214
2395
|
>>> # use a DistributedSampler instead
|
|
2215
2396
|
>>> new_sampler = ds.DistributedSampler(10, 2)
|
|
2216
2397
|
>>> dataset.use_sampler(new_sampler)
|
|
@@ -2251,24 +2432,25 @@ class MappableDataset(SourceDataset):
|
|
|
2251
2432
|
- The sum of split sizes > K, the difference will be removed from the first large
|
|
2252
2433
|
enough split such that it will have at least 1 row after removing the difference.
|
|
2253
2434
|
|
|
2254
|
-
randomize (bool, optional): Determines whether or not to split the data randomly. Default: True
|
|
2255
|
-
If True
|
|
2435
|
+
randomize (bool, optional): Determines whether or not to split the data randomly. Default: ``True``.
|
|
2436
|
+
If ``True``, the data will be randomly split. Otherwise, each split will be created with
|
|
2256
2437
|
consecutive rows from the dataset.
|
|
2257
2438
|
|
|
2258
2439
|
Note:
|
|
2259
2440
|
1. There is an optimized split function, which will be called automatically when the dataset
|
|
2260
2441
|
that calls this function is a MappableDataset.
|
|
2261
2442
|
2. Dataset should not be sharded if split is going to be called. Instead, create a
|
|
2262
|
-
DistributedSampler and specify a split to shard after splitting.
|
|
2263
|
-
sharded after a split, it is strongly recommended setting the same
|
|
2264
|
-
of execution, otherwise each shard may not be part of the same
|
|
2265
|
-
|
|
2443
|
+
:class:`mindspore.dataset.DistributedSampler` and specify a split to shard after splitting.
|
|
2444
|
+
If the dataset is sharded after a split, it is strongly recommended to set the same
|
|
2445
|
+
seed in each instance of execution, otherwise each shard may not be part of the same
|
|
2446
|
+
split (see Examples).
|
|
2447
|
+
3. It is strongly recommended to not shuffle the dataset, but set `randomize` to ``True`` instead.
|
|
2266
2448
|
Shuffling the dataset may not be deterministic, which means the data in each split
|
|
2267
2449
|
will be different in each epoch. Furthermore, if sharding occurs after split, each
|
|
2268
2450
|
shard may not be part of the same split.
|
|
2269
2451
|
|
|
2270
2452
|
Returns:
|
|
2271
|
-
|
|
2453
|
+
Tuple[Dataset], a tuple of new datasets split from the original one.
|
|
2272
2454
|
|
|
2273
2455
|
Raises:
|
|
2274
2456
|
RuntimeError: If get_dataset_size returns None or is not supported for this dataset.
|
|
@@ -2280,7 +2462,9 @@ class MappableDataset(SourceDataset):
|
|
|
2280
2462
|
floats don't sum to 1.
|
|
2281
2463
|
|
|
2282
2464
|
Examples:
|
|
2465
|
+
>>> import mindspore.dataset as ds
|
|
2283
2466
|
>>> # Since many datasets have shuffle on by default, set shuffle to False if split will be called!
|
|
2467
|
+
>>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
|
|
2284
2468
|
>>> dataset = ds.ImageFolderDataset(image_folder_dataset_dir, shuffle=False)
|
|
2285
2469
|
>>>
|
|
2286
2470
|
>>> # Set the seed, and tell split to use this seed when randomizing.
|
|
@@ -2348,7 +2532,7 @@ class BucketBatchByLengthDataset(UnionBaseDataset):
|
|
|
2348
2532
|
self.pad_to_bucket_boundary, self.drop_remainder)
|
|
2349
2533
|
|
|
2350
2534
|
|
|
2351
|
-
def _check_shm_usage(num_worker, queue_size,
|
|
2535
|
+
def _check_shm_usage(num_worker, queue_size, in_rowsize, out_rowsize):
|
|
2352
2536
|
"""
|
|
2353
2537
|
Check sufficient shared memory is available for shared memory queues
|
|
2354
2538
|
when training in parallel mode.
|
|
@@ -2358,10 +2542,10 @@ def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
|
|
|
2358
2542
|
device_num = _get_device_num()
|
|
2359
2543
|
# In the cluster, _get_device_num indicates the number of the entire cluster. The maximum number of cards
|
|
2360
2544
|
# on the ascend server is 8.
|
|
2361
|
-
if device_num > 1
|
|
2545
|
+
if device_num > 1:
|
|
2362
2546
|
device_num = min(device_num, 8)
|
|
2363
|
-
shm_estimate_usage = device_num * num_worker *
|
|
2364
|
-
(queue_size + 2) *
|
|
2547
|
+
shm_estimate_usage = device_num * num_worker * \
|
|
2548
|
+
(queue_size + 2) * (in_rowsize + out_rowsize) * 1024 * 1024
|
|
2365
2549
|
try:
|
|
2366
2550
|
shm_available = psutil.disk_usage('/dev/shm').free
|
|
2367
2551
|
if shm_estimate_usage >= threshold_ratio * shm_available:
|
|
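A plain restatement of the estimate computed in `_check_shm_usage` above, written as a hypothetical helper (not a library API) to make the accounting explicit:

    def estimate_shm_bytes(device_num, num_worker, queue_size, in_rowsize, out_rowsize):
        # Each worker reserves (queue_size + 2) slots for its input and output
        # shared-memory queues; each slot is sized by the corresponding rowsize in MB.
        if device_num > 1:
            device_num = min(device_num, 8)
        return device_num * num_worker * (queue_size + 2) * (in_rowsize + out_rowsize) * 1024 * 1024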
@@ -2385,10 +2569,10 @@ class BatchDataset(UnionBaseDataset):
|
|
|
2385
2569
|
batch_size (Union[int, function]): The number of rows each batch is created with. An
|
|
2386
2570
|
int or callable which takes exactly 1 parameter, BatchInfo.
|
|
2387
2571
|
drop_remainder (bool, optional): Determines whether or not to drop the last
|
|
2388
|
-
possibly incomplete batch. Default: False
|
|
2572
|
+
possibly incomplete batch. Default: ``False``. If ``True``, and if there are less
|
|
2389
2573
|
than batch_size rows available to make the last batch, then those rows will
|
|
2390
2574
|
be dropped and not propagated to the child node.
|
|
2391
|
-
num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None
|
|
2575
|
+
num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: ``None``.
|
|
2392
2576
|
per_batch_map (callable, optional): Per batch map callable. A callable which takes
|
|
2393
2577
|
(list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch of
|
|
2394
2578
|
Tensors on a given column. The number of lists should match with number of entries in input_columns. The
|
|
@@ -2398,10 +2582,16 @@ class BatchDataset(UnionBaseDataset):
|
|
|
2398
2582
|
output_columns (Union[str, list[str]], optional): List of names assigned to the columns outputted by
|
|
2399
2583
|
the last operation. This parameter is mandatory if len(input_columns) !=
|
|
2400
2584
|
len(output_columns). The size of this list must match the number of output
|
|
2401
|
-
columns of the last operation. Default: None
|
|
2585
|
+
columns of the last operation. Default: ``None``, output columns will have the same
|
|
2402
2586
|
name as the input columns, i.e., the columns will be replaced.
|
|
2403
|
-
max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory
|
|
2404
|
-
data between processes
|
|
2587
|
+
max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
|
|
2588
|
+
allocation to copy data between processes, the total occupied shared memory will increase as
|
|
2589
|
+
``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
|
|
2590
|
+
used if `python_multiprocessing` is set to ``True``. If it is an int value, it represents
|
|
2591
|
+
``input_columns`` and ``output_columns`` use this value as the unit to create shared memory.
|
|
2592
|
+
If it is a list, the first element represents the ``input_columns`` use this value as the unit to
|
|
2593
|
+
create shared memory, and the second element represents ``output_columns`` use this value as the unit
|
|
2594
|
+
to create shared memory. Default: 16.
|
|
2405
2595
|
|
|
2406
2596
|
"""
|
|
2407
2597
|
|
|
@@ -2427,7 +2617,10 @@ class BatchDataset(UnionBaseDataset):

         self.python_multiprocessing = python_multiprocessing
         self.process_pool = None
-
+        if isinstance(max_rowsize, int):
+            self.max_rowsize = [max_rowsize * self.batch_size] * 2
+        else:
+            self.max_rowsize = [max_rowsize[0] * self.batch_size, max_rowsize[1] * self.batch_size]

     def __del__(self):
         if hasattr(self, "process_pool") and self.process_pool is not None:
@@ -2497,7 +2690,7 @@ class BatchDataset(UnionBaseDataset):
                 self.num_parallel_workers = get_num_parallel_workers()

             self.process_pool = _PythonMultiprocessing(str(self), self.num_parallel_workers, [self.per_batch_map],
-                                                       self.max_rowsize
+                                                       self.max_rowsize)
             # Wrap per_batch_map into _PythonCallable
             self.per_batch_map = _PythonCallable(self.per_batch_map, 0, self.process_pool)
         else:
@@ -2507,19 +2700,53 @@ class BatchDataset(UnionBaseDataset):

 class BatchInfo(cde.CBatchInfo):
     """
-
-
+    This class helps to get dataset information dynamically when the input of `batch_size` or `per_batch_map`
+    in `batch` operation is a callable object.
     """

     def get_batch_num(self):
         """
-        Return the batch number
+        Return the batch number being processed in current epoch, start from 0.
+
+        Examples:
+            >>> # Create a dataset where its batch size is dynamic
+            >>> # Define a callable batch size function and let batch size increase 1 each time.
+            >>> import mindspore.dataset as ds
+            >>> from mindspore.dataset import BatchInfo
+            >>>
+            >>> dataset = ds.GeneratorDataset([i for i in range(3)], "column1", shuffle=False)
+            >>> def add_one(BatchInfo):
+            ...     return BatchInfo.get_batch_num() + 1
+            >>> dataset = dataset.batch(batch_size=add_one)
+            >>> print(list(dataset))
+            [[Tensor(shape=[1], dtype=Int64, value= [0])], [Tensor(shape=[2], dtype=Int64, value= [1, 2])]]
         """
         return

     def get_epoch_num(self):
         """
-        Return the epoch number
+        Return the epoch number, start from 0.
+
+        Examples:
+            >>> # Create a dataset where its batch size is dynamic
+            >>> # Define a callable batch size function and let batch size increase 1 each epoch.
+            >>> import mindspore.dataset as ds
+            >>> from mindspore.dataset import BatchInfo
+            >>>
+            >>> dataset = ds.GeneratorDataset([i for i in range(4)], "column1", shuffle=False)
+            >>> def add_one_by_epoch(BatchInfo):
+            ...     return BatchInfo.get_epoch_num() + 1
+            >>> dataset = dataset.batch(batch_size=add_one_by_epoch)
+            >>>
+            >>> result = []
+            >>> epoch = 2
+            >>> iterator = dataset.create_tuple_iterator(num_epochs=epoch)
+            >>> for i in range(epoch):
+            ...     result.extend(list(iterator))
+            >>> # result:
+            >>> # [[Tensor(shape=[1], dtype=Int64, value= [0])], [Tensor(shape=[1], dtype=Int64, value= [1])],
+            >>> # [Tensor(shape=[1], dtype=Int64, value= [2])], [Tensor(shape=[1], dtype=Int64, value= [3])],
+            >>> # [Tensor(shape=[2], dtype=Int64, value= [0, 1])], [Tensor(shape=[2], dtype=Int64, value= [2, 3])]]
         """
         return

@@ -2530,7 +2757,7 @@ class BlockReleasePair:

     Args:
         init_release_rows (int): Number of lines to allow through the pipeline.
-        callback (function): The callback function that will be called when release is called. Default: None
+        callback (function): The callback function that will be called when release is called. Default: ``None``.
     """

     def __init__(self, init_release_rows, callback=None):
@@ -2602,10 +2829,10 @@ class PaddedBatchDataset(UnionBaseDataset):
         batch_size (Union[int, function]): The number of rows each batch is created with. An
             int or callable which takes exactly 1 parameter, BatchInfo.
         drop_remainder (bool, optional): Determines whether or not to drop the last
-            possibly incomplete batch. Default: False
+            possibly incomplete batch. Default: ``False``. If True, and if there are less
             than batch_size rows available to make the last batch, then those rows will
             be dropped and not propagated to the child node.
-        num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: None
+        num_parallel_workers (int, optional): Number of workers to process the dataset in parallel. Default: ``None``.
         pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
             will pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
     """
@@ -2675,7 +2902,7 @@ class SyncWaitDataset(UnionBaseDataset):
         input_dataset (Dataset): Input dataset to apply flow control.
         num_batch (int): Number of batches without blocking at the start of each epoch.
         condition_name (str): Condition name that is used to toggle sending next row.
-        callback (function): Callback function that will be invoked when sync_update is called. Default: None
+        callback (function): Callback function that will be invoked when sync_update is called. Default: ``None``.

     Raises:
         RuntimeError: If condition name already exists.
@@ -2782,24 +3009,11 @@ class _PythonCallable:
         self.pool = pool
         # Python callable index
         self.idx = idx
-        self.check_interval = get_multiprocessing_timeout_interval()

     def __call__(self, *args):
         result = None
-        start_time = time.time()
-        count = 1
         get_data_from_worker_process = False
         while get_data_from_worker_process is False:
-            cost_time = time.time() - start_time
-            if cost_time > (self.check_interval * count):
-                logger.warning("It has been waiting for " + str(cost_time) + "s because the multi "
-                               "workers of map operation cost long time to process next data. "
-                               "Worker process list are: " + str(self.pool.get_pids()) + ", you can use "
-                               "\"py-spy dump -p {PID} -l -s \""
-                               "to dump the worker process stack. You can also set the timeout interval by "
-                               "ds.config.set_multiprocessing_interval to adjust the output frequency of this "
-                               "log.")
-                count += 1
             if self.pool.is_running() and check_iterator_cleanup() is False:
                 try:
                     result = self.pool.execute(self.idx, *args)
@@ -2820,6 +3034,7 @@ class _PythonCallable:
         return self.py_callable.to_json()


+# used when python_multiprocessing=True in map
 class Pipe:
     """
     Class to handle communication between the master process and the worker processes.
@@ -2829,29 +3044,34 @@ class Pipe:
         self.shared_memory = shared_memory
         self.eof = multiprocessing.Event()
         if self.shared_memory:
-            self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize)
-            self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize)
+            self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[0])
+            self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[1])
         else:
             self.in_queue = _Queue(1)
            self.res_queue = _Queue(1)
-        self.in_queue.
-        self.res_queue._joincancelled = True  # pylint: disable=W0212
+        self.in_queue.cancel_join_thread()  # Ensure that the process does not hung when exiting

     def master_send(self, func_index, data):
         self.in_queue.put_nowait((func_index, *data))

     def master_receive(self):
-
+        if self.eof is None:
+            raise RuntimeError("EOF is none when get data from worker.")
+        if self.eof.is_set():
+            return None
+        return self.res_queue.get(timeout=1)

     def master_close(self):
         self.eof.set()
+        self.send_finish_signal_to_worker()
         self.send_finish_signal()
-        self.res_queue.cancel_join_thread()
-        self.in_queue.cancel_join_thread()

     def send_finish_signal(self):
         self.worker_send(None)

+    def send_finish_signal_to_worker(self):
+        self.master_send(0, "QUIT")
+
     def worker_send(self, data):
         self.res_queue.put_until(data, timeout=1, exit_signal=self.eof)

@@ -2864,10 +3084,6 @@ class Pipe:
         func_index, *data = result
         return func_index, tuple(data)

-    def worker_close(self):
-        self.res_queue.cancel_join_thread()
-        self.in_queue.cancel_join_thread()
-

 def _main_process_already_exit():
     """
@@ -2885,6 +3101,8 @@ def _worker_loop(operations, pipe, seed=get_seed()):
     """
     Multiprocess worker process loop.
     """
+    # Ensure that the process does not hung when exiting
+    pipe.res_queue.cancel_join_thread()

     def _ignore_sigint():
         """
@@ -2900,9 +3118,10 @@ def _worker_loop(operations, pipe, seed=get_seed()):

         result = pipe.worker_receive()
         if result is None:
-            pipe.worker_close()
             return
         (idx, input_tensors) = result
+        if input_tensors == "QUIT":
+            break
         try:
             output_tensors = operations[idx](*input_tensors)

@@ -2911,6 +3130,10 @@ def _worker_loop(operations, pipe, seed=get_seed()):
             pipe.worker_send(ExceptionHandler(where="in map(or batch) worker and execute Python function"))
             # Do not return

+    # release the queue when stop the worker by master
+    del pipe.in_queue
+    del pipe.res_queue
+

 def worker_target(operations, seed=get_seed()):
     return lambda pipe: _worker_loop(operations, pipe, seed)
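The Pipe and _worker_loop changes above drop the old worker_close path and instead have the master push a "QUIT" sentinel that the worker loop breaks on, with cancel_join_thread() keeping exits from hanging on queue feeder threads. A self-contained toy of that shutdown pattern in plain multiprocessing (an analogue for illustration, not the MindSpore classes themselves):

    import multiprocessing as mp

    def worker(in_q, out_q):
        while True:
            task = in_q.get()
            if task == "QUIT":        # sentinel from the master, like send_finish_signal_to_worker
                break
            out_q.put(task * 2)

    if __name__ == "__main__":
        in_q, out_q = mp.Queue(1), mp.Queue(1)
        in_q.cancel_join_thread()     # do not block interpreter exit on the queue's feeder thread
        p = mp.Process(target=worker, args=(in_q, out_q), daemon=True)
        p.start()
        in_q.put(21)
        print(out_q.get())            # 42
        in_q.put("QUIT")              # ask the worker to leave its loop, then reap it
        p.join()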
@@ -2924,20 +3147,54 @@ class _MPWorker(multiprocessing.Process):
     def __init__(self, operations, warning_ctl, max_rowsize=16, seed=get_seed()):
         shared_memory = get_enable_shared_mem()
         self.pipe = Pipe(warning_ctl, shared_memory=shared_memory, max_rowsize=max_rowsize)
+        self.check_interval = get_multiprocessing_timeout_interval()
         super().__init__(target=worker_target(operations, seed), args=(self.pipe,), daemon=True)

     def execute(self, idx, *args):
+        """Acquiring data from a worker in an infinite loop"""
         self.pipe.master_send(idx, args)
-
-
-
-
+        time_s = time.time()
+        wait_count = 1
+        while True:
+            cost_time = time.time() - time_s
+            if cost_time / self.check_interval >= wait_count:
+                wait_count += 1
+                logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
+                               "worker of the map operation is hanging. "
+                               "Check whether the user defined data transform is too slow or the "
+                               "output data is too large. You can also set the timeout interval by "
+                               "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
+                               "of this log.")
+                pid = self.pid
+                logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
+                install_status, _ = subprocess.getstatusoutput("py-spy --version")
+                if install_status == 0:
+                    stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
+                    logger.warning("Map worker subprocess stack:\n{}".format(stack))
+                else:
+                    logger.warning("Please `pip install py-spy` to get the stacks of the stuck process.")
+            try:
+                res = self.pipe.master_receive()
+            except queue.Empty:
+                continue
+            if res is None:
+                # receive finish signal
+                return None
+            if isinstance(res, ExceptionHandler):
+                res.reraise()
+            return res

     def close(self):
         try:
             if self.is_alive():
+                # release the eager executor which is used by current process
+                transforms.transforms.clean_unused_executors()
+
                 logger.info(f"Closing worker with PID: {self.pid}")
                 self.pipe.master_close()
+                # del the handle which hold by master
+                del self.pipe.in_queue
+                del self.pipe.res_queue
                 super().terminate()
                 super().join()
                 super().close()
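When the warnings added in the execute loop above show up, the user-side knobs are the timeout interval and an optional py-spy install so the warning can include the stuck worker's stack; the interval value below is only an example:

    import mindspore.dataset as ds

    # Emit the "worker ... is hanging" warning (and the py-spy stack dump, if available)
    # every 600 seconds instead of the default interval.
    ds.config.set_multiprocessing_timeout_interval(600)
    print(ds.config.get_multiprocessing_timeout_interval())

    # pip install py-spy   <- lets the warning attach the worker's Python stack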
@@ -2965,6 +3222,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         """

         def __init__(self):
+            self.origin_hook = sys.excepthook
             sys.excepthook = self.__handler_exception

         @staticmethod
@@ -2976,15 +3234,15 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             time.sleep(3)

         def __handler_exception(self, ex_type, value, tb):
-
+            self.origin_hook(ex_type, value, tb)
             self.mp_pool_exit_preprocess()

-    def __init__(self, op_name, num_parallel_workers, operations,
+    def __init__(self, op_name, num_parallel_workers, operations, max_rowsize=16):
         super(_PythonMultiprocessing, self).__init__()
         self.op_name = op_name
         self.num_parallel_workers = num_parallel_workers
         self.operations = operations
-        self.
+        self.max_rowsize = max_rowsize

         self.workers = None
         self.pids = None
@@ -3056,6 +3314,9 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                                 "ds.config.set_enable_watchdog(False) to block this error.")
                 os.kill(os.getpid(), signal.SIGTERM)

+        # release the workers
+        del workers
+
     @staticmethod
     def _terminate_processes(processes):
         """Terminate subprocesses"""
@@ -3141,6 +3402,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                 time.sleep(0.1)

             _PythonMultiprocessing._terminate_processes(workers)
+            del workers
             os.kill(os.getpid(), signal.SIGTERM)

     def launch(self, op_id=-1):
@@ -3171,7 +3433,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):

         """
         if get_enable_shared_mem():
-            self.
+            _check_shm_usage(self.num_parallel_workers, 1, self.max_rowsize[0], self.max_rowsize[1])

         if self.workers is not None:
             raise Exception("Pool was already created, close it first.")
@@ -3183,7 +3445,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         self.workers = []
         self.warning_ctl = multiprocessing.Value('i', 0)
         for i in range(self.num_parallel_workers):
-            worker = _MPWorker(self.operations, self.warning_ctl, self.
+            worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, i + get_seed())
             worker.start()
             self.workers.append(worker)

@@ -3197,8 +3459,11 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         atexit.register(self.terminate)

     def terminate(self):
-
+        # close watch dog first and then close all the workers
         self.abort_watchdog()
+        self.close_all_workers()
+        if hasattr(self, "warning_ctl"):
+            del self.warning_ctl

     def get_pids(self):
         """
@@ -3242,12 +3507,6 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
     def is_mp_enabled(self):
         return self.workers is not None

-    def check_shared_memory(self):
-        """
-        Check if there is enough shared memory in the system.
-        """
-        _check_shm_usage(self.num_parallel_workers, 1, self.max_row_size, 2)
-
     def execute(self, idx, *args):
         """
         Execute
@@ -3294,6 +3553,7 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         self._abort_watchdog()
         if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
             _PythonMultiprocessing._terminate_processes([self.cleaning_process])
+            del self.cleaning_process

     def is_running(self):
         if hasattr(self, 'workers') and self.workers is not None:
@@ -3301,9 +3561,34 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         return False

     def close_all_workers(self):
+        """Close all the subprocess workers"""
         if hasattr(self, 'workers') and self.workers is not None:
             for w in self.workers:
                 w.close()
+            check_interval = get_multiprocessing_timeout_interval()
+            for w in self.workers:
+                try:
+                    subprocess_file_descriptor = w.sentinel
+                    st = time.time()
+                    while _PythonMultiprocessing.is_process_alive(w.pid):
+                        time.sleep(0.01)  # sleep 10ms, waiting for the subprocess exit
+                        if time.time() - st > check_interval:
+                            logger.warning("Waiting for the subprocess worker [{}] to exit.".format(w.pid))
+                            st += check_interval
+                except ValueError as e:
+                    if "process object is closed" in str(e):
+                        continue
+                    raise e
+                try:
+                    if w.is_alive():
+                        os.close(subprocess_file_descriptor)
+                except OSError as e:
+                    # Maybe the file descriptor had been released, so ignore the 'Bad file descriptor'
+                    if "Bad file descriptor" not in str(e):
+                        raise e
+
+            # use clear to release the handle which is better than self.workers = None
+            self.workers.clear()
             self.workers = None
             self.pids = None

@@ -3315,24 +3600,29 @@ class MapDataset(UnionBaseDataset):
     Args:
         input_dataset (Dataset): Input Dataset to be mapped.
         operations (Union[list[TensorOperation], list[functions]]): A function mapping a nested structure of tensors
-            to another nested structure of tensor. Default: None
+            to another nested structure of tensor. Default: ``None``.
         input_columns (Union[str, list[str]]): List of names of the input columns.
-            Default: None
+            Default: ``None``, the operations will be applied on the first columns in the dataset.
             The size of the list should match the number of inputs of the first operation.
         output_columns (Union[str, list[str]], optional): List of names of the output columns.
             The size of the list should match the number of outputs of the last operation.
-            Default: None
+            Default: ``None``, output columns will be the input columns, i.e., the columns will
             be replaced.
         num_parallel_workers (int, optional): Number of workers to process the dataset
-            in parallel. Default: None
+            in parallel. Default: ``None``.
         python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
-            option could be beneficial if the Python operation is computational heavy. Default: False
+            option could be beneficial if the Python operation is computational heavy. Default: ``False``.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
-            Default: None
-        callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: None
-        max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory
-            data between processes
-
+            Default: ``None``, which means no cache is used.
+        callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called. Default: ``None``.
+        max_rowsize(Union[int, list[int]], optional): Maximum size of row in MB that is used for shared memory
+            allocation to copy data between processes, the total occupied shared memory will increase as
+            ``num_parallel_workers`` and :func:`mindspore.dataset.config.set_prefetch_size` increase. This is only
+            used if python_multiprocessing is set to True. If it is an int value, it represents ``input_columns`` and
+            ``output_columns`` use this value as the unit to create shared memory. If it is a list, the first element
+            represents the ``input_columns`` use this value as the unit to create shared memory, and the second element
+            represents ``output_columns`` use this value as the unit to create shared memory. Default: 16.
+        offload (bool, optional): Flag to indicate whether offload is used. Default: ``None``.
     """

     def __init__(self, input_dataset, operations=None, input_columns=None, output_columns=None,
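As with batch, the map docstring above now accepts max_rowsize as an int or as an [input, output] pair of per-row shared-memory sizes in MB. An illustrative call (dataset, transform and sizes are invented for the example):

    import numpy as np
    import mindspore.dataset as ds

    def to_float(img):
        # Python transform executed in the worker processes
        return img.astype(np.float32) / 255.0

    images = np.random.randint(0, 255, (32, 64, 64, 3), dtype=np.uint8)
    data = ds.NumpySlicesDataset({"image": images}, shuffle=False)
    data = data.map(operations=to_float,
                    input_columns=["image"],
                    python_multiprocessing=True,
                    num_parallel_workers=4,
                    max_rowsize=[2, 8])  # 2 MB per input row, 8 MB per output row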
@@ -3362,7 +3652,10 @@ class MapDataset(UnionBaseDataset):
         self.process_pool = None

         self.callbacks = to_list(callbacks)
-
+        if isinstance(max_rowsize, int):
+            self.max_rowsize = [max_rowsize] * 2
+        else:
+            self.max_rowsize = max_rowsize
         self.offload = offload

     def parse(self, children=None):
@@ -3400,7 +3693,7 @@ class MapDataset(UnionBaseDataset):

         callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
         return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
-                           callbacks,
+                           callbacks, OffloadToManualOffloadMode.get(self.offload), self.process_pool)

     def __deepcopy__(self, memodict):
         return self.__safe_deepcopy__(memodict, exclude=("operations", "callbacks", "__transfer_dataset__"))
@@ -3426,7 +3719,7 @@ class MapDataset(UnionBaseDataset):
         return op_name

     @staticmethod
-    def __construct_debug_hook(previous_op_name=None):
+    def __construct_debug_hook(previous_op_name=None, is_first_op=False):
         """
         Wrap debug hook into FuncWrapper.
         """
@@ -3437,6 +3730,7 @@ class MapDataset(UnionBaseDataset):
                 # making deep copy to allow each debug hook instance hold unique variables
                 new_fn = copy.deepcopy(fn)
                 new_fn.set_previous_op_name(previous_op_name)
+                new_fn.set_is_first(is_first_op)
                 inserted_func = transforms.py_transforms_util.FuncWrapper(new_fn)
                 inserted_func.implementation = Implementation.PY
                 inserted_functions.append(inserted_func)
@@ -3558,7 +3852,8 @@ class MapDataset(UnionBaseDataset):
         """
         if not get_debug_mode():
             return operations
-
+        first_op_name = self.__parse_op_name(operations[0])
+        inserted_operations = self.__construct_debug_hook(first_op_name, is_first_op=True)
         for op in operations:
             inserted_operations.append(op)
             op_name = self.__parse_op_name(op)
@@ -3588,9 +3883,9 @@ class FilterDataset(UnionBaseDataset):
         input_dataset (Dataset): Input Dataset to be mapped.
         predicate (callable): Python callable which returns a boolean value. If False then filter the element.
         input_columns (Union[str, list[str]], optional): List of names of the input columns.
-            Default: None
+            Default: ``None``, the predicate will be applied to all columns in the dataset.
         num_parallel_workers (int, optional): Number of workers to process the dataset
-            in parallel. Default: None
+            in parallel. Default: ``None``.
     """

     def __init__(self, input_dataset, predicate, input_columns=None, num_parallel_workers=None):
@@ -3702,6 +3997,8 @@ class ConcatDataset(UnionBaseDataset):
                                  "valid samples in the dataset." % child_index)
             child_index += 1

+        self._children_sizes = self.children_sizes_.copy()
+
         # _children_flag_and_nums: A list of pair<int ,int>.The first element of pair is flag that characterizes
         # whether the dataset is mappable. The second element of pair is length of the dataset
         self._children_flag_and_nums = []
@@ -3725,7 +4022,8 @@ class ConcatDataset(UnionBaseDataset):
             self._children_flag_and_nums.append((1, dataset_len))

     def parse(self, children=None):
-        return cde.ConcatNode(children, self._sampler, self._children_flag_and_nums, self._children_start_end_index_
+        return cde.ConcatNode(children, self._sampler, self._children_flag_and_nums, self._children_start_end_index_,
+                              self._children_sizes)

     def use_sampler(self, sampler):
         """
@@ -3741,8 +4039,19 @@ class ConcatDataset(UnionBaseDataset):
             ValueError: If the parameter NumSamples of sampler is not None.
             ValueError: If num_shards <=0.
         """
-        if not isinstance(sampler, samplers.DistributedSampler):
-            raise TypeError("The parameter %s of concat must be DistributedSampler!" % sampler)
+        if not isinstance(sampler, (samplers.DistributedSampler, samplers.RandomSampler)):
+            raise TypeError("The parameter %s of concat must be DistributedSampler or RandomSampler!" % sampler)
+
+        if isinstance(sampler, samplers.RandomSampler):
+            if sampler.replacement:
+                raise ValueError("The parameter replacement of RandomSampler must be False!")
+
+            if sampler.get_num_samples() is not None:
+                raise ValueError("The parameter num_samples of RandomSampler is not support to be set!")
+
+            self._sampler = sampler
+            self._children_sizes = [c.get_dataset_size() for c in self.children]
+            return

         if sampler.is_shuffled():
             raise ValueError("The parameter shuffle of DistributedSampler must be False!")
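The use_sampler change above means a concatenated dataset can now also be shuffled globally with a RandomSampler, as long as replacement stays False and num_samples is left unset. A sketch mirroring the GeneratorDataset style used elsewhere in this file:

    import mindspore.dataset as ds

    ds1 = ds.GeneratorDataset([i for i in range(3)], "col1", shuffle=False)
    ds2 = ds.GeneratorDataset([i for i in range(3, 6)], "col1", shuffle=False)

    concat = ds1 + ds2                                       # same as ds1.concat(ds2)
    concat.use_sampler(ds.RandomSampler(replacement=False))  # num_samples must stay None

    for row in concat.create_tuple_iterator(output_numpy=True):
        print(row)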
@@ -3845,7 +4154,12 @@ class _ToDevice:
         self._runtime_context = cde.PythonRuntimeContext()
         self._runtime_context.Init()
         self._to_device = cde.ToDevice(num_epochs)
-
+        if dataset.get_init_step() != 0:
+            init_step = dataset.get_init_step()
+            dataset_size = dataset.get_dataset_size()
+            self._to_device.Init(ir_tree, init_step, dataset_size)
+        else:
+            self._to_device.Init(ir_tree, 0, -1)
         self._runtime_context.AssignConsumer(self._to_device)

         ITERATORS_LIST.append(weakref.ref(self))
@@ -3872,6 +4186,14 @@ class _ToDevice:
         """
         return self._to_device.GetDataInfo()

+    def get_send_info(self):
+        """
+        In sink mode, it returns the send information of dataset at this moment.
+        Send information includes number of send batches, time summary of fetching data on host
+        and time summary of sending data.
+        """
+        return self._to_device.GetSendInfo()
+
     def release(self):
         """
         Manually terminate Device Queue instead of relying on out of scope destruction.
@@ -3892,8 +4214,8 @@ class _ToDevice:
         offload_model = GetOffloadModel(self._to_device, col_names)
         return offload_model

-    def _reset(self, step,
-        self._to_device.Reset(step,
+    def _reset(self, step, dataset_size):
+        self._to_device.Reset(step, dataset_size)


 class TransferDataset(Dataset):
@@ -3902,9 +4224,9 @@ class TransferDataset(Dataset):

     Args:
         input_dataset (Dataset): Input Dataset to be transferred.
-        send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: True
+        send_epoch_end (bool, optional): Whether to send end of sequence to device or not. Default: ``True``.
         create_data_info_queue (bool, optional): Whether to create queue which stores
-            types and shapes of data or not. Default: False
+            types and shapes of data or not. Default: ``False``.

     Raises:
         TypeError: If device_type is empty.
@@ -3912,9 +4234,14 @@ class TransferDataset(Dataset):
         RuntimeError: If dataset is unknown.
     """

-    def __init__(self, input_dataset, send_epoch_end=True, create_data_info_queue=False):
+    def __init__(self, input_dataset, send_epoch_end=True, create_data_info_queue=False, queue_name=""):
         super().__init__(children=input_dataset)
-
+        if queue_name == "":
+            self.queue_name = str(uuid.uuid1())
+            logger.info(f"queue_name is newly generated. value is {self.queue_name}")
+        else:
+            self.queue_name = queue_name
+            logger.info(f"queue_name is read from compile cache. value is {self.queue_name}")
         self.device_type = context.get_context("device_target") if context else "CPU"
         self.device_id = context.get_context("device_id") if context else 0

@@ -3973,6 +4300,16 @@ class TransferDataset(Dataset):
             return self._to_device.get_data_info()
         raise RuntimeError("Calling get_data_info with bad state.")

+    def get_send_info(self):
+        """
+        In sink mode, it returns the send information of dataset at this moment.
+        Send information includes number of send batches, time summary of fetching data on host
+        and time summary of sending data.
+        """
+        if self._to_device is not None:
+            return self._to_device.get_send_info()
+        raise RuntimeError("Calling get_send_info with bad state, data queue is not initialized.")
+
     def get_offload_model(self):
         if self._to_device is not None:
             return self._to_device.get_offload_model(self.column_name)
@@ -3986,10 +4323,10 @@ class TransferDataset(Dataset):
         if self._to_device is not None:
             self._to_device.release()

-    def _reset(self, step,
+    def _reset(self, step, dataset_size):
         if self._to_device is not None:
-            logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(
-            self._to_device._reset(step,
+            logger.info("Reset the dataset pipeline to step: " + str(step) + ", epoch: " + str(step // dataset_size))
+            self._to_device._reset(step, dataset_size)  # pylint: disable=protected-access


 class Schema:
@@ -3997,15 +4334,13 @@ class Schema:
     Class to represent a schema of a dataset.

     Args:
-        schema_file(str): Path of the schema file. Default: None
-
-    Returns:
-        Schema object, schema info about dataset.
+        schema_file (str): Path of the schema file. Default: ``None``.

     Raises:
         RuntimeError: If schema file failed to load.

     Examples:
+        >>> import mindspore.dataset as ds
         >>> from mindspore import dtype as mstype
         >>>
         >>> # Create schema; specify column name, mindspore.dtype and shape of the column
@@ -4027,16 +4362,17 @@ class Schema:
             name (str): The new name of the column.
             de_type (str): Data type of the column.
             shape (list[int], optional): Shape of the column.
-                Default: None
+                Default: ``None``, [-1] which is an unknown shape of rank 1.

         Raises:
             ValueError: If column type is unknown.

         Examples:
-
-
-
-
+            >>> import mindspore.dataset as ds
+            >>> from mindspore import dtype as mstype
+            >>>
+            >>> schema = ds.Schema()
+            >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
         """
         if isinstance(de_type, typing.Type):
             de_type = mstype_to_detype(de_type)
@@ -4084,9 +4420,11 @@ class Schema:

         Examples:
             >>> from mindspore.dataset import Schema
+            >>> from mindspore import dtype as mstype
             >>>
-            >>>
-            >>>
+            >>> schema = Schema()
+            >>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
+            >>> json = schema.to_json()
         """
         return self.cpp_schema.to_json()

@@ -4104,12 +4442,11 @@ class Schema:

         Examples:
             >>> import json
-            >>>
             >>> from mindspore.dataset import Schema
             >>>
-            >>> with open("/path/to/schema_file") as file:
+            >>> with open("/path/to/schema_file", "r") as file:
             ...     json_obj = json.load(file)
-            ...     schema =
+            ...     schema = Schema()
             ...     schema.from_json(json_obj)
         """
         self.cpp_schema.from_string(json.dumps(json_obj, indent=2))