mindspore 2.0.0a0__cp39-cp39-win_amd64.whl → 2.0.0rc1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -2
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +4 -3
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -48
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +16 -1
- mindspore/_extends/parse/parser.py +107 -22
- mindspore/_extends/parse/resources.py +0 -7
- mindspore/_extends/parse/standard_method.py +885 -413
- mindspore/amp.py +52 -57
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +38 -20
- mindspore/boost/dim_reduce.py +3 -3
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +4 -6
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +41 -7
- mindspore/common/api.py +215 -141
- mindspore/common/dtype.py +8 -1
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +4 -2
- mindspore/common/jit_config.py +17 -13
- mindspore/common/mutable.py +33 -13
- mindspore/common/parameter.py +23 -21
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +62 -41
- mindspore/common/tensor.py +852 -1154
- mindspore/communication/__init__.py +2 -2
- mindspore/communication/_comm_helper.py +11 -4
- mindspore/communication/management.py +22 -21
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +201 -23
- mindspore/dataset/__init__.py +6 -6
- mindspore/dataset/audio/__init__.py +7 -7
- mindspore/dataset/audio/transforms.py +670 -30
- mindspore/dataset/audio/utils.py +47 -4
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +210 -14
- mindspore/dataset/core/validator_helpers.py +2 -2
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +322 -66
- mindspore/dataset/engine/datasets_audio.py +80 -76
- mindspore/dataset/engine/datasets_standard_format.py +51 -38
- mindspore/dataset/engine/datasets_text.py +232 -118
- mindspore/dataset/engine/datasets_user_defined.py +41 -17
- mindspore/dataset/engine/datasets_vision.py +746 -225
- mindspore/dataset/engine/graphdata.py +75 -10
- mindspore/dataset/engine/iterators.py +45 -5
- mindspore/dataset/engine/offload.py +48 -28
- mindspore/dataset/engine/validators.py +117 -8
- mindspore/dataset/text/__init__.py +6 -5
- mindspore/dataset/text/transforms.py +86 -3
- mindspore/dataset/text/utils.py +6 -4
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +3 -2
- mindspore/dataset/transforms/c_transforms.py +1 -1
- mindspore/dataset/transforms/transforms.py +2 -2
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +2 -3
- mindspore/dataset/vision/c_transforms.py +9 -9
- mindspore/dataset/vision/py_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +2 -0
- mindspore/dataset/vision/transforms.py +160 -161
- mindspore/dataset/vision/utils.py +3 -3
- mindspore/experimental/map_parameter.py +38 -26
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +44 -9
- mindspore/include/api/delegate.h +1 -1
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_parallel_runner.h +2 -2
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +19 -3
- mindspore/include/api/types.h +3 -3
- mindspore/include/dataset/constants.h +7 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filereader.py +18 -0
- mindspore/mindrecord/filewriter.py +197 -34
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +3 -3
- mindspore/mindrecord/tools/cifar10_to_mr.py +3 -3
- mindspore/mindrecord/tools/csv_to_mr.py +3 -3
- mindspore/mindrecord/tools/imagenet_to_mr.py +16 -11
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/__init__.py +0 -4
- mindspore/nn/cell.py +204 -132
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +7 -6
- mindspore/nn/layer/__init__.py +5 -4
- mindspore/nn/layer/activation.py +40 -89
- mindspore/nn/layer/basic.py +255 -624
- mindspore/nn/layer/channel_shuffle.py +7 -6
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +41 -4
- mindspore/nn/layer/conv.py +64 -28
- mindspore/nn/layer/dense.py +9 -8
- mindspore/nn/layer/embedding.py +27 -25
- mindspore/nn/layer/image.py +53 -46
- mindspore/nn/layer/math.py +97 -105
- mindspore/nn/layer/normalization.py +117 -86
- mindspore/nn/layer/padding.py +185 -95
- mindspore/nn/layer/pooling.py +817 -414
- mindspore/nn/layer/rnn_cells.py +10 -15
- mindspore/nn/layer/rnns.py +37 -38
- mindspore/nn/layer/thor_layer.py +11 -12
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +5 -4
- mindspore/nn/loss/loss.py +334 -199
- mindspore/nn/optim/ada_grad.py +6 -6
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +4 -5
- mindspore/nn/optim/adam.py +126 -62
- mindspore/nn/optim/adamax.py +3 -4
- mindspore/nn/optim/adasum.py +6 -6
- mindspore/nn/optim/asgd.py +2 -2
- mindspore/nn/optim/ftrl.py +67 -38
- mindspore/nn/optim/lamb.py +4 -5
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +43 -4
- mindspore/nn/optim/momentum.py +6 -5
- mindspore/nn/optim/optimizer.py +3 -1
- mindspore/nn/optim/proximal_ada_grad.py +2 -2
- mindspore/nn/optim/rmsprop.py +1 -1
- mindspore/nn/optim/rprop.py +8 -9
- mindspore/nn/optim/sgd.py +19 -13
- mindspore/nn/optim/thor.py +10 -15
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +4 -4
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +9 -15
- mindspore/nn/probability/distribution/bernoulli.py +3 -3
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +5 -7
- mindspore/nn/probability/distribution/cauchy.py +3 -3
- mindspore/nn/probability/distribution/distribution.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +3 -3
- mindspore/nn/probability/distribution/half_normal.py +15 -11
- mindspore/nn/probability/distribution/laplace.py +16 -13
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/normal.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/student_t.py +20 -15
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +27 -10
- mindspore/nn/wrap/grad_reducer.py +2 -2
- mindspore/nn/wrap/loss_scale.py +40 -24
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +35 -30
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +22 -19
- mindspore/numpy/utils.py +1 -1
- mindspore/numpy/utils_const.py +108 -58
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +86 -117
- mindspore/ops/_grad/grad_base.py +23 -1
- mindspore/ops/_grad/grad_clip_ops.py +2 -3
- mindspore/ops/_grad/grad_comm_ops.py +34 -24
- mindspore/ops/_grad/grad_implementations.py +9 -45
- mindspore/ops/_grad/grad_inner_ops.py +47 -4
- mindspore/ops/_grad/grad_math_ops.py +142 -117
- mindspore/ops/_grad/grad_nn_ops.py +71 -165
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +7 -6
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +150 -15
- mindspore/ops/_grad_experimental/grad_image_ops.py +16 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +1 -22
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +4 -11
- mindspore/ops/_grad_experimental/grad_math_ops.py +210 -89
- mindspore/ops/_grad_experimental/grad_nn_ops.py +26 -22
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +49 -8
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +4 -4
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +2 -2
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -5
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -4
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +236 -4
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_v1.py → adaptive_avg_pool_2d.py} +6 -5
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -43
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/aicpu/bessel_i0.py} +15 -8
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +0 -3
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/{adaptive_avg_pool_2d_grad_v1.py → digamma.py} +7 -9
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/lgamma.py} +16 -10
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/aicpu/trans_data.py +1 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/sparse_slice.py +4 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +6 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -611
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +2 -2
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/matmul_ds.py +2 -0
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/scatter_mul.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -2
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_register_for_op.py +1 -0
- mindspore/ops/_utils/__init__.py +1 -2
- mindspore/ops/_utils/utils.py +19 -40
- mindspore/ops/_vmap/vmap_array_ops.py +116 -38
- mindspore/ops/_vmap/vmap_base.py +16 -9
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +7 -5
- mindspore/ops/_vmap/vmap_image_ops.py +12 -5
- mindspore/ops/_vmap/vmap_math_ops.py +46 -5
- mindspore/ops/_vmap/vmap_nn_ops.py +15 -21
- mindspore/ops/_vmap/vmap_random_ops.py +1 -1
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +220 -106
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -23
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -17
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +39 -41
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +41 -43
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +51 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +24 -25
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +53 -53
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +77 -85
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +37 -39
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +70 -72
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +17 -17
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +2 -0
- mindspore/ops/composite/__init__.py +7 -8
- mindspore/ops/composite/base.py +101 -47
- mindspore/ops/composite/math_ops.py +188 -158
- mindspore/ops/composite/multitype_ops/_compile_utils.py +415 -170
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +142 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +2 -3
- mindspore/ops/composite/multitype_ops/getitem_impl.py +31 -3
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -5
- mindspore/ops/composite/multitype_ops/not_in_impl.py +9 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/setitem_impl.py +21 -3
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +35 -4
- mindspore/ops/function/__init__.py +152 -8
- mindspore/ops/function/array_func.py +2555 -674
- mindspore/ops/function/clip_func.py +209 -13
- mindspore/ops/function/debug_func.py +2 -2
- mindspore/ops/function/grad/__init__.py +2 -1
- mindspore/ops/function/grad/grad_func.py +147 -62
- mindspore/ops/function/image_func.py +54 -38
- mindspore/ops/function/linalg_func.py +167 -16
- mindspore/ops/function/math_func.py +4849 -1492
- mindspore/ops/function/nn_func.py +2573 -988
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +3 -3
- mindspore/ops/function/random_func.py +790 -73
- mindspore/ops/function/sparse_func.py +98 -78
- mindspore/ops/function/sparse_unary_func.py +54 -53
- mindspore/ops/function/spectral_func.py +27 -24
- mindspore/ops/function/vmap_func.py +22 -2
- mindspore/ops/functional.py +97 -37
- mindspore/ops/op_info_register.py +70 -28
- mindspore/ops/operations/__init__.py +47 -14
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +5 -5
- mindspore/ops/operations/_grad_ops.py +276 -187
- mindspore/ops/operations/_inner_ops.py +319 -113
- mindspore/ops/operations/_ms_kernel.py +10 -8
- mindspore/ops/operations/_ocr_ops.py +9 -9
- mindspore/ops/operations/_opaque_predicate_registry.py +4 -0
- mindspore/ops/operations/_quant_ops.py +137 -102
- mindspore/ops/operations/_rl_inner_ops.py +121 -60
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1004 -2
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +801 -466
- mindspore/ops/operations/comm_ops.py +51 -49
- mindspore/ops/operations/control_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +123 -44
- mindspore/ops/operations/debug_ops.py +24 -24
- mindspore/ops/operations/image_ops.py +240 -153
- mindspore/ops/operations/inner_ops.py +34 -50
- mindspore/ops/operations/linalg_ops.py +31 -9
- mindspore/ops/operations/math_ops.py +988 -757
- mindspore/ops/operations/nn_ops.py +965 -819
- mindspore/ops/operations/other_ops.py +51 -40
- mindspore/ops/operations/random_ops.py +204 -122
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +254 -93
- mindspore/ops/operations/spectral_ops.py +35 -3
- mindspore/ops/primitive.py +111 -9
- mindspore/parallel/_auto_parallel_context.py +189 -83
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +99 -7
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +7 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +6 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +20 -16
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +48 -111
- mindspore/parallel/_utils.py +1 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +37 -34
- mindspore/parallel/shard.py +17 -18
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +69 -47
- mindspore/profiler/parser/ascend_timeline_generator.py +49 -42
- mindspore/profiler/parser/base_timeline_generator.py +49 -56
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +98 -78
- mindspore/profiler/parser/hwts_log_parser.py +1 -1
- mindspore/profiler/parser/integrator.py +15 -14
- mindspore/profiler/parser/minddata_analyzer.py +2 -2
- mindspore/profiler/parser/msadvisor_analyzer.py +12 -25
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +2 -1
- mindspore/profiler/profiling.py +218 -186
- mindspore/rewrite/__init__.py +3 -1
- mindspore/rewrite/api/node.py +1 -114
- mindspore/rewrite/api/node_type.py +3 -0
- mindspore/rewrite/api/pattern_engine.py +31 -1
- mindspore/rewrite/api/scoped_value.py +4 -4
- mindspore/rewrite/api/symbol_tree.py +3 -78
- mindspore/rewrite/api/tree_node_helper.py +1 -1
- mindspore/rewrite/ast_creator_register.py +1 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -2
- mindspore/rewrite/ast_helpers/ast_creator.py +1 -2
- mindspore/rewrite/ast_helpers/ast_finder.py +65 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +11 -3
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +18 -2
- mindspore/rewrite/namespace.py +0 -2
- mindspore/rewrite/node.py +157 -11
- mindspore/rewrite/parsers/assign_parser.py +231 -53
- mindspore/rewrite/parsers/class_def_parser.py +187 -109
- mindspore/rewrite/parsers/for_parser.py +24 -14
- mindspore/rewrite/parsers/function_def_parser.py +21 -4
- mindspore/rewrite/parsers/if_parser.py +6 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +256 -133
- mindspore/rewrite/symbol_tree_builder.py +38 -1
- mindspore/run_check/_check_version.py +69 -63
- mindspore/run_check/run_check.py +2 -1
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +1 -1
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +273 -102
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +2 -2
- mindspore/train/callback/_checkpoint.py +3 -3
- mindspore/train/callback/_early_stop.py +3 -3
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +29 -31
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +3 -3
- mindspore/train/callback/_reduce_lr_on_plateau.py +4 -4
- mindspore/train/callback/_summary_collector.py +23 -16
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +15 -3
- mindspore/train/dataset_helper.py +10 -15
- mindspore/train/loss_scale_manager.py +8 -11
- mindspore/train/metrics/__init__.py +1 -1
- mindspore/train/metrics/bleu_score.py +1 -1
- mindspore/train/metrics/confusion_matrix.py +1 -1
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/dice.py +2 -2
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +4 -3
- mindspore/train/metrics/mean_surface_distance.py +2 -2
- mindspore/train/metrics/occlusion_sensitivity.py +1 -1
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +1 -1
- mindspore/train/metrics/recall.py +1 -1
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +45 -28
- mindspore/train/serialization.py +295 -188
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -13
- mindspore/train/train_thor/convert_utils.py +2 -2
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +3 -2
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +610 -541
- mindspore/compression/__init__.py +0 -19
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -515
- mindspore/compression/quant/__init__.py +0 -28
- mindspore/compression/quant/qat.py +0 -634
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -140
- mindspore/nn/probability/dpn/vae/vae.py +0 -124
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -364
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/aicpu/parallel_concat.py +0 -42
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Cast_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/MatMul_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/Transpose_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -15
- mindspore/ops/composite/array_ops.py +0 -241
- mindspore/ops/composite/clip_ops.py +0 -134
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.0.0a0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -32,14 +32,14 @@ import mindspore._c_dataengine as cde
|
|
|
32
32
|
|
|
33
33
|
from .datasets import VisionBaseDataset, SourceDataset, MappableDataset, Shuffle, Schema
|
|
34
34
|
from .datasets_user_defined import GeneratorDataset
|
|
35
|
-
from .validators import
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
35
|
+
from .validators import check_caltech101_dataset, check_caltech256_dataset, check_celebadataset, \
|
|
36
|
+
check_cityscapes_dataset, check_cocodataset, check_div2k_dataset, check_emnist_dataset, check_fake_image_dataset, \
|
|
37
|
+
check_flickr_dataset, check_flowers102dataset, check_food101_dataset, check_imagefolderdataset, \
|
|
38
|
+
check_kittidataset, check_lfw_dataset, check_lsun_dataset, check_manifestdataset, check_mnist_cifar_dataset, \
|
|
39
|
+
check_omniglotdataset, check_photo_tour_dataset, check_places365_dataset, check_qmnist_dataset, \
|
|
40
|
+
check_random_dataset, check_rendered_sst2_dataset, check_sb_dataset, check_sbu_dataset, check_semeion_dataset, \
|
|
41
|
+
check_stl10_dataset, check_sun397_dataset, check_svhn_dataset, check_usps_dataset, check_vocdataset, \
|
|
42
|
+
check_wider_face_dataset
|
|
43
43
|
|
|
44
44
|
from ..core.validator_helpers import replace_none
|
|
45
45
|
|
|
@@ -108,7 +108,7 @@ class _Caltech101Dataset:
|
|
|
108
108
|
|
|
109
109
|
class Caltech101Dataset(GeneratorDataset):
|
|
110
110
|
"""
|
|
111
|
-
|
|
111
|
+
Caltech 101 dataset.
|
|
112
112
|
|
|
113
113
|
The columns of the generated dataset depend on the value of `target_type` .
|
|
114
114
|
|
|
@@ -130,7 +130,7 @@ class Caltech101Dataset(GeneratorDataset):
|
|
|
130
130
|
If `target_type` is 'all', return category and annotation. Default: None, means 'category'.
|
|
131
131
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
132
132
|
Default: None, all images.
|
|
133
|
-
num_parallel_workers (int, optional): Number of
|
|
133
|
+
num_parallel_workers (int, optional): Number of worker subprocesses to read the data. Default: 1.
|
|
134
134
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
135
135
|
Default: None, expected order behavior shown in the table below.
|
|
136
136
|
decode (bool, optional): Whether or not to decode the images after reading. Default: False.
|
|
@@ -148,7 +148,7 @@ class Caltech101Dataset(GeneratorDataset):
|
|
|
148
148
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
149
149
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
150
150
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
151
|
-
ValueError: If `shard_id` is
|
|
151
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
152
152
|
ValueError: If `target_type` is not set correctly.
|
|
153
153
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
154
154
|
|
|
@@ -280,7 +280,7 @@ class Caltech101Dataset(GeneratorDataset):
|
|
|
280
280
|
|
|
281
281
|
class Caltech256Dataset(MappableDataset, VisionBaseDataset):
|
|
282
282
|
"""
|
|
283
|
-
|
|
283
|
+
Caltech 256 dataset.
|
|
284
284
|
|
|
285
285
|
The generated dataset has two columns: :py:obj:`[image, label]` .
|
|
286
286
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -290,8 +290,9 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):
|
|
|
290
290
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
291
291
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
292
292
|
Default: None, all images.
|
|
293
|
-
num_parallel_workers (int, optional): Number of
|
|
294
|
-
Default: None,
|
|
293
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
294
|
+
Default: None, will use global default workers(8), it can be set
|
|
295
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
295
296
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
296
297
|
Default: None, expected order behavior shown in the table below.
|
|
297
298
|
decode (bool, optional): Whether or not to decode the images after reading. Default: False.
|
|
@@ -303,7 +304,7 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):
|
|
|
303
304
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
304
305
|
argument can only be specified when `num_shards` is also specified.
|
|
305
306
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
306
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
307
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
307
308
|
Default: None, which means no cache is used.
|
|
308
309
|
|
|
309
310
|
Raises:
|
|
@@ -312,7 +313,7 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):
|
|
|
312
313
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
313
314
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
314
315
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
315
|
-
ValueError: If `shard_id` is
|
|
316
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
316
317
|
ValueError: If `target_type` is not 'category', 'annotation' or 'all'.
|
|
317
318
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
318
319
|
|
|
@@ -408,17 +409,18 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):
|
|
|
408
409
|
|
|
409
410
|
class CelebADataset(MappableDataset, VisionBaseDataset):
|
|
410
411
|
"""
|
|
411
|
-
|
|
412
|
-
Only support to read `list_attr_celeba.txt` currently, which is the attribute annotations of the dataset.
|
|
412
|
+
CelebA(CelebFaces Attributes) dataset.
|
|
413
413
|
|
|
414
|
+
Only support to read `list_attr_celeba.txt` currently, which is the attribute annotations of the dataset.
|
|
414
415
|
The generated dataset has two columns: :py:obj:`[image, attr]` .
|
|
415
416
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
416
417
|
The tensor of column :py:obj:`attr` is of the uint32 type and one hot encoded.
|
|
417
418
|
|
|
418
419
|
Args:
|
|
419
420
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
420
|
-
num_parallel_workers (int, optional): Number of
|
|
421
|
-
|
|
421
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
422
|
+
Default: None, will use global default workers(8), it can be set
|
|
423
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
422
424
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None.
|
|
423
425
|
usage (str, optional): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset.
|
|
424
426
|
Default: 'all', will read all samples.
|
|
@@ -433,7 +435,7 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
|
|
|
433
435
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
434
436
|
argument can only be specified when `num_shards` is also specified.
|
|
435
437
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
436
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
438
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
437
439
|
Default: None, which means no cache is used.
|
|
438
440
|
decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
|
|
439
441
|
and returns the decrypted bytes data. Default: None, no decryption.
|
|
@@ -444,7 +446,7 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
|
|
|
444
446
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
445
447
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
446
448
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
447
|
-
ValueError: If `shard_id` is
|
|
449
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
448
450
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
449
451
|
ValueError: If `usage` is not 'train', 'valid', 'test' or 'all'.
|
|
450
452
|
|
|
@@ -578,9 +580,9 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
|
|
|
578
580
|
|
|
579
581
|
class Cifar10Dataset(MappableDataset, VisionBaseDataset):
|
|
580
582
|
"""
|
|
581
|
-
|
|
582
|
-
This api only supports parsing Cifar10 file in binary version now.
|
|
583
|
+
CIFAR-10 dataset.
|
|
583
584
|
|
|
585
|
+
This API currently only supports parsing the binary version of the CIFAR-10 files.
|
|
584
586
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
585
587
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
586
588
|
The tensor of column :py:obj:`label` is a scalar of the uint32 type.
|
|
@@ -592,8 +594,9 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
592
594
|
Default: None, all samples.
|
|
593
595
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
594
596
|
Default: None, all images.
|
|
595
|
-
num_parallel_workers (int, optional): Number of
|
|
596
|
-
Default: None,
|
|
597
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
598
|
+
Default: None, will use global default workers(8), it can be set
|
|
599
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
597
600
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
598
601
|
order behavior shown in the table below.
|
|
599
602
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -604,7 +607,7 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
604
607
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
605
608
|
argument can only be specified when `num_shards` is also specified.
|
|
606
609
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
607
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
610
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
608
611
|
Default: None, which means no cache is used.
|
|
609
612
|
|
|
610
613
|
Raises:
|
|
@@ -613,7 +616,7 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
613
616
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
614
617
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
615
618
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
616
|
-
ValueError: If `shard_id` is
|
|
619
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
617
620
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
618
621
|
ValueError: If `usage` is not 'train', 'test' or 'all'.
|
|
619
622
|
|
|
@@ -711,7 +714,7 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
711
714
|
|
|
712
715
|
class Cifar100Dataset(MappableDataset, VisionBaseDataset):
|
|
713
716
|
"""
|
|
714
|
-
|
|
717
|
+
CIFAR-100 dataset.
|
|
715
718
|
|
|
716
719
|
The generated dataset has three columns :py:obj:`[image, coarse_label, fine_label]` .
|
|
717
720
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -724,19 +727,20 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
|
|
|
724
727
|
Default: None, all samples.
|
|
725
728
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
726
729
|
Default: None, all images.
|
|
727
|
-
num_parallel_workers (int, optional): Number of
|
|
728
|
-
Default: None,
|
|
730
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
731
|
+
Default: None, will use global default workers(8), it can be set
|
|
732
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
729
733
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
730
734
|
order behavior shown in the table below.
|
|
731
735
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
732
736
|
dataset. Default: None, expected order behavior shown in the table below.
|
|
733
737
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
734
|
-
into. Default: None. When this argument is specified,
|
|
738
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
735
739
|
the maximum number of samples per shard.
|
|
736
740
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
737
741
|
argument can only be specified when `num_shards` is also specified.
|
|
738
742
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
739
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
743
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
740
744
|
Default: None, which means no cache is used.
|
|
741
745
|
|
|
742
746
|
Raises:
|
|
@@ -745,7 +749,7 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
|
|
|
745
749
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
746
750
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
747
751
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
748
|
-
ValueError: If `shard_id` is
|
|
752
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
749
753
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
750
754
|
ValueError: If `usage` is not 'train', 'test' or 'all'.
|
|
751
755
|
|
|
@@ -837,7 +841,7 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
|
|
|
837
841
|
|
|
838
842
|
class CityscapesDataset(MappableDataset, VisionBaseDataset):
|
|
839
843
|
"""
|
|
840
|
-
|
|
844
|
+
Cityscapes dataset.
|
|
841
845
|
|
|
842
846
|
The generated dataset has two columns :py:obj:`[image, task]` .
|
|
843
847
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -853,8 +857,9 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
|
|
|
853
857
|
'semantic', 'polygon' or 'color'. Default: 'instance'.
|
|
854
858
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
855
859
|
Default: None, all images.
|
|
856
|
-
num_parallel_workers (int, optional): Number of
|
|
857
|
-
Default: None,
|
|
860
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
861
|
+
Default: None, will use global default workers(8), it can be set
|
|
862
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
858
863
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
859
864
|
order behavior shown in the table below.
|
|
860
865
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
@@ -866,7 +871,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
|
|
|
866
871
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
867
872
|
argument can only be specified when `num_shards` is also specified.
|
|
868
873
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
869
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
874
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
870
875
|
Default: None, which means no cache is used.
|
|
871
876
|
|
|
872
877
|
Raises:
|
|
@@ -880,7 +885,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
|
|
|
880
885
|
ValueError: If `task` is invalid.
|
|
881
886
|
ValueError: If `quality_mode` is invalid.
|
|
882
887
|
ValueError: If `usage` is invalid.
|
|
883
|
-
ValueError: If `shard_id` is
|
|
888
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
884
889
|
|
|
885
890
|
Note:
|
|
886
891
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -1009,7 +1014,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
|
|
|
1009
1014
|
|
|
1010
1015
|
class CocoDataset(MappableDataset, VisionBaseDataset):
|
|
1011
1016
|
"""
|
|
1012
|
-
|
|
1017
|
+
COCO(Common Objects in Context) dataset.
|
|
1013
1018
|
|
|
1014
1019
|
CocoDataset supports five kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation,
|
|
1015
1020
|
Panoptic Segmentation and Captioning of 2017 Train/Val/Test dataset.
|
|
@@ -1021,8 +1026,9 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
|
|
|
1021
1026
|
'Detection', 'Stuff', 'Panoptic', 'Keypoint' and 'Captioning'. Default: 'Detection'.
|
|
1022
1027
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
1023
1028
|
Default: None, all images.
|
|
1024
|
-
num_parallel_workers (int, optional): Number of
|
|
1025
|
-
Default: None,
|
|
1029
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1030
|
+
Default: None, will use global default workers(8), it can be set
|
|
1031
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1026
1032
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
1027
1033
|
order behavior shown in the table below.
|
|
1028
1034
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
@@ -1034,7 +1040,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
|
|
|
1034
1040
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1035
1041
|
argument can only be specified when `num_shards` is also specified.
|
|
1036
1042
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1037
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
1043
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1038
1044
|
Default: None, which means no cache is used.
|
|
1039
1045
|
extra_metadata (bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
|
|
1040
1046
|
output at the end :py:obj:`[_meta-filename, dtype=string]` . Default: False.
|
|
@@ -1088,12 +1094,12 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
|
|
|
1088
1094
|
ValueError: If `task` is not in ['Detection', 'Stuff', 'Panoptic', 'Keypoint', 'Captioning'].
|
|
1089
1095
|
ValueError: If `annotation_file` does not exist.
|
|
1090
1096
|
ValueError: If `dataset_dir` does not exist.
|
|
1091
|
-
ValueError: If `shard_id` is
|
|
1097
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1092
1098
|
|
|
1093
1099
|
Note:
|
|
1094
1100
|
- Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op is added
|
|
1095
1101
|
to remove the prefix('_meta-').
|
|
1096
|
-
-
|
|
1102
|
+
- `mindspore.dataset.PKSampler` is not yet supported for the `sampler` parameter.
|
|
1097
1103
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
1098
1104
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
1099
1105
|
|
|
@@ -1254,7 +1260,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
|
|
|
1254
1260
|
|
|
1255
1261
|
class DIV2KDataset(MappableDataset, VisionBaseDataset):
|
|
1256
1262
|
"""
|
|
1257
|
-
|
|
1263
|
+
DIV2K(DIVerse 2K resolution image) dataset.
|
|
1258
1264
|
|
|
1259
1265
|
The generated dataset has two columns :py:obj:`[hr_image, lr_image]` .
|
|
1260
1266
|
The tensor of column :py:obj:`hr_image` and the tensor of column :py:obj:`lr_image` are of the uint8 type.
|
|
@@ -1270,8 +1276,9 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
|
|
|
1270
1276
|
When `downgrade` is 'mild', 'difficult' or 'wild', scale can only be 4.
|
|
1271
1277
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
1272
1278
|
Default: None, all images.
|
|
1273
|
-
num_parallel_workers (int, optional): Number of
|
|
1274
|
-
Default: None,
|
|
1279
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1280
|
+
Default: None, will use global default workers(8), it can be set
|
|
1281
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1275
1282
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
1276
1283
|
order behavior shown in the table below.
|
|
1277
1284
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
@@ -1283,7 +1290,7 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
|
|
|
1283
1290
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1284
1291
|
argument can only be specified when `num_shards` is also specified.
|
|
1285
1292
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1286
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
1293
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1287
1294
|
Default: None, which means no cache is used.
|
|
1288
1295
|
|
|
1289
1296
|
Raises:
|
|
@@ -1299,7 +1306,7 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
|
|
|
1299
1306
|
ValueError: If `scale` is invalid.
|
|
1300
1307
|
ValueError: If `scale` is equal to 8 and `downgrade` is not equal to 'bicubic'.
|
|
1301
1308
|
ValueError: If `downgrade` is in ['mild', 'difficult', 'wild'] and `scale` is not equal to 4.
|
|
1302
|
-
ValueError: If `shard_id` is
|
|
1309
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1303
1310
|
|
|
1304
1311
|
Note:
|
|
1305
1312
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -1442,7 +1449,7 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
|
|
|
1442
1449
|
|
|
1443
1450
|
class EMnistDataset(MappableDataset, VisionBaseDataset):
|
|
1444
1451
|
"""
|
|
1445
|
-
|
|
1452
|
+
EMNIST(Extended MNIST) dataset.
|
|
1446
1453
|
|
|
1447
1454
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
1448
1455
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -1457,8 +1464,9 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1457
1464
|
Default: None, will read all samples.
|
|
1458
1465
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
1459
1466
|
Default: None, will read all images.
|
|
1460
|
-
num_parallel_workers (int, optional): Number of
|
|
1461
|
-
Default: None,
|
|
1467
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1468
|
+
Default: None, will use global default workers(8), it can be set
|
|
1469
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1462
1470
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
1463
1471
|
Default: None, expected order behavior shown in the table below.
|
|
1464
1472
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -1468,7 +1476,7 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1468
1476
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1469
1477
|
argument can only be specified when `num_shards` is also specified.
|
|
1470
1478
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1471
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
1479
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1472
1480
|
Default: None, which means no cache is used.
|
|
1473
1481
|
|
|
1474
1482
|
Raises:
|
|
@@ -1476,7 +1484,7 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1476
1484
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
1477
1485
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1478
1486
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1479
|
-
ValueError: If `shard_id` is
|
|
1487
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1480
1488
|
|
|
1481
1489
|
Note:
|
|
1482
1490
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -1589,8 +1597,9 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
|
|
|
1589
1597
|
base_seed (int, optional): Offsets the index-based random seed used to generate each image. Default: 0.
|
|
1590
1598
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
1591
1599
|
Default: None, will read all images.
|
|
1592
|
-
num_parallel_workers (int, optional): Number of
|
|
1593
|
-
Default: None,
|
|
1600
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1601
|
+
Default: None, will use global default workers(8), it can be set
|
|
1602
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1594
1603
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
1595
1604
|
Default: None, expected order behavior shown in the table below.
|
|
1596
1605
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -1600,7 +1609,7 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
|
|
|
1600
1609
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1601
1610
|
argument can only be specified when `num_shards` is also specified.
|
|
1602
1611
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1603
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
1612
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1604
1613
|
Default: None, which means no cache is used.
|
|
1605
1614
|
|
|
1606
1615
|
Raises:
|
|
@@ -1609,7 +1618,7 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
|
|
|
1609
1618
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1610
1619
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1611
1620
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1612
|
-
ValueError: If `shard_id` is
|
|
1621
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1613
1622
|
|
|
1614
1623
|
Note:
|
|
1615
1624
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -1664,7 +1673,7 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
|
|
|
1664
1673
|
|
|
1665
1674
|
class FashionMnistDataset(MappableDataset, VisionBaseDataset):
|
|
1666
1675
|
"""
|
|
1667
|
-
|
|
1676
|
+
Fashion-MNIST dataset.
|
|
1668
1677
|
|
|
1669
1678
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
1670
1679
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -1677,8 +1686,9 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1677
1686
|
Default: None, will read all samples.
|
|
1678
1687
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
1679
1688
|
Default: None, will read all images.
|
|
1680
|
-
num_parallel_workers (int, optional): Number of
|
|
1681
|
-
Default: None,
|
|
1689
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1690
|
+
Default: None, will use global default workers(8), it can be set
|
|
1691
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1682
1692
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
1683
1693
|
Default: None, expected order behavior shown in the table below.
|
|
1684
1694
|
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
@@ -1688,7 +1698,7 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1688
1698
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1689
1699
|
argument can only be specified when `num_shards` is also specified.
|
|
1690
1700
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1691
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
1701
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1692
1702
|
Default: None, which means no cache is used.
|
|
1693
1703
|
|
|
1694
1704
|
Raises:
|
|
@@ -1698,7 +1708,7 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1698
1708
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
1699
1709
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1700
1710
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
1701
|
-
ValueError: If `shard_id` is
|
|
1711
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1702
1712
|
|
|
1703
1713
|
Note:
|
|
1704
1714
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -1786,7 +1796,7 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1786
1796
|
|
|
1787
1797
|
class FlickrDataset(MappableDataset, VisionBaseDataset):
|
|
1788
1798
|
"""
|
|
1789
|
-
|
|
1799
|
+
Flickr8k and Flickr30k datasets.
|
|
1790
1800
|
|
|
1791
1801
|
The generated dataset has two columns :py:obj:`[image, annotation]` .
|
|
1792
1802
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -1798,8 +1808,9 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
|
|
|
1798
1808
|
annotation_file (str): Path to the root directory that contains the annotation.
|
|
1799
1809
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
1800
1810
|
Default: None, all images.
|
|
1801
|
-
num_parallel_workers (int, optional): Number of
|
|
1802
|
-
Default: None,
|
|
1811
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
1812
|
+
Default: None, will use global default workers(8), it can be set
|
|
1813
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
1803
1814
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
1804
1815
|
order behavior shown in the table below.
|
|
1805
1816
|
decode (bool, optional): Decode the images after reading. Default: None.
|
|
@@ -1811,7 +1822,7 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
|
|
|
1811
1822
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
1812
1823
|
argument can only be specified when `num_shards` is also specified.
|
|
1813
1824
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1814
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
1825
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
1815
1826
|
Default: None, which means no cache is used.
|
|
1816
1827
|
|
|
1817
1828
|
Raises:
|
|
@@ -1823,7 +1834,7 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
|
|
|
1823
1834
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
1824
1835
|
ValueError: If `dataset_dir` does not exist.
|
|
1825
1836
|
ValueError: If `annotation_file` does not exist.
|
|
1826
|
-
ValueError: If `shard_id` is
|
|
1837
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
1827
1838
|
|
|
1828
1839
|
Note:
|
|
1829
1840
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -2030,7 +2041,7 @@ class _Flowers102Dataset:
|
|
|
2030
2041
|
|
|
2031
2042
|
class Flowers102Dataset(GeneratorDataset):
|
|
2032
2043
|
"""
|
|
2033
|
-
|
|
2044
|
+
Oxford 102 Flower dataset.
|
|
2034
2045
|
|
|
2035
2046
|
According to the given `task` configuration, the generated dataset has different output columns:
|
|
2036
2047
|
- `task` = 'Classification', output columns: `[image, dtype=uint8]` , `[label, dtype=uint32]` .
|
|
@@ -2043,7 +2054,8 @@ class Flowers102Dataset(GeneratorDataset):
|
|
|
2043
2054
|
usage (str, optional): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset.
|
|
2044
2055
|
Default: 'all', will read all samples.
|
|
2045
2056
|
num_samples (int, optional): The number of samples to be included in the dataset. Default: None, all images.
|
|
2046
|
-
num_parallel_workers (int, optional): Number of subprocesses used to
|
|
2057
|
+
num_parallel_workers (int, optional): Number of worker subprocesses used to
|
|
2058
|
+
fetch the dataset in parallel. Default: 1.
|
|
2047
2059
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
2048
2060
|
Default: None, expected order behavior shown in the table below.
|
|
2049
2061
|
decode (bool, optional): Whether or not to decode the images and segmentations after reading. Default: False.
|
|
@@ -2061,7 +2073,7 @@ class Flowers102Dataset(GeneratorDataset):
|
|
|
2061
2073
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2062
2074
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2063
2075
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
2064
|
-
ValueError: If `shard_id` is
|
|
2076
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
2065
2077
|
|
|
2066
2078
|
Note:
|
|
2067
2079
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -2190,6 +2202,141 @@ class Flowers102Dataset(GeneratorDataset):
|
|
|
2190
2202
|
return class_dict
|
|
2191
2203
|
|
|
2192
2204
|
|
|
2205
|
+
class Food101Dataset(MappableDataset, VisionBaseDataset):
|
|
2206
|
+
"""
|
|
2207
|
+
Food101 dataset.
|
|
2208
|
+
|
|
2209
|
+
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
2210
|
+
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
2211
|
+
The tensor of column :py:obj:`label` is of the string type.
|
|
2212
|
+
|
|
2213
|
+
Args:
|
|
2214
|
+
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
2215
|
+
usage (str, optional): Usage of this dataset, can be 'train', 'test', or 'all'. 'train' will read
|
|
2216
|
+
from 75,750 samples, 'test' will read from 25,250 samples, and 'all' will read all 'train'
|
|
2217
|
+
and 'test' samples. Default: None, will be set to 'all'.
|
|
2218
|
+
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2219
|
+
Default: None, will read all images.
|
|
2220
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2221
|
+
Default: None, will use global default workers(8), it can be set
|
|
2222
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2223
|
+
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
2224
|
+
Default: None, expected order behavior shown in the table below.
|
|
2225
|
+
decode (bool, optional): Decode the images after reading. Default: False.
|
|
2226
|
+
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
2227
|
+
Default: None, expected order behavior shown in the table below.
|
|
2228
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. When this argument
|
|
2229
|
+
is specified, `num_samples` reflects the maximum number of samples per shard. Default: None.
|
|
2230
|
+
shard_id (int, optional): The shard ID within `num_shards` . This argument can only be specified
|
|
2231
|
+
when `num_shards` is also specified. Default: None.
|
|
2232
|
+
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2233
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2234
|
+
Default: None, which means no cache is used.
|
|
2235
|
+
|
|
2236
|
+
Raises:
|
|
2237
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
2238
|
+
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
2239
|
+
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
2240
|
+
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2241
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2242
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
2243
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
2244
|
+
ValueError: If the value of `usage` is not 'train', 'test', or 'all'.
|
|
2245
|
+
ValueError: If `dataset_dir` does not exist.
|
|
2246
|
+
|
|
2247
|
+
Note:
|
|
2248
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
2249
|
+
The table below shows what input arguments are allowed and their expected behavior.
|
|
2250
|
+
|
|
2251
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
2252
|
+
:widths: 25 25 50
|
|
2253
|
+
:header-rows: 1
|
|
2254
|
+
|
|
2255
|
+
* - Parameter `sampler`
|
|
2256
|
+
- Parameter `shuffle`
|
|
2257
|
+
- Expected Order Behavior
|
|
2258
|
+
* - None
|
|
2259
|
+
- None
|
|
2260
|
+
- random order
|
|
2261
|
+
* - None
|
|
2262
|
+
- True
|
|
2263
|
+
- random order
|
|
2264
|
+
* - None
|
|
2265
|
+
- False
|
|
2266
|
+
- sequential order
|
|
2267
|
+
* - Sampler object
|
|
2268
|
+
- None
|
|
2269
|
+
- order defined by sampler
|
|
2270
|
+
* - Sampler object
|
|
2271
|
+
- True
|
|
2272
|
+
- not allowed
|
|
2273
|
+
* - Sampler object
|
|
2274
|
+
- False
|
|
2275
|
+
- not allowed
|
|
2276
|
+
|
|
2277
|
+
Examples:
|
|
2278
|
+
>>> food101_dataset_dir = "/path/to/food101_dataset_directory"
|
|
2279
|
+
>>>
|
|
2280
|
+
>>> # Read 3 samples from Food101 dataset
|
|
2281
|
+
>>> dataset = ds.Food101Dataset(dataset_dir=food101_dataset_dir, num_samples=3)
|
|
2282
|
+
|
|
2283
|
+
About Food101 dataset:
|
|
2284
|
+
|
|
2285
|
+
The Food101 is a dataset of 101 food categories, with 101,000 images.
|
|
2286
|
+
There are 250 test images and 750 training images in each class. All images were rescaled
|
|
2287
|
+
to have a maximum side length of 512 pixels.
|
|
2288
|
+
|
|
2289
|
+
The following is the original Food101 dataset structure.
|
|
2290
|
+
You can unzip the dataset files into this directory structure and read by MindSpore's API.
|
|
2291
|
+
|
|
2292
|
+
.. code-block::
|
|
2293
|
+
|
|
2294
|
+
.
|
|
2295
|
+
└── food101_dir
|
|
2296
|
+
├── images
|
|
2297
|
+
│ ├── apple_pie
|
|
2298
|
+
│ │ ├── 1005649.jpg
|
|
2299
|
+
│ │ ├── 1014775.jpg
|
|
2300
|
+
│ │ ├──...
|
|
2301
|
+
│ ├── baby_back_ribs
|
|
2302
|
+
│ │ ├── 1005293.jpg
|
|
2303
|
+
│ │ ├── 1007102.jpg
|
|
2304
|
+
│ │ ├──...
|
|
2305
|
+
│ └──...
|
|
2306
|
+
└── meta
|
|
2307
|
+
├── train.txt
|
|
2308
|
+
├── test.txt
|
|
2309
|
+
├── classes.txt
|
|
2310
|
+
├── train.json
|
|
2311
|
+
├── test.json
|
|
2312
|
+
└── labels.txt
|
|
2313
|
+
|
|
2314
|
+
Citation:
|
|
2315
|
+
|
|
2316
|
+
.. code-block::
|
|
2317
|
+
|
|
2318
|
+
@inproceedings{bossard14,
|
|
2319
|
+
title = {Food-101 -- Mining Discriminative Components with Random Forests},
|
|
2320
|
+
author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc},
|
|
2321
|
+
booktitle = {European Conference on Computer Vision},
|
|
2322
|
+
year = {2014}
|
|
2323
|
+
}
|
|
2324
|
+
"""
|
|
2325
|
+
|
|
2326
|
+
@check_food101_dataset
|
|
2327
|
+
def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
|
|
2328
|
+
decode=False, sampler=None, num_shards=None, shard_id=None, cache=None):
|
|
2329
|
+
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
|
|
2330
|
+
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
|
|
2331
|
+
|
|
2332
|
+
self.dataset_dir = dataset_dir
|
|
2333
|
+
self.usage = replace_none(usage, "all")
|
|
2334
|
+
self.decode = replace_none(decode, False)
|
|
2335
|
+
|
|
2336
|
+
def parse(self, children=None):
|
|
2337
|
+
return cde.Food101Node(self.dataset_dir, self.usage, self.decode, self.sampler)
|
|
2338
|
+
|
|
2339
|
+
|
|
2193
2340
|
class ImageFolderDataset(MappableDataset, VisionBaseDataset):
|
|
2194
2341
|
"""
|
|
2195
2342
|
A source dataset that reads images from a tree of directories.
|
|
@@ -2203,8 +2350,9 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
|
|
|
2203
2350
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
2204
2351
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2205
2352
|
Default: None, all images.
|
|
2206
|
-
num_parallel_workers (int, optional): Number of
|
|
2207
|
-
Default: None,
|
|
2353
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2354
|
+
Default: None, will use global default workers(8), it can be set
|
|
2355
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2208
2356
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
2209
2357
|
Default: None, expected order behavior shown in the table below.
|
|
2210
2358
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -2222,7 +2370,7 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
|
|
|
2222
2370
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
2223
2371
|
argument can only be specified when `num_shards` is also specified.
|
|
2224
2372
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2225
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
2373
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2226
2374
|
Default: None, which means no cache is used.
|
|
2227
2375
|
decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
|
|
2228
2376
|
and returns the decrypted bytes data. Default: None, no decryption.
|
|
@@ -2235,7 +2383,7 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
|
|
|
2235
2383
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2236
2384
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2237
2385
|
RuntimeError: If `class_indexing` is not a dictionary.
|
|
2238
|
-
ValueError: If `shard_id` is
|
|
2386
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
2239
2387
|
|
|
2240
2388
|
Note:
|
|
2241
2389
|
- The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise.
|
|
@@ -2325,13 +2473,33 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
|
|
|
2325
2473
|
return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing,
|
|
2326
2474
|
self.decrypt)
|
|
2327
2475
|
|
|
2476
|
+
def get_class_indexing(self):
|
|
2477
|
+
"""
|
|
2478
|
+
Get the class index.
|
|
2479
|
+
|
|
2480
|
+
Returns:
|
|
2481
|
+
dict, a str-to-int mapping from label name to index.
|
|
2482
|
+
|
|
2483
|
+
Examples:
|
|
2484
|
+
>>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
|
|
2485
|
+
>>>
|
|
2486
|
+
>>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
|
|
2487
|
+
>>> class_indexing = dataset.get_class_indexing()
|
|
2488
|
+
"""
|
|
2489
|
+
if self.class_indexing is None or not self.class_indexing:
|
|
2490
|
+
runtime_getter = self._init_tree_getters()
|
|
2491
|
+
_class_indexing = runtime_getter[0].GetClassIndexing()
|
|
2492
|
+
for pair in _class_indexing:
|
|
2493
|
+
self.class_indexing[pair[0]] = pair[1][0]
|
|
2494
|
+
return self.class_indexing
|
|
2328
2495
|
|
|
2329
|
-
|
|
2496
|
+
|
|
2497
|
+
class KITTIDataset(MappableDataset, VisionBaseDataset):
|
|
2330
2498
|
"""
|
|
2331
|
-
|
|
2499
|
+
KITTI dataset.
|
|
2332
2500
|
|
|
2333
|
-
When usage is "train", the generated dataset has multiple columns: :py:obj:`[image, label, truncated,
|
|
2334
|
-
occluded, alpha, bbox, dimensions, location, rotation_y]` ; When usage is "test", the generated dataset
|
|
2501
|
+
When `usage` is "train", the generated dataset has multiple columns: :py:obj:`[image, label, truncated,
|
|
2502
|
+
occluded, alpha, bbox, dimensions, location, rotation_y]` ; When `usage` is "test", the generated dataset
|
|
2335
2503
|
has only one column: :py:obj:`[image]` .
|
|
2336
2504
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
2337
2505
|
The tensor of column :py:obj:`label` is of the uint32 type.
|
|
@@ -2349,20 +2517,21 @@ class KITTIDataset(MappableDataset):
|
|
|
2349
2517
|
train samples, `test` will read from 7518 test samples without label. Default: None, will use `train` .
|
|
2350
2518
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2351
2519
|
Default: None, will include all images.
|
|
2352
|
-
num_parallel_workers (int, optional): Number of
|
|
2353
|
-
Default: None,
|
|
2520
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2521
|
+
Default: None, will use global default workers(8), it can be set
|
|
2522
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2354
2523
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
2355
2524
|
order behavior shown in the table below.
|
|
2356
2525
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
2357
2526
|
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
2358
2527
|
Default: None, expected order behavior shown in the table below.
|
|
2359
2528
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
2360
|
-
into. Default: None. When this argument is specified,
|
|
2529
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
2361
2530
|
the maximum number of samples per shard.
|
|
2362
|
-
shard_id (int, optional): The shard ID within num_shards
|
|
2363
|
-
argument can only be specified when num_shards is also specified.
|
|
2531
|
+
shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
|
|
2532
|
+
argument can only be specified when `num_shards` is also specified.
|
|
2364
2533
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2365
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
2534
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2366
2535
|
Default: None, which means no cache is used.
|
|
2367
2536
|
|
|
2368
2537
|
Raises:
|
|
@@ -2371,7 +2540,7 @@ class KITTIDataset(MappableDataset):
|
|
|
2371
2540
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2372
2541
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2373
2542
|
ValueError: If `dataset_dir` does not exist.
|
|
2374
|
-
ValueError: If `shard_id` is
|
|
2543
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
2375
2544
|
|
|
2376
2545
|
Note:
|
|
2377
2546
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -2421,13 +2590,14 @@ class KITTIDataset(MappableDataset):
|
|
|
2421
2590
|
and a 3D laser scanner. Despite its popularity, the dataset itself does not contain ground truth for
|
|
2422
2591
|
semantic segmentation. However, various researchers have manually annotated parts of the dataset to fit
|
|
2423
2592
|
their necessities. Álvarez et al. generated ground truth for 323 images from the road detection challenge
|
|
2424
|
-
with three classes: road,
|
|
2593
|
+
with three classes: road, vehicles and sky. Zhang et al. annotated 252 (140 for training and 112 for testing)
|
|
2425
2594
|
acquisitions – RGB and Velodyne scans – from the tracking challenge for ten object categories: building, sky,
|
|
2426
2595
|
road, vegetation, sidewalk, car, pedestrian, cyclist, sign/pole, and fence.
|
|
2427
2596
|
|
|
2428
2597
|
You can unzip the original KITTI dataset files into this directory structure and read by MindSpore's API.
|
|
2429
2598
|
|
|
2430
2599
|
.. code-block::
|
|
2600
|
+
|
|
2431
2601
|
.
|
|
2432
2602
|
└── kitti_dataset_directory
|
|
2433
2603
|
├── data_object_image_2
|
|
@@ -2475,7 +2645,7 @@ class KITTIDataset(MappableDataset):
|
|
|
2475
2645
|
|
|
2476
2646
|
class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|
2477
2647
|
"""
|
|
2478
|
-
|
|
2648
|
+
KMNIST(Kuzushiji-MNIST) dataset.
|
|
2479
2649
|
|
|
2480
2650
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
2481
2651
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -2488,8 +2658,9 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
2488
2658
|
Default: None, will read all samples.
|
|
2489
2659
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2490
2660
|
Default: None, will read all images.
|
|
2491
|
-
num_parallel_workers (int, optional): Number of
|
|
2492
|
-
Default: None,
|
|
2661
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2662
|
+
Default: None, will use global default workers(8), it can be set
|
|
2663
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2493
2664
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
2494
2665
|
Default: None, expected order behavior shown in the table below.
|
|
2495
2666
|
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
@@ -2499,7 +2670,7 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
2499
2670
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
2500
2671
|
argument can only be specified when `num_shards` is also specified.
|
|
2501
2672
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2502
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
2673
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2503
2674
|
Default: None, which means no cache is used.
|
|
2504
2675
|
|
|
2505
2676
|
Raises:
|
|
@@ -2509,7 +2680,7 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
2509
2680
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2510
2681
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2511
2682
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
2512
|
-
ValueError: If `shard_id` is
|
|
2683
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
2513
2684
|
|
|
2514
2685
|
Note:
|
|
2515
2686
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -2595,10 +2766,10 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
2595
2766
|
|
|
2596
2767
|
class LFWDataset(MappableDataset, VisionBaseDataset):
|
|
2597
2768
|
"""
|
|
2598
|
-
|
|
2769
|
+
LFW(Labeled Faces in the Wild) dataset.
|
|
2599
2770
|
|
|
2600
|
-
When task is 'people', the generated dataset has two columns: :py:obj:`[image, label]`;
|
|
2601
|
-
When task is 'pairs', the generated dataset has three columns: :py:obj:`[image1, image2, label]` .
|
|
2771
|
+
When `task` is 'people', the generated dataset has two columns: :py:obj:`[image, label]`;
|
|
2772
|
+
When `task` is 'pairs', the generated dataset has three columns: :py:obj:`[image1, image2, label]` .
|
|
2602
2773
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
2603
2774
|
The tensor of column :py:obj:`image1` is of the uint8 type.
|
|
2604
2775
|
The tensor of column :py:obj:`image2` is of the uint8 type.
|
|
@@ -2607,37 +2778,43 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
|
|
|
2607
2778
|
Args:
|
|
2608
2779
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
2609
2780
|
task (str, optional): Set the task type of reading lfw data, support 'people' and 'pairs'.
|
|
2610
|
-
Default: 'people'.
|
|
2781
|
+
Default: None, means 'people'.
|
|
2611
2782
|
usage (str, optional): The image split to use, support '10fold', 'train', 'test' and 'all'.
|
|
2612
|
-
Default:
|
|
2613
|
-
image_set (str, optional):
|
|
2614
|
-
'deepfunneled'. Default:
|
|
2783
|
+
Default: None, will read samples including train and test.
|
|
2784
|
+
image_set (str, optional): Type of image funneling to use, support 'original', 'funneled' or
|
|
2785
|
+
'deepfunneled'. Default: None, will use 'funneled'.
|
|
2615
2786
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2616
2787
|
Default: None, all images.
|
|
2617
|
-
num_parallel_workers (int, optional): Number of
|
|
2618
|
-
Default: None,
|
|
2788
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2789
|
+
Default: None, will use global default workers(8), it can be set
|
|
2790
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2619
2791
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
2620
2792
|
Default: None, expected order behavior shown in the table below.
|
|
2621
2793
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
2622
2794
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
2623
2795
|
dataset. Default: None, expected order behavior shown in the table below.
|
|
2624
2796
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
2625
|
-
into. Default: None. When this argument is specified,
|
|
2797
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
2626
2798
|
the maximum number of samples per shard.
|
|
2627
|
-
shard_id (int, optional): The shard ID within num_shards
|
|
2628
|
-
argument can only be specified when num_shards is also specified.
|
|
2799
|
+
shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
|
|
2800
|
+
argument can only be specified when `num_shards` is also specified.
|
|
2629
2801
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2630
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
2802
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2631
2803
|
Default: None, which means no cache is used.
|
|
2632
2804
|
|
|
2633
2805
|
Raises:
|
|
2806
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
2634
2807
|
RuntimeError: If sampler and shuffle are specified at the same time.
|
|
2635
2808
|
RuntimeError: If sampler and sharding are specified at the same time.
|
|
2636
|
-
RuntimeError: If num_shards is specified but shard_id is None.
|
|
2637
|
-
RuntimeError: If shard_id is specified but num_shards is None.
|
|
2638
|
-
ValueError: If shard_id is invalid (< 0 or >= `num_shards` ).
|
|
2809
|
+
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2810
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2811
|
+
ValueError: If `shard_id` is invalid (< 0 or >= `num_shards` ).
|
|
2639
2812
|
|
|
2640
|
-
|
|
2813
|
+
Note:
|
|
2814
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
2815
|
+
The table below shows what input arguments are allowed and their expected behavior.
|
|
2816
|
+
|
|
2817
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
2641
2818
|
:widths: 25 25 50
|
|
2642
2819
|
:header-rows: 1
|
|
2643
2820
|
|
|
@@ -2675,15 +2852,17 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
|
|
|
2675
2852
|
|
|
2676
2853
|
About LFW dataset:
|
|
2677
2854
|
|
|
2678
|
-
LFW
|
|
2679
|
-
|
|
2680
|
-
of
|
|
2681
|
-
|
|
2682
|
-
|
|
2855
|
+
LFW (Labeled Faces in the Wild) dataset is one of the most commonly used and widely open datasets in
|
|
2856
|
+
the field of face recognition. It was released by Gary B. Huang and his team at the University of
|
|
2857
|
+
Massachusetts, Amherst in 2007. The dataset includes 13,233 images of 5,749 individuals, which are sourced
|
|
2858
|
+
from various internet platforms and contain diverse environmental factors such as different poses, lighting
|
|
2859
|
+
conditions, and angles. Most of the images in the dataset are frontal and cover a wide range of ages, genders,
|
|
2860
|
+
and ethnicities.
|
|
2683
2861
|
|
|
2684
2862
|
You can unzip the original LFW dataset files into this directory structure and read by MindSpore's API.
|
|
2685
2863
|
|
|
2686
2864
|
.. code-block::
|
|
2865
|
+
|
|
2687
2866
|
.
|
|
2688
2867
|
└── lfw_dataset_directory
|
|
2689
2868
|
├── lfw
|
|
@@ -2750,7 +2929,7 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
|
|
|
2750
2929
|
|
|
2751
2930
|
class LSUNDataset(MappableDataset, VisionBaseDataset):
|
|
2752
2931
|
"""
|
|
2753
|
-
|
|
2932
|
+
LSUN(Large-scale Scene UNderstanding) dataset.
|
|
2754
2933
|
|
|
2755
2934
|
The generated dataset has two columns: :py:obj:`[image, label]` .
|
|
2756
2935
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -2760,35 +2939,41 @@ class LSUNDataset(MappableDataset, VisionBaseDataset):
|
|
|
2760
2939
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
2761
2940
|
usage (str, optional): Usage of this dataset, can be `train` , `test` , `valid` or `all` .
|
|
2762
2941
|
Default: None, will be set to `all` .
|
|
2763
|
-
classes(Union[str, list[str]], optional): Choose the specific classes to load. Default: None, means loading
|
|
2942
|
+
classes (Union[str, list[str]], optional): Choose the specific classes to load. Default: None, means loading
|
|
2764
2943
|
all classes in root directory.
|
|
2765
2944
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2766
2945
|
Default: None, all images.
|
|
2767
|
-
num_parallel_workers (int, optional): Number of
|
|
2768
|
-
Default: None,
|
|
2946
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2947
|
+
Default: None, will use global default workers(8), it can be set
|
|
2948
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2769
2949
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
2770
2950
|
Default: None, expected order behavior shown in the table below.
|
|
2771
2951
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
2772
2952
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
2773
2953
|
dataset. Default: None, expected order behavior shown in the table below.
|
|
2774
2954
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
2775
|
-
into. Default: None. When this argument is specified,
|
|
2955
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
2776
2956
|
the maximum number of samples per shard.
|
|
2777
|
-
shard_id (int, optional): The shard ID within num_shards
|
|
2778
|
-
argument can only be specified when num_shards is also specified.
|
|
2957
|
+
shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
|
|
2958
|
+
argument can only be specified when `num_shards` is also specified.
|
|
2779
2959
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2780
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
2960
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2781
2961
|
Default: None, which means no cache is used.
|
|
2782
2962
|
|
|
2783
2963
|
Raises:
|
|
2784
|
-
RuntimeError: If
|
|
2785
|
-
RuntimeError: If
|
|
2786
|
-
RuntimeError: If
|
|
2787
|
-
RuntimeError: If
|
|
2788
|
-
|
|
2789
|
-
ValueError: If
|
|
2790
|
-
|
|
2791
|
-
|
|
2964
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
2965
|
+
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
2966
|
+
RuntimeError: If `sampler` and sharding are specified at the same time.
|
|
2967
|
+
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2968
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2969
|
+
ValueError: If `shard_id` is invalid (< 0 or >= `num_shards` ).
|
|
2970
|
+
ValueError: If `usage` or `classes` is invalid (not in specific types).
|
|
2971
|
+
|
|
2972
|
+
Note:
|
|
2973
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
2974
|
+
The table below shows what input arguments are allowed and their expected behavior.
|
|
2975
|
+
|
|
2976
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
2792
2977
|
:widths: 25 25 50
|
|
2793
2978
|
:header-rows: 1
|
|
2794
2979
|
|
|
@@ -2827,15 +3012,17 @@ class LSUNDataset(MappableDataset, VisionBaseDataset):
|
|
|
2827
3012
|
|
|
2828
3013
|
About LSUN dataset:
|
|
2829
3014
|
|
|
2830
|
-
The LSUN
|
|
2831
|
-
in
|
|
3015
|
+
The LSUN (Large-Scale Scene Understanding) is a large-scale dataset used for indoor scene
|
|
3016
|
+
understanding. It was originally launched by Stanford University in 2015 with the aim of
|
|
3017
|
+
providing a challenging and diverse dataset for research in computer vision and machine
|
|
3018
|
+
learning. The main application of this dataset for research is indoor scene analysis.
|
|
2832
3019
|
|
|
2833
|
-
|
|
2834
|
-
|
|
2835
|
-
|
|
3020
|
+
This dataset contains ten different categories of scenes, including bedrooms, living rooms,
|
|
3021
|
+
restaurants, lounges, studies, kitchens, bathrooms, corridors, children's room, and outdoors.
|
|
3022
|
+
Each category contains tens of thousands of images from different perspectives, and these
|
|
3023
|
+
images are high-quality, high-resolution real-world images.
|
|
2836
3024
|
|
|
2837
|
-
You can unzip the
|
|
2838
|
-
read by MindSpore's API.
|
|
3025
|
+
You can unzip the dataset files into this directory structure and read by MindSpore's API.
|
|
2839
3026
|
|
|
2840
3027
|
.. code-block::
|
|
2841
3028
|
|
|
@@ -2892,8 +3079,9 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
2892
3079
|
usage (str, optional): Acceptable usages include 'train', 'eval' and 'inference'. Default: 'train'.
|
|
2893
3080
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2894
3081
|
Default: None, will include all images.
|
|
2895
|
-
num_parallel_workers (int, optional): Number of
|
|
2896
|
-
Default: None, will use
|
|
3082
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3083
|
+
Default: None, will use global default workers(8), it can be set
|
|
3084
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2897
3085
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
2898
3086
|
order behavior shown in the table below.
|
|
2899
3087
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -2908,7 +3096,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
2908
3096
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
2909
3097
|
argument can only be specified when `num_shards` is also specified.
|
|
2910
3098
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2911
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
3099
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2912
3100
|
Default: None, which means no cache is used.
|
|
2913
3101
|
|
|
2914
3102
|
Raises:
|
|
@@ -2919,7 +3107,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
2919
3107
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2920
3108
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2921
3109
|
RuntimeError: If class_indexing is not a dictionary.
|
|
2922
|
-
ValueError: If `shard_id` is
|
|
3110
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
2923
3111
|
|
|
2924
3112
|
Note:
|
|
2925
3113
|
- The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise.
|
|
@@ -2960,6 +3148,26 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
2960
3148
|
>>>
|
|
2961
3149
|
>>> # 2) Read samples (specified in manifest_file.manifest) for shard 0 in a 2-way distributed training setup
|
|
2962
3150
|
>>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir, num_shards=2, shard_id=0)
|
|
3151
|
+
|
|
3152
|
+
About Manifest dataset:
|
|
3153
|
+
|
|
3154
|
+
Manifest file contains a list of files included in a dataset, including basic file info such as File name and File
|
|
3155
|
+
ID, along with extended file metadata. Manifest is a data format file supported by Huawei ModelArts. For details,
|
|
3156
|
+
see `Specifications for Importing the Manifest File <https://support.huaweicloud.com/engineers-modelarts/
|
|
3157
|
+
modelarts_23_0009.html>`_ .
|
|
3158
|
+
|
|
3159
|
+
.. code-block::
|
|
3160
|
+
|
|
3161
|
+
.
|
|
3162
|
+
└── manifest_dataset_directory
|
|
3163
|
+
├── train
|
|
3164
|
+
│ ├── 1.JPEG
|
|
3165
|
+
│ ├── 2.JPEG
|
|
3166
|
+
│ ├── ...
|
|
3167
|
+
├── eval
|
|
3168
|
+
│ ├── 1.JPEG
|
|
3169
|
+
│ ├── 2.JPEG
|
|
3170
|
+
│ ├── ...
|
|
2963
3171
|
"""
|
|
2964
3172
|
|
|
2965
3173
|
@check_manifestdataset
|
|
@@ -3001,7 +3209,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
3001
3209
|
|
|
3002
3210
|
class MnistDataset(MappableDataset, VisionBaseDataset):
|
|
3003
3211
|
"""
|
|
3004
|
-
|
|
3212
|
+
MNIST dataset.
|
|
3005
3213
|
|
|
3006
3214
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
3007
3215
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -3014,8 +3222,9 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
3014
3222
|
Default: None, will read all samples.
|
|
3015
3223
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3016
3224
|
Default: None, will read all images.
|
|
3017
|
-
num_parallel_workers (int, optional): Number of
|
|
3018
|
-
Default: None, will use
|
|
3225
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3226
|
+
Default: None, will use global default workers(8), it can be set
|
|
3227
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3019
3228
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3020
3229
|
Default: None, expected order behavior shown in the table below.
|
|
3021
3230
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -3025,7 +3234,7 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
3025
3234
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
3026
3235
|
argument can only be specified when `num_shards` is also specified.
|
|
3027
3236
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3028
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
3237
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3029
3238
|
Default: None, which means no cache is used.
|
|
3030
3239
|
|
|
3031
3240
|
Raises:
|
|
@@ -3036,7 +3245,7 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
3036
3245
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
3037
3246
|
RuntimeError: If `num_shards` is specified but shard_id is None.
|
|
3038
3247
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
3039
|
-
ValueError: If `shard_id` is
|
|
3248
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3040
3249
|
|
|
3041
3250
|
Note:
|
|
3042
3251
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -3121,9 +3330,9 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
3121
3330
|
return cde.MnistNode(self.dataset_dir, self.usage, self.sampler)
|
|
3122
3331
|
|
|
3123
3332
|
|
|
3124
|
-
class OmniglotDataset(MappableDataset):
|
|
3333
|
+
class OmniglotDataset(MappableDataset, VisionBaseDataset):
|
|
3125
3334
|
"""
|
|
3126
|
-
|
|
3335
|
+
Omniglot dataset.
|
|
3127
3336
|
|
|
3128
3337
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
3129
3338
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -3131,32 +3340,34 @@ class OmniglotDataset(MappableDataset):
|
|
|
3131
3340
|
|
|
3132
3341
|
Args:
|
|
3133
3342
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
3134
|
-
background(bool, optional):
|
|
3135
|
-
Default: None,
|
|
3343
|
+
background (bool, optional): Whether to create dataset from the "background" set.
|
|
3344
|
+
Otherwise create from the "evaluation" set. Default: None, set to True.
|
|
3136
3345
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3137
3346
|
Default: None, all images.
|
|
3138
|
-
num_parallel_workers (int, optional): Number of
|
|
3139
|
-
Default: None,
|
|
3347
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3348
|
+
Default: None, will use global default workers(8), it can be set
|
|
3349
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3140
3350
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3141
3351
|
Default: None, expected order behavior shown in the table below.
|
|
3142
3352
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
3143
3353
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
3144
3354
|
dataset. Default: None, expected order behavior shown in the table below.
|
|
3145
3355
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
3146
|
-
into. Default: None. When this argument is specified,
|
|
3356
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
3147
3357
|
the max sample number of per shard.
|
|
3148
|
-
shard_id (int, optional): The shard ID within num_shards
|
|
3149
|
-
argument can only be specified when num_shards is also specified.
|
|
3358
|
+
shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
|
|
3359
|
+
argument can only be specified when `num_shards` is also specified.
|
|
3150
3360
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3151
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
3361
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3152
3362
|
Default: None, which means no cache is used.
|
|
3153
3363
|
|
|
3154
3364
|
Raises:
|
|
3365
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
3155
3366
|
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
3156
3367
|
RuntimeError: If `sampler` and `sharding` are specified at the same time.
|
|
3157
3368
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
3158
3369
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
3159
|
-
ValueError: If `shard_id` is
|
|
3370
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3160
3371
|
|
|
3161
3372
|
Note:
|
|
3162
3373
|
- This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -3195,14 +3406,15 @@ class OmniglotDataset(MappableDataset):
|
|
|
3195
3406
|
|
|
3196
3407
|
About Omniglot dataset:
|
|
3197
3408
|
|
|
3198
|
-
The Omniglot dataset is designed for developing more human-like learning algorithms.
|
|
3199
|
-
|
|
3200
|
-
|
|
3201
|
-
|
|
3409
|
+
The Omniglot dataset is designed for developing more human-like learning algorithms. It contains 1623 different
|
|
3410
|
+
handwritten characters from 50 different alphabets. Each of the 1623 characters was drawn online via Amazon's
|
|
3411
|
+
Mechanical Turk by 20 different people. Each image is paired with stroke data, a sequence of [x, y, t] coordinates
|
|
3412
|
+
with time in milliseconds.
|
|
3202
3413
|
|
|
3203
3414
|
You can unzip the original Omniglot dataset files into this directory structure and read by MindSpore's API.
|
|
3204
3415
|
|
|
3205
3416
|
.. code-block::
|
|
3417
|
+
|
|
3206
3418
|
.
|
|
3207
3419
|
└── omniglot_dataset_directory
|
|
3208
3420
|
├── images_background/
|
|
@@ -3253,7 +3465,7 @@ class OmniglotDataset(MappableDataset):
|
|
|
3253
3465
|
|
|
3254
3466
|
class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
3255
3467
|
"""
|
|
3256
|
-
|
|
3468
|
+
PhotoTour dataset.
|
|
3257
3469
|
|
|
3258
3470
|
According to the given `usage` configuration, the generated dataset has different output columns:
|
|
3259
3471
|
- `usage` = 'train', output columns: `[image, dtype=uint8]` .
|
|
@@ -3271,8 +3483,9 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
|
3271
3483
|
When usage is 'test', will read 100,000 samples for testing.
|
|
3272
3484
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3273
3485
|
Default: None, will read all images.
|
|
3274
|
-
num_parallel_workers (int, optional): Number of
|
|
3275
|
-
Default: None,
|
|
3486
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3487
|
+
Default: None, will use global default workers(8), it can be set
|
|
3488
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3276
3489
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3277
3490
|
Default: None, expected order behavior shown in the table below.
|
|
3278
3491
|
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
@@ -3282,7 +3495,7 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
|
3282
3495
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
3283
3496
|
argument can only be specified when `num_shards` is also specified.
|
|
3284
3497
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3285
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
3498
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3286
3499
|
Default: None, which means no cache is used.
|
|
3287
3500
|
|
|
3288
3501
|
Raises:
|
|
@@ -3296,7 +3509,7 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
|
3296
3509
|
ValueError: If name is not in ["notredame", "yosemite", "liberty",
|
|
3297
3510
|
"notredame_harris", "yosemite_harris", "liberty_harris"].
|
|
3298
3511
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3299
|
-
ValueError: If `shard_id` is
|
|
3512
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3300
3513
|
|
|
3301
3514
|
Note:
|
|
3302
3515
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive. The table
|
|
@@ -3403,7 +3616,7 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
|
3403
3616
|
|
|
3404
3617
|
class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
3405
3618
|
"""
|
|
3406
|
-
|
|
3619
|
+
Places365 dataset.
|
|
3407
3620
|
|
|
3408
3621
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
3409
3622
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -3417,8 +3630,9 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
|
3417
3630
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
3418
3631
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3419
3632
|
Default: None, will read all images.
|
|
3420
|
-
num_parallel_workers (int, optional): Number of
|
|
3421
|
-
Default: None, will use
|
|
3633
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3634
|
+
Default: None, will use global default workers(8), it can be set
|
|
3635
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3422
3636
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3423
3637
|
Default: None, expected order behavior shown in the table below.
|
|
3424
3638
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -3428,7 +3642,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
|
3428
3642
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
3429
3643
|
argument can only be specified when `num_shards` is also specified.
|
|
3430
3644
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3431
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
3645
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3432
3646
|
Default: None, which means no cache is used.
|
|
3433
3647
|
|
|
3434
3648
|
Raises:
|
|
@@ -3438,14 +3652,14 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
|
3438
3652
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
3439
3653
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
3440
3654
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3441
|
-
ValueError: If `shard_id` is
|
|
3655
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3442
3656
|
ValueError: If `usage` is not in ["train-standard", "train-challenge", "val"].
|
|
3443
3657
|
|
|
3444
3658
|
Note:
|
|
3445
3659
|
- This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
|
|
3446
3660
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
3447
3661
|
|
|
3448
|
-
.. list-table:: Expected Order Behavior of Using
|
|
3662
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
3449
3663
|
:widths: 25 25 50
|
|
3450
3664
|
:header-rows: 1
|
|
3451
3665
|
|
|
@@ -3543,7 +3757,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
|
3543
3757
|
|
|
3544
3758
|
class QMnistDataset(MappableDataset, VisionBaseDataset):
|
|
3545
3759
|
"""
|
|
3546
|
-
|
|
3760
|
+
QMNIST dataset.
|
|
3547
3761
|
|
|
3548
3762
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
3549
3763
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -3557,8 +3771,9 @@ class QMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
3557
3771
|
information (compat=False). Default: True.
|
|
3558
3772
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3559
3773
|
Default: None, will read all images.
|
|
3560
|
-
num_parallel_workers (int, optional): Number of
|
|
3561
|
-
Default: None, will use
|
|
3774
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3775
|
+
Default: None, will use global default workers(8), it can be set
|
|
3776
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3562
3777
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3563
3778
|
Default: None, expected order behavior shown in the table below.
|
|
3564
3779
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -3568,7 +3783,7 @@ class QMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
3568
3783
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
3569
3784
|
argument can only be specified when `num_shards` is also specified.
|
|
3570
3785
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3571
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
3786
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3572
3787
|
Default: None, which means no cache is used.
|
|
3573
3788
|
|
|
3574
3789
|
Raises:
|
|
@@ -3577,7 +3792,7 @@ class QMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
3577
3792
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
3578
3793
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
3579
3794
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
3580
|
-
ValueError: If `shard_id` is
|
|
3795
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3581
3796
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3582
3797
|
|
|
3583
3798
|
Note:
|
|
@@ -3681,15 +3896,16 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
|
|
|
3681
3896
|
Default: None, the columns will be named like this "c0", "c1", "c2" etc.
|
|
3682
3897
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
3683
3898
|
Default: None, all samples.
|
|
3684
|
-
num_parallel_workers (int, optional): Number of
|
|
3685
|
-
Default: None,
|
|
3899
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3900
|
+
Default: None, will use global default workers(8), it can be set
|
|
3901
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3686
3902
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3687
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
3903
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3688
3904
|
Default: None, which means no cache is used.
|
|
3689
3905
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3690
3906
|
Default: None, expected order behavior shown in the table below.
|
|
3691
3907
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
3692
|
-
into. Default: None. When this argument is specified,
|
|
3908
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
3693
3909
|
the maximum sample number of per shard.
|
|
3694
3910
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
3695
3911
|
argument can only be specified when `num_shards` is also specified.
|
|
@@ -3698,7 +3914,7 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
|
|
|
3698
3914
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
3699
3915
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
3700
3916
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3701
|
-
ValueError: If `shard_id` is
|
|
3917
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3702
3918
|
TypeError: If `total_rows` is not of type int.
|
|
3703
3919
|
TypeError: If `num_shards` is not of type int.
|
|
3704
3920
|
TypeError: If `num_parallel_workers` is not of type int.
|
|
@@ -3737,6 +3953,159 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
|
|
|
3737
3953
|
return cde.RandomNode(self.total_rows, schema, self.columns_list)
|
|
3738
3954
|
|
|
3739
3955
|
|
|
3956
|
+
class RenderedSST2Dataset(MappableDataset, VisionBaseDataset):
|
|
3957
|
+
"""
|
|
3958
|
+
RenderedSST2(Rendered Stanford Sentiment Treebank v2) dataset.
|
|
3959
|
+
|
|
3960
|
+
The generated dataset has two columns: :py:obj:`[image, label]`.
|
|
3961
|
+
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
3962
|
+
The tensor of column :py:obj:`label` is of the uint32 type.
|
|
3963
|
+
|
|
3964
|
+
Args:
|
|
3965
|
+
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
3966
|
+
usage (str, optional): Usage of this dataset, can be 'train', 'val', 'test' or 'all'.
|
|
3967
|
+
Default: None, will read all samples.
|
|
3968
|
+
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3969
|
+
Default: None, will include all images.
|
|
3970
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3971
|
+
Default: None, will use global default workers(8), it can be set
|
|
3972
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3973
|
+
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3974
|
+
Default: None, expected order behavior shown in the table below.
|
|
3975
|
+
decode (bool, optional): Whether or not to decode the images after reading. Default: False.
|
|
3976
|
+
sampler (Sampler, optional): Object used to choose samples from the
|
|
3977
|
+
dataset. Default: None, expected order behavior shown in the table below.
|
|
3978
|
+
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
3979
|
+
into. When this argument is specified, `num_samples` reflects
|
|
3980
|
+
the maximum number of samples per shard. Default: None.
|
|
3981
|
+
shard_id (int, optional): The shard ID within `num_shards` . This
|
|
3982
|
+
argument can only be specified when `num_shards` is also specified. Default: None.
|
|
3983
|
+
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3984
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3985
|
+
Default: None, which means no cache is used.
|
|
3986
|
+
|
|
3987
|
+
Raises:
|
|
3988
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
3989
|
+
ValueError: If `usage` is not 'train', 'test', 'val' or 'all'.
|
|
3990
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3991
|
+
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
3992
|
+
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
3993
|
+
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
3994
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
3995
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3996
|
+
|
|
3997
|
+
Note:
|
|
3998
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
3999
|
+
The table below shows what input arguments are allowed and their expected behavior.
|
|
4000
|
+
|
|
4001
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
4002
|
+
:widths: 25 25 50
|
|
4003
|
+
:header-rows: 1
|
|
4004
|
+
|
|
4005
|
+
* - Parameter `sampler`
|
|
4006
|
+
- Parameter `shuffle`
|
|
4007
|
+
- Expected Order Behavior
|
|
4008
|
+
* - None
|
|
4009
|
+
- None
|
|
4010
|
+
- random order
|
|
4011
|
+
* - None
|
|
4012
|
+
- True
|
|
4013
|
+
- random order
|
|
4014
|
+
* - None
|
|
4015
|
+
- False
|
|
4016
|
+
- sequential order
|
|
4017
|
+
* - Sampler object
|
|
4018
|
+
- None
|
|
4019
|
+
- order defined by sampler
|
|
4020
|
+
* - Sampler object
|
|
4021
|
+
- True
|
|
4022
|
+
- not allowed
|
|
4023
|
+
* - Sampler object
|
|
4024
|
+
- False
|
|
4025
|
+
- not allowed
|
|
4026
|
+
|
|
4027
|
+
Examples:
|
|
4028
|
+
>>> rendered_sst2_dataset_dir = "/path/to/rendered_sst2_dataset_directory"
|
|
4029
|
+
>>>
|
|
4030
|
+
>>> # 1) Read all samples (image files) in rendered_sst2_dataset_dir with 8 threads
|
|
4031
|
+
>>> dataset = ds.RenderedSST2Dataset(dataset_dir=rendered_sst2_dataset_dir,
|
|
4032
|
+
... usage="all", num_parallel_workers=8)
|
|
4033
|
+
|
|
4034
|
+
About RenderedSST2Dataset:
|
|
4035
|
+
|
|
4036
|
+
Rendered SST2 is an image classification dataset which was generated by rendering sentences in the Stanford
|
|
4037
|
+
Sentiment Treebank v2 dataset. There are three splits in this dataset and each split contains two classes
|
|
4038
|
+
(positive and negative): a train split containing 6920 images (3610 positive and 3310 negative), a validation
|
|
4039
|
+
split containing 872 images (444 positive and 428 negative), and a test split containing 1821 images
|
|
4040
|
+
(909 positive and 912 negative).
|
|
4041
|
+
|
|
4042
|
+
Here is the original RenderedSST2 dataset structure.
|
|
4043
|
+
You can unzip the dataset files into the following directory structure and read them with MindSpore's API.
|
|
4044
|
+
|
|
4045
|
+
.. code-block::
|
|
4046
|
+
|
|
4047
|
+
.
|
|
4048
|
+
└── rendered_sst2_dataset_directory
|
|
4049
|
+
├── train
|
|
4050
|
+
│ ├── negative
|
|
4051
|
+
│ │ ├── 0001.jpg
|
|
4052
|
+
│ │ ├── 0002.jpg
|
|
4053
|
+
│ │ ...
|
|
4054
|
+
│ └── positive
|
|
4055
|
+
│ ├── 0001.jpg
|
|
4056
|
+
│ ├── 0002.jpg
|
|
4057
|
+
│ ...
|
|
4058
|
+
├── test
|
|
4059
|
+
│ ├── negative
|
|
4060
|
+
│ │ ├── 0001.jpg
|
|
4061
|
+
│ │ ├── 0002.jpg
|
|
4062
|
+
│ │ ...
|
|
4063
|
+
│ └── positive
|
|
4064
|
+
│ ├── 0001.jpg
|
|
4065
|
+
│ ├── 0002.jpg
|
|
4066
|
+
│ ...
|
|
4067
|
+
└── valid
|
|
4068
|
+
├── negative
|
|
4069
|
+
│ ├── 0001.jpg
|
|
4070
|
+
│ ├── 0002.jpg
|
|
4071
|
+
│ ...
|
|
4072
|
+
└── positive
|
|
4073
|
+
├── 0001.jpg
|
|
4074
|
+
├── 0002.jpg
|
|
4075
|
+
...
|
|
4076
|
+
|
|
4077
|
+
Citation:
|
|
4078
|
+
|
|
4079
|
+
.. code-block::
|
|
4080
|
+
|
|
4081
|
+
@inproceedings{socher-etal-2013-recursive,
|
|
4082
|
+
title = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
|
|
4083
|
+
author = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning,
|
|
4084
|
+
Christopher D. and Ng, Andrew and Potts, Christopher},
|
|
4085
|
+
booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
|
|
4086
|
+
month = oct,
|
|
4087
|
+
year = {2013},
|
|
4088
|
+
address = {Seattle, Washington, USA},
|
|
4089
|
+
publisher = {Association for Computational Linguistics},
|
|
4090
|
+
url = {https://www.aclweb.org/anthology/D13-1170},
|
|
4091
|
+
pages = {1631--1642},
|
|
4092
|
+
}
|
|
4093
|
+
"""
|
|
4094
|
+
|
|
4095
|
+
@check_rendered_sst2_dataset
|
|
4096
|
+
def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
|
|
4097
|
+
decode=False, sampler=None, num_shards=None, shard_id=None, cache=None):
|
|
4098
|
+
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
|
|
4099
|
+
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
|
|
4100
|
+
|
|
4101
|
+
self.dataset_dir = dataset_dir
|
|
4102
|
+
self.usage = replace_none(usage, "all")
|
|
4103
|
+
self.decode = replace_none(decode, False)
|
|
4104
|
+
|
|
4105
|
+
def parse(self, children=None):
|
|
4106
|
+
return cde.RenderedSST2Node(self.dataset_dir, self.usage, self.decode, self.sampler)
|
|
4107
|
+
|
|
4108
|
+
|
|
3740
4109
|
class _SBDataset:
|
|
3741
4110
|
"""
|
|
3742
4111
|
Dealing with the data file with .mat extension, and return one row in tuple (image, task) each time.
|
|
@@ -3799,7 +4168,7 @@ class _SBDataset:
|
|
|
3799
4168
|
|
|
3800
4169
|
class SBDataset(GeneratorDataset):
|
|
3801
4170
|
"""
|
|
3802
|
-
|
|
4171
|
+
SB(Semantic Boundaries) Dataset.
|
|
3803
4172
|
|
|
3804
4173
|
By configuring the 'Task' parameter, the generated dataset has different output columns.
|
|
3805
4174
|
|
|
@@ -3814,7 +4183,7 @@ class SBDataset(GeneratorDataset):
|
|
|
3814
4183
|
usage (str, optional): Acceptable usages include 'train', 'val', 'train_noval' and 'all'. Default: 'all'.
|
|
3815
4184
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3816
4185
|
Default: None, all images.
|
|
3817
|
-
num_parallel_workers (int, optional): Number of
|
|
4186
|
+
num_parallel_workers (int, optional): Number of worker subprocesses to read the data. Default: 1.
|
|
3818
4187
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
3819
4188
|
order behavior shown in the table below.
|
|
3820
4189
|
decode (bool, optional): Decode the images after reading. Default: None.
|
|
@@ -3836,7 +4205,7 @@ class SBDataset(GeneratorDataset):
|
|
|
3836
4205
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3837
4206
|
ValueError: If `task` is not in ['Boundaries', 'Segmentation'].
|
|
3838
4207
|
ValueError: If `usage` is not in ['train', 'val', 'train_noval', 'all'].
|
|
3839
|
-
ValueError: If `shard_id` is
|
|
4208
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3840
4209
|
|
|
3841
4210
|
Note:
|
|
3842
4211
|
- This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -3933,7 +4302,7 @@ class SBDataset(GeneratorDataset):
|
|
|
3933
4302
|
|
|
3934
4303
|
class SBUDataset(MappableDataset, VisionBaseDataset):
|
|
3935
4304
|
"""
|
|
3936
|
-
|
|
4305
|
+
SBU(SBU Captioned Photo) dataset.
|
|
3937
4306
|
|
|
3938
4307
|
The generated dataset has two columns :py:obj:`[image, caption]` .
|
|
3939
4308
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -3943,8 +4312,9 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
|
|
|
3943
4312
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
3944
4313
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3945
4314
|
Default: None, will read all images.
|
|
3946
|
-
num_parallel_workers (int, optional): Number of
|
|
3947
|
-
Default: None, will use
|
|
4315
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
4316
|
+
Default: None, will use global default workers(8), it can be set
|
|
4317
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3948
4318
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3949
4319
|
Default: None, expected order behavior shown in the table below.
|
|
3950
4320
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
@@ -3955,7 +4325,7 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
|
|
|
3955
4325
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
3956
4326
|
argument can only be specified when `num_shards` is also specified.
|
|
3957
4327
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3958
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
4328
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3959
4329
|
Default: None, which means no cache is used.
|
|
3960
4330
|
|
|
3961
4331
|
Raises:
|
|
@@ -3965,13 +4335,13 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
|
|
|
3965
4335
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
3966
4336
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
3967
4337
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3968
|
-
ValueError: If `shard_id` is
|
|
4338
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3969
4339
|
|
|
3970
4340
|
Note:
|
|
3971
4341
|
- This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
|
|
3972
4342
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
3973
4343
|
|
|
3974
|
-
.. list-table:: Expected Order Behavior of Using
|
|
4344
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
3975
4345
|
:widths: 25 25 50
|
|
3976
4346
|
:header-rows: 1
|
|
3977
4347
|
|
|
@@ -4048,7 +4418,7 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
|
|
|
4048
4418
|
|
|
4049
4419
|
class SemeionDataset(MappableDataset, VisionBaseDataset):
|
|
4050
4420
|
"""
|
|
4051
|
-
|
|
4421
|
+
Semeion dataset.
|
|
4052
4422
|
|
|
4053
4423
|
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
4054
4424
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -4058,8 +4428,9 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
|
|
|
4058
4428
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
4059
4429
|
num_samples (int, optional): The number of samples to be included in the dataset.
|
|
4060
4430
|
Default: None, will read all images.
|
|
4061
|
-
num_parallel_workers (int, optional): Number of
|
|
4062
|
-
Default: None,
|
|
4431
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
4432
|
+
Default: None, will use global default workers(8), it can be set
|
|
4433
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
4063
4434
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
4064
4435
|
order behavior shown in the table below.
|
|
4065
4436
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
@@ -4070,7 +4441,7 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
|
|
|
4070
4441
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
4071
4442
|
argument can only be specified when `num_shards` is also specified.
|
|
4072
4443
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4073
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
4444
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
4074
4445
|
Default: None, which means no cache is used.
|
|
4075
4446
|
|
|
4076
4447
|
Raises:
|
|
@@ -4080,7 +4451,7 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
|
|
|
4080
4451
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
4081
4452
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
4082
4453
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
4083
|
-
ValueError: If `shard_id` is
|
|
4454
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
4084
4455
|
|
|
4085
4456
|
Note:
|
|
4086
4457
|
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
@@ -4170,7 +4541,7 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
|
|
|
4170
4541
|
|
|
4171
4542
|
class STL10Dataset(MappableDataset, VisionBaseDataset):
|
|
4172
4543
|
"""
|
|
4173
|
-
|
|
4544
|
+
STL-10 dataset.
|
|
4174
4545
|
|
|
4175
4546
|
The generated dataset has two columns: :py:obj:`[image, label]` .
|
|
4176
4547
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -4186,19 +4557,20 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
4186
4557
|
Default: None, all samples.
|
|
4187
4558
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
4188
4559
|
Default: None, all images.
|
|
4189
|
-
num_parallel_workers (int, optional): Number of
|
|
4190
|
-
Default: None,
|
|
4560
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
4561
|
+
Default: None, will use global default workers(8), it can be set
|
|
4562
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
4191
4563
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
4192
4564
|
order behavior shown in the table below.
|
|
4193
4565
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
4194
4566
|
dataset. Default: None, expected order behavior shown in the table below.
|
|
4195
4567
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
4196
|
-
into. Default: None. When this argument is specified,
|
|
4568
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
4197
4569
|
the maximum number of samples per shard.
|
|
4198
4570
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
4199
4571
|
argument can only be specified when `num_shards` is also specified.
|
|
4200
4572
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4201
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
4573
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
4202
4574
|
Default: None, which means no cache is used.
|
|
4203
4575
|
|
|
4204
4576
|
Raises:
|
|
@@ -4209,13 +4581,13 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
4209
4581
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
4210
4582
|
ValueError: If `usage` is invalid.
|
|
4211
4583
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
4212
|
-
ValueError: If `shard_id` is
|
|
4584
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
4213
4585
|
|
|
4214
4586
|
Note:
|
|
4215
4587
|
- This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
|
|
4216
4588
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
4217
4589
|
|
|
4218
|
-
.. list-table:: Expected Order Behavior of Using
|
|
4590
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
4219
4591
|
:widths: 25 25 50
|
|
4220
4592
|
:header-rows: 1
|
|
4221
4593
|
|
|
@@ -4304,6 +4676,151 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
4304
4676
|
return cde.STL10Node(self.dataset_dir, self.usage, self.sampler)
|
|
4305
4677
|
|
|
4306
4678
|
|
|
4679
|
+
class SUN397Dataset(MappableDataset, VisionBaseDataset):
    """
    SUN397(Scene UNderstanding) dataset.

    The generated dataset has two columns: :py:obj:`[image, label]`.
    The tensor of column :py:obj:`image` is of the uint8 type.
    The tensor of column :py:obj:`label` is of the uint32 type.

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
        num_samples (int, optional): The number of images to be included in the dataset.
            Default: None, all images.
        num_parallel_workers (int, optional): Number of worker threads to read the data.
            Default: None, will use global default workers(8), it can be set
            by `mindspore.dataset.config.set_num_parallel_workers` .
        shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
            Default: None, expected order behavior shown in the table below.
        decode (bool, optional): Whether or not to decode the images after reading. Default: False.
        sampler (Sampler, optional): Object used to choose samples from the
            dataset. Default: None, expected order behavior shown in the table below.
        num_shards (int, optional): Number of shards that the dataset will be divided
            into. When this argument is specified, `num_samples` reflects
            the maximum number of samples per shard. Default: None.
        shard_id (int, optional): The shard ID within `num_shards` . This
            argument can only be specified when `num_shards` is also specified. Default: None.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
            Default: None, which means no cache is used.

    Raises:
        RuntimeError: If `dataset_dir` does not contain data files.
        RuntimeError: If `sampler` and `shuffle` are specified at the same time.
        RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
        RuntimeError: If `num_shards` is specified but `shard_id` is None.
        RuntimeError: If `shard_id` is specified but `num_shards` is None.
        ValueError: If `num_parallel_workers` exceeds the max thread numbers.
        ValueError: If `shard_id` is not in range of [0, `num_shards` ).

    Note:
        - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
          The table below shows what input arguments are allowed and their expected behavior.

    .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
       :widths: 25 25 50
       :header-rows: 1

       * - Parameter `sampler`
         - Parameter `shuffle`
         - Expected Order Behavior
       * - None
         - None
         - random order
       * - None
         - True
         - random order
       * - None
         - False
         - sequential order
       * - Sampler object
         - None
         - order defined by sampler
       * - Sampler object
         - True
         - not allowed
       * - Sampler object
         - False
         - not allowed

    Examples:
        >>> sun397_dataset_dir = "/path/to/sun397_dataset_directory"
        >>>
        >>> # 1) Read all samples (image files) in sun397_dataset_dir with 8 threads
        >>> dataset = ds.SUN397Dataset(dataset_dir=sun397_dataset_dir, num_parallel_workers=8)

    About SUN397Dataset:

    The SUN397 or Scene UNderstanding (SUN) is a dataset for scene recognition consisting of 397 categories with
    108,754 images. The number of images varies across categories, but there are at least 100 images per category.
    Images are in jpg, png, or gif format.

    Here is the original SUN397 dataset structure.
    You can unzip the dataset files into this directory structure and read by MindSpore's API.

    .. code-block::

        .
        └── sun397_dataset_directory
            ├── ClassName.txt
            ├── README.txt
            ├── a
            │   ├── abbey
            │   │   ├── sun_aaaulhwrhqgejnyt.jpg
            │   │   ├── sun_aacphuqehdodwawg.jpg
            │   │   ├── ...
            │   ├── apartment_building
            │   │   └── outdoor
            │   │       ├── sun_aamyhslnsnomjzue.jpg
            │   │       ├── sun_abbjzfrsalhqivis.jpg
            │   │       ├── ...
            │   ├── ...
            ├── b
            │   ├── badlands
            │   │   ├── sun_aabtemlmesogqbbp.jpg
            │   │   ├── sun_afbsfeexggdhzshd.jpg
            │   │   ├── ...
            │   ├── balcony
            │   │   ├── exterior
            │   │   │   ├── sun_aaxzaiuznwquburq.jpg
            │   │   │   ├── sun_baajuldidvlcyzhv.jpg
            │   │   │   ├── ...
            │   │   └── interior
            │   │       ├── sun_babkzjntjfarengi.jpg
            │   │       ├── sun_bagjvjynskmonnbv.jpg
            │   │       ├── ...
            │   └── ...
            ├── ...


    Citation:

    .. code-block::

        @inproceedings{xiao2010sun,
        title        = {Sun database: Large-scale scene recognition from abbey to zoo},
        author       = {Xiao, Jianxiong and Hays, James and Ehinger, Krista A and Oliva, Aude and Torralba, Antonio},
        booktitle    = {2010 IEEE computer society conference on computer vision and pattern recognition},
        pages        = {3485--3492},
        year         = {2010},
        organization = {IEEE}
        }
    """

    @check_sun397_dataset
    def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, decode=False,
                 sampler=None, num_shards=None, shard_id=None, cache=None):
        """Pass the shared loading options to the base class and keep the SUN397-specific settings."""
        # Options common to every mappable dataset are handled by the parent constructor.
        super().__init__(
            num_parallel_workers=num_parallel_workers,
            sampler=sampler,
            num_samples=num_samples,
            shuffle=shuffle,
            num_shards=num_shards,
            shard_id=shard_id,
            cache=cache,
        )

        self.dataset_dir = dataset_dir
        # NOTE(review): replace_none presumably substitutes False when decode is None - confirm against helper.
        self.decode = replace_none(decode, False)

    def parse(self, children=None):
        """Build the `cde.SUN397Node` from the stored directory, decode flag and sampler.

        `children` is accepted but not used here.
        """
        # NOTE(review): self.sampler is not assigned in this class; presumably set by the base constructor.
        node = cde.SUN397Node(self.dataset_dir, self.decode, self.sampler)
        return node
|
|
4822
|
+
|
|
4823
|
+
|
|
4307
4824
|
class _SVHNDataset:
|
|
4308
4825
|
"""
|
|
4309
4826
|
Mainly for loading SVHN Dataset, and return two rows each time.
|
|
@@ -4342,7 +4859,7 @@ class _SVHNDataset:
|
|
|
4342
4859
|
|
|
4343
4860
|
class SVHNDataset(GeneratorDataset):
|
|
4344
4861
|
"""
|
|
4345
|
-
|
|
4862
|
+
SVHN(Street View House Numbers) dataset.
|
|
4346
4863
|
|
|
4347
4864
|
The generated dataset has two columns: :py:obj:`[image, label]` .
|
|
4348
4865
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -4353,15 +4870,16 @@ class SVHNDataset(GeneratorDataset):
|
|
|
4353
4870
|
usage (str, optional): Specify the 'train', 'test', 'extra' or 'all' parts of dataset.
|
|
4354
4871
|
Default: None, will read all samples.
|
|
4355
4872
|
num_samples (int, optional): The number of samples to be included in the dataset. Default: None, all images.
|
|
4356
|
-
num_parallel_workers (int, optional): Number of subprocesses used to
|
|
4873
|
+
num_parallel_workers (int, optional): Number of worker subprocesses used to
|
|
4874
|
+
fetch the dataset in parallel. Default: 1.
|
|
4357
4875
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
4358
4876
|
Default: None, expected order behavior shown in the table below.
|
|
4359
4877
|
sampler (Sampler, optional): Object used to choose samples from the dataset. Random accessible
|
|
4360
4878
|
input is required. Default: None, expected order behavior shown in the table below.
|
|
4361
4879
|
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
4362
|
-
When this argument is specified,
|
|
4880
|
+
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
|
4363
4881
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This argument must be specified only
|
|
4364
|
-
when num_shards is also specified.
|
|
4882
|
+
when `num_shards` is also specified.
|
|
4365
4883
|
|
|
4366
4884
|
Raises:
|
|
4367
4885
|
RuntimeError: If `dataset_dir` is not valid or does not exist or does not contain data files.
|
|
@@ -4371,13 +4889,13 @@ class SVHNDataset(GeneratorDataset):
|
|
|
4371
4889
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
4372
4890
|
ValueError: If `usage` is invalid.
|
|
4373
4891
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
4374
|
-
ValueError: If `shard_id` is
|
|
4892
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
4375
4893
|
|
|
4376
4894
|
Note:
|
|
4377
4895
|
- This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
|
|
4378
4896
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
4379
4897
|
|
|
4380
|
-
.. list-table:: Expected Order Behavior of Using
|
|
4898
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
4381
4899
|
:widths: 25 25 50
|
|
4382
4900
|
:header-rows: 1
|
|
4383
4901
|
|
|
@@ -4451,7 +4969,7 @@ class SVHNDataset(GeneratorDataset):
|
|
|
4451
4969
|
|
|
4452
4970
|
class USPSDataset(SourceDataset, VisionBaseDataset):
|
|
4453
4971
|
"""
|
|
4454
|
-
|
|
4972
|
+
USPS(U.S. Postal Service) dataset.
|
|
4455
4973
|
|
|
4456
4974
|
The generated dataset has two columns: :py:obj:`[image, label]` .
|
|
4457
4975
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
@@ -4464,8 +4982,9 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
|
|
|
4464
4982
|
Default: None, will read all samples.
|
|
4465
4983
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
4466
4984
|
Default: None, will read all images.
|
|
4467
|
-
num_parallel_workers (int, optional): Number of
|
|
4468
|
-
Default: None, will use
|
|
4985
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
4986
|
+
Default: None, will use global default workers(8), it can be set
|
|
4987
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
4469
4988
|
shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
|
|
4470
4989
|
Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
|
|
4471
4990
|
If shuffle is False, no shuffling will be performed.
|
|
@@ -4481,7 +5000,7 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
|
|
|
4481
5000
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
4482
5001
|
argument can only be specified when `num_shards` is also specified.
|
|
4483
5002
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4484
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
5003
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
4485
5004
|
Default: None, which means no cache is used.
|
|
4486
5005
|
|
|
4487
5006
|
Raises:
|
|
@@ -4490,7 +5009,7 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
|
|
|
4490
5009
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
4491
5010
|
ValueError: If `usage` is invalid.
|
|
4492
5011
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
4493
|
-
ValueError: If `shard_id` is
|
|
5012
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
4494
5013
|
|
|
4495
5014
|
Examples:
|
|
4496
5015
|
>>> usps_dataset_dir = "/path/to/usps_dataset_directory"
|
|
@@ -4546,7 +5065,7 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
|
|
|
4546
5065
|
|
|
4547
5066
|
class VOCDataset(MappableDataset, VisionBaseDataset):
|
|
4548
5067
|
"""
|
|
4549
|
-
|
|
5068
|
+
VOC(Visual Object Classes) dataset.
|
|
4550
5069
|
|
|
4551
5070
|
The generated dataset with different task setting has different output columns:
|
|
4552
5071
|
|
|
@@ -4567,8 +5086,9 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
|
|
|
4567
5086
|
class will be given a unique index starting from 0.
|
|
4568
5087
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
4569
5088
|
Default: None, all images.
|
|
4570
|
-
num_parallel_workers (int, optional): Number of
|
|
4571
|
-
Default: None,
|
|
5089
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
5090
|
+
Default: None, will use global default workers(8), it can be set
|
|
5091
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
4572
5092
|
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
4573
5093
|
order behavior shown in the table below.
|
|
4574
5094
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
@@ -4580,7 +5100,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
|
|
|
4580
5100
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
4581
5101
|
argument can only be specified when `num_shards` is also specified.
|
|
4582
5102
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4583
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
5103
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
4584
5104
|
Default: None, which means no cache is used.
|
|
4585
5105
|
extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named
|
|
4586
5106
|
:py:obj:`[_meta-filename, dtype=string]` will be output at the end. Default: False.
|
|
@@ -4600,7 +5120,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
|
|
|
4600
5120
|
ValueError: If task is not equal to 'Segmentation' or 'Detection'.
|
|
4601
5121
|
ValueError: If task equals 'Segmentation' but class_indexing is not None.
|
|
4602
5122
|
ValueError: If the txt file related to mode does not exist.
|
|
4603
|
-
ValueError: If `shard_id` is
|
|
5123
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
4604
5124
|
|
|
4605
5125
|
Note:
|
|
4606
5126
|
- Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op
|
|
@@ -4752,7 +5272,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
|
|
|
4752
5272
|
|
|
4753
5273
|
class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
|
|
4754
5274
|
"""
|
|
4755
|
-
|
|
5275
|
+
WIDERFace dataset.
|
|
4756
5276
|
|
|
4757
5277
|
When usage is "train", "valid" or "all", the generated dataset has eight columns ["image", "bbox", "blur",
|
|
4758
5278
|
"expression", "illumination", "occlusion", "pose", "invalid"]. The data type of the `image` column is uint8,
|
|
@@ -4766,8 +5286,9 @@ class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
|
|
|
4766
5286
|
and 'all' will read all 'train' and 'valid' samples. Default: None, will be set to 'all'.
|
|
4767
5287
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
4768
5288
|
Default: None, will read all images.
|
|
4769
|
-
num_parallel_workers (int, optional): Number of
|
|
4770
|
-
Default: None, will use
|
|
5289
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
5290
|
+
Default: None, will use global default workers(8), it can be set
|
|
5291
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
4771
5292
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
4772
5293
|
Default: None, expected order behavior shown in the table below.
|
|
4773
5294
|
decode (bool, optional): Decode the images after reading. Default: False.
|
|
@@ -4778,7 +5299,7 @@ class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
|
|
|
4778
5299
|
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This argument can only be specified
|
|
4779
5300
|
when `num_shards` is also specified.
|
|
4780
5301
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4781
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0
|
|
5302
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
4782
5303
|
Default: None, which means no cache is used.
|
|
4783
5304
|
|
|
4784
5305
|
Raises:
|
|
@@ -4787,7 +5308,7 @@ class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
|
|
|
4787
5308
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
4788
5309
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
4789
5310
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
4790
|
-
ValueError: If `shard_id` is
|
|
5311
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
4791
5312
|
ValueError: If `usage` is not in ['train', 'test', 'valid', 'all'].
|
|
4792
5313
|
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
4793
5314
|
ValueError: If `annotation_file` does not exist.
|